1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import base64
6 import calendar
7 import copy
8 import datetime
9 import hashlib
10 import itertools
11 import json
12 import math
13 import os.path
14 import random
15 import re
16 import time
17 import traceback
18
19 from .common import InfoExtractor, SearchInfoExtractor
20 from ..compat import (
21 compat_chr,
22 compat_HTTPError,
23 compat_parse_qs,
24 compat_str,
25 compat_urllib_parse_unquote_plus,
26 compat_urllib_parse_urlencode,
27 compat_urllib_parse_urlparse,
28 compat_urlparse,
29 )
30 from ..jsinterp import JSInterpreter
31 from ..utils import (
32 bug_reports_message,
33 bytes_to_intlist,
34 clean_html,
35 datetime_from_str,
36 dict_get,
37 error_to_compat_str,
38 ExtractorError,
39 float_or_none,
40 format_field,
41 int_or_none,
42 intlist_to_bytes,
43 is_html,
44 join_nonempty,
45 mimetype2ext,
46 network_exceptions,
47 orderedSet,
48 parse_codecs,
49 parse_count,
50 parse_duration,
51 parse_iso8601,
52 parse_qs,
53 qualities,
54 remove_end,
55 remove_start,
56 smuggle_url,
57 str_or_none,
58 str_to_int,
59 traverse_obj,
60 try_get,
61 unescapeHTML,
62 unified_strdate,
63 unsmuggle_url,
64 update_url_query,
65 url_or_none,
66 urljoin,
67 variadic,
68 )
69
70
71 def get_first(obj, keys, **kwargs):
72 return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
73
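# Illustrative sketch (hypothetical data, not part of the module): because the path is
# prefixed with `...` and get_all=False is passed, get_first scans every item of `obj`
# and returns the first non-empty match, e.g.
#   get_first([{'videoDetails': {'videoId': 'abc'}}, {'videoDetails': {'videoId': 'def'}}],
#             ('videoDetails', 'videoId'))  # -> 'abc'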
74
75 # any clients starting with _ cannot be explicitly requested by the user
76 INNERTUBE_CLIENTS = {
77 'web': {
78 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
79 'INNERTUBE_CONTEXT': {
80 'client': {
81 'clientName': 'WEB',
82 'clientVersion': '2.20210622.10.00',
83 }
84 },
85 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
86 },
87 'web_embedded': {
88 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
89 'INNERTUBE_CONTEXT': {
90 'client': {
91 'clientName': 'WEB_EMBEDDED_PLAYER',
92 'clientVersion': '1.20210620.0.1',
93 },
94 },
95 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
96 },
97 'web_music': {
98 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
99 'INNERTUBE_HOST': 'music.youtube.com',
100 'INNERTUBE_CONTEXT': {
101 'client': {
102 'clientName': 'WEB_REMIX',
103 'clientVersion': '1.20210621.00.00',
104 }
105 },
106 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
107 },
108 'web_creator': {
109 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
110 'INNERTUBE_CONTEXT': {
111 'client': {
112 'clientName': 'WEB_CREATOR',
113 'clientVersion': '1.20210621.00.00',
114 }
115 },
116 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
117 },
118 'android': {
119 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
120 'INNERTUBE_CONTEXT': {
121 'client': {
122 'clientName': 'ANDROID',
123 'clientVersion': '16.20',
124 }
125 },
126 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
127 'REQUIRE_JS_PLAYER': False
128 },
129 'android_embedded': {
130 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
131 'INNERTUBE_CONTEXT': {
132 'client': {
133 'clientName': 'ANDROID_EMBEDDED_PLAYER',
134 'clientVersion': '16.20',
135 },
136 },
137 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
138 'REQUIRE_JS_PLAYER': False
139 },
140 'android_music': {
141 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
142 'INNERTUBE_HOST': 'music.youtube.com',
143 'INNERTUBE_CONTEXT': {
144 'client': {
145 'clientName': 'ANDROID_MUSIC',
146 'clientVersion': '4.32',
147 }
148 },
149 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
150 'REQUIRE_JS_PLAYER': False
151 },
152 'android_creator': {
153 'INNERTUBE_CONTEXT': {
154 'client': {
155 'clientName': 'ANDROID_CREATOR',
156 'clientVersion': '21.24.100',
157 },
158 },
159 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
160 'REQUIRE_JS_PLAYER': False
161 },
162 # ios has HLS live streams
163 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
164 'ios': {
165 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
166 'INNERTUBE_CONTEXT': {
167 'client': {
168 'clientName': 'IOS',
169 'clientVersion': '16.20',
170 }
171 },
172 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
173 'REQUIRE_JS_PLAYER': False
174 },
175 'ios_embedded': {
176 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
177 'INNERTUBE_CONTEXT': {
178 'client': {
179 'clientName': 'IOS_MESSAGES_EXTENSION',
180 'clientVersion': '16.20',
181 },
182 },
183 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
184 'REQUIRE_JS_PLAYER': False
185 },
186 'ios_music': {
187 'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
188 'INNERTUBE_HOST': 'music.youtube.com',
189 'INNERTUBE_CONTEXT': {
190 'client': {
191 'clientName': 'IOS_MUSIC',
192 'clientVersion': '4.32',
193 },
194 },
195 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
196 'REQUIRE_JS_PLAYER': False
197 },
198 'ios_creator': {
199 'INNERTUBE_CONTEXT': {
200 'client': {
201 'clientName': 'IOS_CREATOR',
202 'clientVersion': '21.24.100',
203 },
204 },
205 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
206 'REQUIRE_JS_PLAYER': False
207 },
208 # mweb has 'ultralow' formats
209 # See: https://github.com/yt-dlp/yt-dlp/pull/557
210 'mweb': {
211 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
212 'INNERTUBE_CONTEXT': {
213 'client': {
214 'clientName': 'MWEB',
215 'clientVersion': '2.20210721.07.00',
216 }
217 },
218 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
219 },
220 }
221
222
223 def build_innertube_clients():
224 third_party = {
225 'embedUrl': 'https://google.com', # Can be any valid URL
226 }
227 base_clients = ('android', 'web', 'ios', 'mweb')
228 priority = qualities(base_clients[::-1])
229
230 for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
231 ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
232 ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
233 ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
234 ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
235 ytcfg['priority'] = 10 * priority(client.split('_', 1)[0])
236
237 if client in base_clients:
238 INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
239 agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
240 agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
241 agegate_ytcfg['priority'] -= 1
242 elif client.endswith('_embedded'):
243 ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
244 ytcfg['priority'] -= 2
245 else:
246 ytcfg['priority'] -= 3
247
248
249 build_innertube_clients()
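# Illustrative result of the call above (derived from the logic here, not an official list):
# each base client also gains an '_agegate' variant with clientScreen=EMBED, and client
# priority decreases roughly as android (30) > android_agegate (29) > android_embedded (28)
# > android_music/android_creator (27), followed by the web, ios and mweb families in that order.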
250
251
252 class YoutubeBaseInfoExtractor(InfoExtractor):
253 """Provide base functions for Youtube extractors"""
254
255 _RESERVED_NAMES = (
256 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
257 r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
258 r'browse|oembed|get_video_info|iframe_api|s/player|'
259 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
260
261 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
262
263 _NETRC_MACHINE = 'youtube'
264
265 # If True it will raise an error if no login info is provided
266 _LOGIN_REQUIRED = False
267
268 _INVIDIOUS_SITES = (
269 # invidious-redirect websites
270 r'(?:www\.)?redirect\.invidious\.io',
271 r'(?:(?:www|dev)\.)?invidio\.us',
272 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
273 r'(?:www\.)?invidious\.pussthecat\.org',
274 r'(?:www\.)?invidious\.zee\.li',
275 r'(?:www\.)?invidious\.ethibox\.fr',
276 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
277 # youtube-dl invidious instances list
278 r'(?:(?:www|no)\.)?invidiou\.sh',
279 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
280 r'(?:www\.)?invidious\.kabi\.tk',
281 r'(?:www\.)?invidious\.mastodon\.host',
282 r'(?:www\.)?invidious\.zapashcanon\.fr',
283 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
284 r'(?:www\.)?invidious\.tinfoil-hat\.net',
285 r'(?:www\.)?invidious\.himiko\.cloud',
286 r'(?:www\.)?invidious\.reallyancient\.tech',
287 r'(?:www\.)?invidious\.tube',
288 r'(?:www\.)?invidiou\.site',
289 r'(?:www\.)?invidious\.site',
290 r'(?:www\.)?invidious\.xyz',
291 r'(?:www\.)?invidious\.nixnet\.xyz',
292 r'(?:www\.)?invidious\.048596\.xyz',
293 r'(?:www\.)?invidious\.drycat\.fr',
294 r'(?:www\.)?inv\.skyn3t\.in',
295 r'(?:www\.)?tube\.poal\.co',
296 r'(?:www\.)?tube\.connect\.cafe',
297 r'(?:www\.)?vid\.wxzm\.sx',
298 r'(?:www\.)?vid\.mint\.lgbt',
299 r'(?:www\.)?vid\.puffyan\.us',
300 r'(?:www\.)?yewtu\.be',
301 r'(?:www\.)?yt\.elukerio\.org',
302 r'(?:www\.)?yt\.lelux\.fi',
303 r'(?:www\.)?invidious\.ggc-project\.de',
304 r'(?:www\.)?yt\.maisputain\.ovh',
305 r'(?:www\.)?ytprivate\.com',
306 r'(?:www\.)?invidious\.13ad\.de',
307 r'(?:www\.)?invidious\.toot\.koeln',
308 r'(?:www\.)?invidious\.fdn\.fr',
309 r'(?:www\.)?watch\.nettohikari\.com',
310 r'(?:www\.)?invidious\.namazso\.eu',
311 r'(?:www\.)?invidious\.silkky\.cloud',
312 r'(?:www\.)?invidious\.exonip\.de',
313 r'(?:www\.)?invidious\.riverside\.rocks',
314 r'(?:www\.)?invidious\.blamefran\.net',
315 r'(?:www\.)?invidious\.moomoo\.de',
316 r'(?:www\.)?ytb\.trom\.tf',
317 r'(?:www\.)?yt\.cyberhost\.uk',
318 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
319 r'(?:www\.)?qklhadlycap4cnod\.onion',
320 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
321 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
322 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
323 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
324 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
325 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
326 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
327 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
328 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
329 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
330 )
331
332 def _login(self):
333 """
334 Attempt to log in to YouTube.
335 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
336 """
337
338 if (self._LOGIN_REQUIRED
339 and self.get_param('cookiefile') is None
340 and self.get_param('cookiesfrombrowser') is None):
341 self.raise_login_required(
342 'Login details are needed to download this content', method='cookies')
343 username, password = self._get_login_info()
344 if username:
345 self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
346
347 def _initialize_consent(self):
348 cookies = self._get_cookies('https://www.youtube.com/')
349 if cookies.get('__Secure-3PSID'):
350 return
351 consent_id = None
352 consent = cookies.get('CONSENT')
353 if consent:
354 if 'YES' in consent.value:
355 return
356 consent_id = self._search_regex(
357 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
358 if not consent_id:
359 consent_id = random.randint(100, 999)
360 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
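# Illustrative cookie flow (hypothetical id): a pending cookie such as CONSENT=PENDING+262
# yields consent_id 262, and the method then stores CONSENT=YES+cb.20210328-17-p0.en+FX+262
# so subsequent requests are not redirected to the consent page.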
361
362 def _real_initialize(self):
363 self._initialize_consent()
364 self._login()
365
366 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
367 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
368 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
369
370 def _get_default_ytcfg(self, client='web'):
371 return copy.deepcopy(INNERTUBE_CLIENTS[client])
372
373 def _get_innertube_host(self, client='web'):
374 return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
375
376 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
377 # try_get but with fallback to default ytcfg client values when present
378 _func = lambda y: try_get(y, getter, expected_type)
379 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
380
381 def _extract_client_name(self, ytcfg, default_client='web'):
382 return self._ytcfg_get_safe(
383 ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
384 lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
385
386 def _extract_client_version(self, ytcfg, default_client='web'):
387 return self._ytcfg_get_safe(
388 ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
389 lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
390
391 def _extract_api_key(self, ytcfg=None, default_client='web'):
392 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
393
394 def _extract_context(self, ytcfg=None, default_client='web'):
395 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
396 context = _get_context(ytcfg)
397 if context:
398 return context
399
400 context = _get_context(self._get_default_ytcfg(default_client))
401 if not ytcfg:
402 return context
403
404 # Recreate the client context (required)
405 context['client'].update({
406 'clientVersion': self._extract_client_version(ytcfg, default_client),
407 'clientName': self._extract_client_name(ytcfg, default_client),
408 })
409 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
410 if visitor_data:
411 context['client']['visitorData'] = visitor_data
412 return context
413
414 _SAPISID = None
415
416 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
417 time_now = round(time.time())
418 if self._SAPISID is None:
419 yt_cookies = self._get_cookies('https://www.youtube.com')
420 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
421 # See: https://github.com/yt-dlp/yt-dlp/issues/393
422 sapisid_cookie = dict_get(
423 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
424 if sapisid_cookie and sapisid_cookie.value:
425 self._SAPISID = sapisid_cookie.value
426 self.write_debug('Extracted SAPISID cookie')
427 # SAPISID cookie is required if not already present
428 if not yt_cookies.get('SAPISID'):
429 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
430 self._set_cookie(
431 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
432 else:
433 self._SAPISID = False
434 if not self._SAPISID:
435 return None
436 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
437 sapisidhash = hashlib.sha1(
438 f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
439 return f'SAPISIDHASH {time_now}_{sapisidhash}'
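# Illustrative header shape (hypothetical SAPISID value 'AbCd1234' and timestamp 1634000000):
#   sha1('1634000000 AbCd1234 https://www.youtube.com') -> <40 hex chars>
#   resulting header value: 'SAPISIDHASH 1634000000_<40 hex chars>'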
440
441 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
442 note='Downloading API JSON', errnote='Unable to download API page',
443 context=None, api_key=None, api_hostname=None, default_client='web'):
444
445 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
446 data.update(query)
447 real_headers = self.generate_api_headers(default_client=default_client)
448 real_headers.update({'content-type': 'application/json'})
449 if headers:
450 real_headers.update(headers)
451 return self._download_json(
452 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
453 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
454 data=json.dumps(data).encode('utf8'), headers=real_headers,
455 query={'key': api_key or self._extract_api_key()})
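# Illustrative call (hypothetical endpoint/payload for this sketch):
#   self._call_api('player', {'videoId': 'BaW_jenozKc'}, 'BaW_jenozKc')
# POSTs {'context': {...}, 'videoId': 'BaW_jenozKc'} as JSON to
# https://www.youtube.com/youtubei/v1/player?key=<INNERTUBE_API_KEY>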
456
457 def extract_yt_initial_data(self, item_id, webpage, fatal=True):
458 data = self._search_regex(
459 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
460 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
461 if data:
462 return self._parse_json(data, item_id, fatal=fatal)
463
464 @staticmethod
465 def _extract_session_index(*data):
466 """
467 Index of current account in account list.
468 See: https://github.com/yt-dlp/yt-dlp/pull/519
469 """
470 for ytcfg in data:
471 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
472 if session_index is not None:
473 return session_index
474
475 # Deprecated?
476 def _extract_identity_token(self, ytcfg=None, webpage=None):
477 if ytcfg:
478 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
479 if token:
480 return token
481 if webpage:
482 return self._search_regex(
483 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
484 'identity token', default=None, fatal=False)
485
486 @staticmethod
487 def _extract_account_syncid(*args):
488 """
489 Extract syncId required to download private playlists of secondary channels
490 @params response and/or ytcfg
491 """
492 for data in args:
493 # ytcfg includes channel_syncid if on secondary channel
494 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
495 if delegated_sid:
496 return delegated_sid
497 sync_ids = (try_get(
498 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
499 lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
500 if len(sync_ids) >= 2 and sync_ids[1]:
501 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
502 # and just "user_syncid||" for primary channel. We only want the channel_syncid
503 return sync_ids[0]
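# Illustrative datasyncId values (hypothetical ids): on a secondary channel the field looks
# like 'UCabc123||XyZ789' and 'UCabc123' is returned; on the primary channel it is just
# 'XyZ789||', so the check on sync_ids[1] fails and nothing is returned.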
504
505 @staticmethod
506 def _extract_visitor_data(*args):
507 """
508 Extracts visitorData from an API response or ytcfg
509 Appears to be used to track session state
510 """
511 return get_first(
512 args, (('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))),
513 expected_type=str)
514
515 @property
516 def is_authenticated(self):
517 return bool(self._generate_sapisidhash_header())
518
519 def extract_ytcfg(self, video_id, webpage):
520 if not webpage:
521 return {}
522 return self._parse_json(
523 self._search_regex(
524 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
525 default='{}'), video_id, fatal=False) or {}
526
527 def generate_api_headers(
528 self, *, ytcfg=None, account_syncid=None, session_index=None,
529 visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
530
531 origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
532 headers = {
533 'X-YouTube-Client-Name': compat_str(
534 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
535 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
536 'Origin': origin,
537 'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
538 'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
539 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
540 }
541 if session_index is None:
542 session_index = self._extract_session_index(ytcfg)
543 if account_syncid or session_index is not None:
544 headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
545
546 auth = self._generate_sapisidhash_header(origin)
547 if auth is not None:
548 headers['Authorization'] = auth
549 headers['X-Origin'] = origin
550 return {h: v for h, v in headers.items() if v is not None}
551
552 @staticmethod
553 def _build_api_continuation_query(continuation, ctp=None):
554 query = {
555 'continuation': continuation
556 }
557 # TODO: Inconsistency with clickTrackingParams.
558 # Currently we have a fixed ctp contained within context (from ytcfg)
559 # and a ctp in root query for continuation.
560 if ctp:
561 query['clickTracking'] = {'clickTrackingParams': ctp}
562 return query
563
564 @classmethod
565 def _extract_next_continuation_data(cls, renderer):
566 next_continuation = try_get(
567 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
568 lambda x: x['continuation']['reloadContinuationData']), dict)
569 if not next_continuation:
570 return
571 continuation = next_continuation.get('continuation')
572 if not continuation:
573 return
574 ctp = next_continuation.get('clickTrackingParams')
575 return cls._build_api_continuation_query(continuation, ctp)
576
577 @classmethod
578 def _extract_continuation_ep_data(cls, continuation_ep: dict):
579 if isinstance(continuation_ep, dict):
580 continuation = try_get(
581 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
582 if not continuation:
583 return
584 ctp = continuation_ep.get('clickTrackingParams')
585 return cls._build_api_continuation_query(continuation, ctp)
586
587 @classmethod
588 def _extract_continuation(cls, renderer):
589 next_continuation = cls._extract_next_continuation_data(renderer)
590 if next_continuation:
591 return next_continuation
592
593 contents = []
594 for key in ('contents', 'items'):
595 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
596
597 for content in contents:
598 if not isinstance(content, dict):
599 continue
600 continuation_ep = try_get(
601 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
602 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
603 dict)
604 continuation = cls._extract_continuation_ep_data(continuation_ep)
605 if continuation:
606 return continuation
607
608 @classmethod
609 def _extract_alerts(cls, data):
610 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
611 if not isinstance(alert_dict, dict):
612 continue
613 for alert in alert_dict.values():
614 alert_type = alert.get('type')
615 if not alert_type:
616 continue
617 message = cls._get_text(alert, 'text')
618 if message:
619 yield alert_type, message
620
621 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
622 errors = []
623 warnings = []
624 for alert_type, alert_message in alerts:
625 if alert_type.lower() == 'error' and fatal:
626 errors.append([alert_type, alert_message])
627 else:
628 warnings.append([alert_type, alert_message])
629
630 for alert_type, alert_message in (warnings + errors[:-1]):
631 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
632 if errors:
633 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
634
635 def _extract_and_report_alerts(self, data, *args, **kwargs):
636 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
637
638 def _extract_badges(self, renderer: dict):
639 badges = set()
640 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
641 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
642 if label:
643 badges.add(label.lower())
644 return badges
645
646 @staticmethod
647 def _get_text(data, *path_list, max_runs=None):
648 for path in path_list or [None]:
649 if path is None:
650 obj = [data]
651 else:
652 obj = traverse_obj(data, path, default=[])
653 if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
654 obj = [obj]
655 for item in obj:
656 text = try_get(item, lambda x: x['simpleText'], compat_str)
657 if text:
658 return text
659 runs = try_get(item, lambda x: x['runs'], list) or []
660 if not runs and isinstance(item, list):
661 runs = item
662
663 runs = runs[:min(len(runs), max_runs or len(runs))]
664 text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
665 if text:
666 return text
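# Illustrative inputs (hypothetical renderer fragments): both
#   {'title': {'simpleText': 'Foo'}}  and  {'title': {'runs': [{'text': 'Fo'}, {'text': 'o'}]}}
# give self._get_text(renderer, 'title') == 'Foo'; max_runs would truncate the runs list first.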
667
668 def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
669 ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
670 default_client='web'):
671 response = None
672 last_error = None
673 count = -1
674 retries = self.get_param('extractor_retries', 3)
675 if check_get_keys is None:
676 check_get_keys = []
677 while count < retries:
678 count += 1
679 if last_error:
680 self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
681 try:
682 response = self._call_api(
683 ep=ep, fatal=True, headers=headers,
684 video_id=item_id, query=query,
685 context=self._extract_context(ytcfg, default_client),
686 api_key=self._extract_api_key(ytcfg, default_client),
687 api_hostname=api_hostname, default_client=default_client,
688 note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
689 except ExtractorError as e:
690 if isinstance(e.cause, network_exceptions):
691 if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
692 e.cause.seek(0)
693 yt_error = try_get(
694 self._parse_json(e.cause.read().decode(), item_id, fatal=False),
695 lambda x: x['error']['message'], compat_str)
696 if yt_error:
697 self._report_alerts([('ERROR', yt_error)], fatal=False)
698 # Downloading page may result in intermittent 5xx HTTP error
699 # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
700 # We also want to catch all other network exceptions since errors in later pages can be troublesome
701 # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
702 if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
703 last_error = error_to_compat_str(e.cause or e.msg)
704 if count < retries:
705 continue
706 if fatal:
707 raise
708 else:
709 self.report_warning(error_to_compat_str(e))
710 return
711
712 else:
713 try:
714 self._extract_and_report_alerts(response, only_once=True)
715 except ExtractorError as e:
716 # YouTube servers may return errors we want to retry on in a 200 OK response
717 # See: https://github.com/yt-dlp/yt-dlp/issues/839
718 if 'unknown error' in e.msg.lower():
719 last_error = e.msg
720 continue
721 if fatal:
722 raise
723 self.report_warning(error_to_compat_str(e))
724 return
725 if not check_get_keys or dict_get(response, check_get_keys):
726 break
727 # Youtube sometimes sends incomplete data
728 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
729 last_error = 'Incomplete data received'
730 if count >= retries:
731 if fatal:
732 raise ExtractorError(last_error)
733 else:
734 self.report_warning(last_error)
735 return
736 return response
737
738 @staticmethod
739 def is_music_url(url):
740 return re.match(r'https?://music\.youtube\.com/', url) is not None
741
742 def _extract_video(self, renderer):
743 video_id = renderer.get('videoId')
744 title = self._get_text(renderer, 'title')
745 description = self._get_text(renderer, 'descriptionSnippet')
746 duration = parse_duration(self._get_text(
747 renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
748 view_count_text = self._get_text(renderer, 'viewCountText') or ''
749 view_count = str_to_int(self._search_regex(
750 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
751 'view count', default=None))
752
753 uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
754
755 return {
756 '_type': 'url',
757 'ie_key': YoutubeIE.ie_key(),
758 'id': video_id,
759 'url': f'https://www.youtube.com/watch?v={video_id}',
760 'title': title,
761 'description': description,
762 'duration': duration,
763 'view_count': view_count,
764 'uploader': uploader,
765 }
766
767
768 class YoutubeIE(YoutubeBaseInfoExtractor):
769 IE_DESC = 'YouTube'
770 _VALID_URL = r"""(?x)^
771 (
772 (?:https?://|//) # http(s):// or protocol-independent URL
773 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
774 (?:www\.)?deturl\.com/www\.youtube\.com|
775 (?:www\.)?pwnyoutube\.com|
776 (?:www\.)?hooktube\.com|
777 (?:www\.)?yourepeat\.com|
778 tube\.majestyc\.net|
779 %(invidious)s|
780 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
781 (?:.*?\#/)? # handle anchor (#/) redirect urls
782 (?: # the various things that can precede the ID:
783 (?:(?:v|embed|e|shorts)/(?!videoseries)) # v/ or embed/ or e/ or shorts/
784 |(?: # or the v= param in all its forms
785 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
786 (?:\?|\#!?) # the params delimiter ? or # or #!
787 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
788 v=
789 )
790 ))
791 |(?:
792 youtu\.be| # just youtu.be/xxxx
793 vid\.plus| # or vid.plus/xxxx
794 zwearz\.com/watch| # or zwearz.com/watch/xxxx
795 %(invidious)s
796 )/
797 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
798 )
799 )? # all until now is optional -> you can pass the naked ID
800 (?P<id>[0-9A-Za-z_-]{11}) # here it is! the YouTube video ID
801 (?(1).+)? # if we found the ID, everything can follow
802 (?:\#|$)""" % {
803 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
804 }
805 _PLAYER_INFO_RE = (
806 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
807 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
808 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
809 )
810 _formats = {
811 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
812 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
813 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
814 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
815 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
816 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
817 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
818 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
819 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
820 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
821 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
822 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
823 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
824 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
825 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
826 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
827 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
828 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
829
830
831 # 3D videos
832 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
833 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
834 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
835 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
836 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
837 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
838 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
839
840 # Apple HTTP Live Streaming
841 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
842 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
843 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
844 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
845 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
846 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
847 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
848 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
849
850 # DASH mp4 video
851 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
852 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
853 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
854 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
855 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
856 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
857 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
858 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
859 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
860 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
861 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
862 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
863
864 # Dash mp4 audio
865 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
866 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
867 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
868 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
869 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
870 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
871 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
872
873 # Dash webm
874 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
875 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
876 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
877 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
878 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
879 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
880 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
881 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
882 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
883 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
884 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
885 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
886 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
887 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
888 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
889 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
890 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
891 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
892 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
893 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
894 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
895 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
896
897 # Dash webm audio
898 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
899 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
900
901 # Dash webm audio with opus inside
902 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
903 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
904 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
905
906 # RTMP (unnamed)
907 '_rtmp': {'protocol': 'rtmp'},
908
909 # av01 video only formats sometimes served with "unknown" codecs
910 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
911 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
912 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
913 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
914 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
915 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
916 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
917 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
918 }
919 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
920
921 _GEO_BYPASS = False
922
923 IE_NAME = 'youtube'
924 _TESTS = [
925 {
926 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
927 'info_dict': {
928 'id': 'BaW_jenozKc',
929 'ext': 'mp4',
930 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
931 'uploader': 'Philipp Hagemeister',
932 'uploader_id': 'phihag',
933 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
934 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
935 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
936 'upload_date': '20121002',
937 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
938 'categories': ['Science & Technology'],
939 'tags': ['youtube-dl'],
940 'duration': 10,
941 'view_count': int,
942 'like_count': int,
943 'dislike_count': int,
944 'start_time': 1,
945 'end_time': 9,
946 }
947 },
948 {
949 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
950 'note': 'Embed-only video (#1746)',
951 'info_dict': {
952 'id': 'yZIXLfi8CZQ',
953 'ext': 'mp4',
954 'upload_date': '20120608',
955 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
956 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
957 'uploader': 'SET India',
958 'uploader_id': 'setindia',
959 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
960 'age_limit': 18,
961 },
962 'skip': 'Private video',
963 },
964 {
965 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
966 'note': 'Use the first video ID in the URL',
967 'info_dict': {
968 'id': 'BaW_jenozKc',
969 'ext': 'mp4',
970 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
971 'uploader': 'Philipp Hagemeister',
972 'uploader_id': 'phihag',
973 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
974 'upload_date': '20121002',
975 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
976 'categories': ['Science & Technology'],
977 'tags': ['youtube-dl'],
978 'duration': 10,
979 'view_count': int,
980 'like_count': int,
981 'dislike_count': int,
982 },
983 'params': {
984 'skip_download': True,
985 },
986 },
987 {
988 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
989 'note': '256k DASH audio (format 141) via DASH manifest',
990 'info_dict': {
991 'id': 'a9LDPn-MO4I',
992 'ext': 'm4a',
993 'upload_date': '20121002',
994 'uploader_id': '8KVIDEO',
995 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
996 'description': '',
997 'uploader': '8KVIDEO',
998 'title': 'UHDTV TEST 8K VIDEO.mp4'
999 },
1000 'params': {
1001 'youtube_include_dash_manifest': True,
1002 'format': '141',
1003 },
1004 'skip': 'format 141 not served anymore',
1005 },
1006 # DASH manifest with encrypted signature
1007 {
1008 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1009 'info_dict': {
1010 'id': 'IB3lcPjvWLA',
1011 'ext': 'm4a',
1012 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1013 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1014 'duration': 244,
1015 'uploader': 'AfrojackVEVO',
1016 'uploader_id': 'AfrojackVEVO',
1017 'upload_date': '20131011',
1018 'abr': 129.495,
1019 },
1020 'params': {
1021 'youtube_include_dash_manifest': True,
1022 'format': '141/bestaudio[ext=m4a]',
1023 },
1024 },
1025 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1026 {
1027 'note': 'Embed allowed age-gate video',
1028 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1029 'info_dict': {
1030 'id': 'HtVdAasjOgU',
1031 'ext': 'mp4',
1032 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1033 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1034 'duration': 142,
1035 'uploader': 'The Witcher',
1036 'uploader_id': 'WitcherGame',
1037 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1038 'upload_date': '20140605',
1039 'age_limit': 18,
1040 },
1041 },
1042 {
1043 'note': 'Age-gate video with embed allowed in public site',
1044 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1045 'info_dict': {
1046 'id': 'HsUATh_Nc2U',
1047 'ext': 'mp4',
1048 'title': 'Godzilla 2 (Official Video)',
1049 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1050 'upload_date': '20200408',
1051 'uploader_id': 'FlyingKitty900',
1052 'uploader': 'FlyingKitty',
1053 'age_limit': 18,
1054 },
1055 },
1056 {
1057 'note': 'Age-gate video embeddable only with clientScreen=EMBED',
1058 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1059 'info_dict': {
1060 'id': 'Tq92D6wQ1mg',
1061 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1062 'ext': 'mp4',
1063 'upload_date': '20191227',
1064 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1065 'uploader': 'Projekt Melody',
1066 'description': 'md5:17eccca93a786d51bc67646756894066',
1067 'age_limit': 18,
1068 },
1069 },
1070 {
1071 'note': 'Non-age-gated non-embeddable video',
1072 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1073 'info_dict': {
1074 'id': 'MeJVWBSsPAY',
1075 'ext': 'mp4',
1076 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1077 'uploader': 'Herr Lurik',
1078 'uploader_id': 'st3in234',
1079 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1080 'upload_date': '20130730',
1081 },
1082 },
1083 {
1084 'note': 'Non-bypassable age-gated video',
1085 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1086 'only_matching': True,
1087 },
1088 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1089 # YouTube Red ad is not captured for creator
1090 {
1091 'url': '__2ABJjxzNo',
1092 'info_dict': {
1093 'id': '__2ABJjxzNo',
1094 'ext': 'mp4',
1095 'duration': 266,
1096 'upload_date': '20100430',
1097 'uploader_id': 'deadmau5',
1098 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1099 'creator': 'deadmau5',
1100 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1101 'uploader': 'deadmau5',
1102 'title': 'Deadmau5 - Some Chords (HD)',
1103 'alt_title': 'Some Chords',
1104 },
1105 'expected_warnings': [
1106 'DASH manifest missing',
1107 ]
1108 },
1109 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1110 {
1111 'url': 'lqQg6PlCWgI',
1112 'info_dict': {
1113 'id': 'lqQg6PlCWgI',
1114 'ext': 'mp4',
1115 'duration': 6085,
1116 'upload_date': '20150827',
1117 'uploader_id': 'olympic',
1118 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1119 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1120 'uploader': 'Olympics',
1121 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1122 },
1123 'params': {
1124 'skip_download': 'requires avconv',
1125 }
1126 },
1127 # Non-square pixels
1128 {
1129 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1130 'info_dict': {
1131 'id': '_b-2C3KPAM0',
1132 'ext': 'mp4',
1133 'stretched_ratio': 16 / 9.,
1134 'duration': 85,
1135 'upload_date': '20110310',
1136 'uploader_id': 'AllenMeow',
1137 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1138 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1139 'uploader': '孫ᄋᄅ',
1140 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1141 },
1142 },
1143 # url_encoded_fmt_stream_map is empty string
1144 {
1145 'url': 'qEJwOuvDf7I',
1146 'info_dict': {
1147 'id': 'qEJwOuvDf7I',
1148 'ext': 'webm',
1149 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1150 'description': '',
1151 'upload_date': '20150404',
1152 'uploader_id': 'spbelect',
1153 'uploader': 'Наблюдатели Петербурга',
1154 },
1155 'params': {
1156 'skip_download': 'requires avconv',
1157 },
1158 'skip': 'This live event has ended.',
1159 },
1160 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1161 {
1162 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1163 'info_dict': {
1164 'id': 'FIl7x6_3R5Y',
1165 'ext': 'webm',
1166 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1167 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1168 'duration': 220,
1169 'upload_date': '20150625',
1170 'uploader_id': 'dorappi2000',
1171 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1172 'uploader': 'dorappi2000',
1173 'formats': 'mincount:31',
1174 },
1175 'skip': 'not actual anymore',
1176 },
1177 # DASH manifest with segment_list
1178 {
1179 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1180 'md5': '8ce563a1d667b599d21064e982ab9e31',
1181 'info_dict': {
1182 'id': 'CsmdDsKjzN8',
1183 'ext': 'mp4',
1184 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1185 'uploader': 'Airtek',
1186 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1187 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1188 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1189 },
1190 'params': {
1191 'youtube_include_dash_manifest': True,
1192 'format': '135', # bestvideo
1193 },
1194 'skip': 'This live event has ended.',
1195 },
1196 {
1197 # Multifeed videos (multiple cameras), URL is for Main Camera
1198 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1199 'info_dict': {
1200 'id': 'jvGDaLqkpTg',
1201 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1202 'description': 'md5:e03b909557865076822aa169218d6a5d',
1203 },
1204 'playlist': [{
1205 'info_dict': {
1206 'id': 'jvGDaLqkpTg',
1207 'ext': 'mp4',
1208 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1209 'description': 'md5:e03b909557865076822aa169218d6a5d',
1210 'duration': 10643,
1211 'upload_date': '20161111',
1212 'uploader': 'Team PGP',
1213 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1214 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1215 },
1216 }, {
1217 'info_dict': {
1218 'id': '3AKt1R1aDnw',
1219 'ext': 'mp4',
1220 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1221 'description': 'md5:e03b909557865076822aa169218d6a5d',
1222 'duration': 10991,
1223 'upload_date': '20161111',
1224 'uploader': 'Team PGP',
1225 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1226 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1227 },
1228 }, {
1229 'info_dict': {
1230 'id': 'RtAMM00gpVc',
1231 'ext': 'mp4',
1232 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1233 'description': 'md5:e03b909557865076822aa169218d6a5d',
1234 'duration': 10995,
1235 'upload_date': '20161111',
1236 'uploader': 'Team PGP',
1237 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1238 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1239 },
1240 }, {
1241 'info_dict': {
1242 'id': '6N2fdlP3C5U',
1243 'ext': 'mp4',
1244 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1245 'description': 'md5:e03b909557865076822aa169218d6a5d',
1246 'duration': 10990,
1247 'upload_date': '20161111',
1248 'uploader': 'Team PGP',
1249 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1250 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1251 },
1252 }],
1253 'params': {
1254 'skip_download': True,
1255 },
1256 'skip': 'Not multifeed anymore',
1257 },
1258 {
1259 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1260 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1261 'info_dict': {
1262 'id': 'gVfLd0zydlo',
1263 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1264 },
1265 'playlist_count': 2,
1266 'skip': 'Not multifeed anymore',
1267 },
1268 {
1269 'url': 'https://vid.plus/FlRa-iH7PGw',
1270 'only_matching': True,
1271 },
1272 {
1273 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1274 'only_matching': True,
1275 },
1276 {
1277 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1278 # Also tests cut-off URL expansion in video description (see
1279 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1280 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1281 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1282 'info_dict': {
1283 'id': 'lsguqyKfVQg',
1284 'ext': 'mp4',
1285 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1286 'alt_title': 'Dark Walk',
1287 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1288 'duration': 133,
1289 'upload_date': '20151119',
1290 'uploader_id': 'IronSoulElf',
1291 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1292 'uploader': 'IronSoulElf',
1293 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1294 'track': 'Dark Walk',
1295 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1296 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1297 },
1298 'params': {
1299 'skip_download': True,
1300 },
1301 },
1302 {
1303 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1304 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1305 'only_matching': True,
1306 },
1307 {
1308 # Video with yt:stretch=17:0
1309 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1310 'info_dict': {
1311 'id': 'Q39EVAstoRM',
1312 'ext': 'mp4',
1313 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1314 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1315 'upload_date': '20151107',
1316 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1317 'uploader': 'CH GAMER DROID',
1318 },
1319 'params': {
1320 'skip_download': True,
1321 },
1322 'skip': 'This video does not exist.',
1323 },
1324 {
1325 # Video with incomplete 'yt:stretch=16:'
1326 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1327 'only_matching': True,
1328 },
1329 {
1330 # Video licensed under Creative Commons
1331 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1332 'info_dict': {
1333 'id': 'M4gD1WSo5mA',
1334 'ext': 'mp4',
1335 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1336 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1337 'duration': 721,
1338 'upload_date': '20150127',
1339 'uploader_id': 'BerkmanCenter',
1340 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1341 'uploader': 'The Berkman Klein Center for Internet & Society',
1342 'license': 'Creative Commons Attribution license (reuse allowed)',
1343 },
1344 'params': {
1345 'skip_download': True,
1346 },
1347 },
1348 {
1349 # Channel-like uploader_url
1350 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1351 'info_dict': {
1352 'id': 'eQcmzGIKrzg',
1353 'ext': 'mp4',
1354 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1355 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1356 'duration': 4060,
1357 'upload_date': '20151119',
1358 'uploader': 'Bernie Sanders',
1359 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1360 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1361 'license': 'Creative Commons Attribution license (reuse allowed)',
1362 },
1363 'params': {
1364 'skip_download': True,
1365 },
1366 },
1367 {
1368 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1369 'only_matching': True,
1370 },
1371 {
1372 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1373 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1374 'only_matching': True,
1375 },
1376 {
1377 # Rental video preview
1378 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1379 'info_dict': {
1380 'id': 'uGpuVWrhIzE',
1381 'ext': 'mp4',
1382 'title': 'Piku - Trailer',
1383 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1384 'upload_date': '20150811',
1385 'uploader': 'FlixMatrix',
1386 'uploader_id': 'FlixMatrixKaravan',
1387 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1388 'license': 'Standard YouTube License',
1389 },
1390 'params': {
1391 'skip_download': True,
1392 },
1393 'skip': 'This video is not available.',
1394 },
1395 {
1396 # YouTube Red video with episode data
1397 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1398 'info_dict': {
1399 'id': 'iqKdEhx-dD4',
1400 'ext': 'mp4',
1401 'title': 'Isolation - Mind Field (Ep 1)',
1402 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1403 'duration': 2085,
1404 'upload_date': '20170118',
1405 'uploader': 'Vsauce',
1406 'uploader_id': 'Vsauce',
1407 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1408 'series': 'Mind Field',
1409 'season_number': 1,
1410 'episode_number': 1,
1411 },
1412 'params': {
1413 'skip_download': True,
1414 },
1415 'expected_warnings': [
1416 'Skipping DASH manifest',
1417 ],
1418 },
1419 {
1420 # The following content has been identified by the YouTube community
1421 # as inappropriate or offensive to some audiences.
1422 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1423 'info_dict': {
1424 'id': '6SJNVb0GnPI',
1425 'ext': 'mp4',
1426 'title': 'Race Differences in Intelligence',
1427 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1428 'duration': 965,
1429 'upload_date': '20140124',
1430 'uploader': 'New Century Foundation',
1431 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1432 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1433 },
1434 'params': {
1435 'skip_download': True,
1436 },
1437 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1438 },
1439 {
1440 # itag 212
1441 'url': '1t24XAntNCY',
1442 'only_matching': True,
1443 },
1444 {
1445 # geo restricted to JP
1446 'url': 'sJL6WA-aGkQ',
1447 'only_matching': True,
1448 },
1449 {
1450 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1451 'only_matching': True,
1452 },
1453 {
1454 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1455 'only_matching': True,
1456 },
1457 {
1458 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1459 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1460 'only_matching': True,
1461 },
1462 {
1463 # DRM protected
1464 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1465 'only_matching': True,
1466 },
1467 {
1468 # Video with unsupported adaptive stream type formats
1469 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1470 'info_dict': {
1471 'id': 'Z4Vy8R84T1U',
1472 'ext': 'mp4',
1473 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1474 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1475 'duration': 433,
1476 'upload_date': '20130923',
1477 'uploader': 'Amelia Putri Harwita',
1478 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1479 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1480 'formats': 'maxcount:10',
1481 },
1482 'params': {
1483 'skip_download': True,
1484 'youtube_include_dash_manifest': False,
1485 },
1486 'skip': 'not actual anymore',
1487 },
1488 {
1489 # Youtube Music Auto-generated description
1490 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1491 'info_dict': {
1492 'id': 'MgNrAu2pzNs',
1493 'ext': 'mp4',
1494 'title': 'Voyeur Girl',
1495 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1496 'upload_date': '20190312',
1497 'uploader': 'Stephen - Topic',
1498 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1499 'artist': 'Stephen',
1500 'track': 'Voyeur Girl',
1501 'album': 'it\'s too much love to know my dear',
1502 'release_date': '20190313',
1503 'release_year': 2019,
1504 },
1505 'params': {
1506 'skip_download': True,
1507 },
1508 },
1509 {
1510 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1511 'only_matching': True,
1512 },
1513 {
1514 # invalid -> valid video id redirection
1515 'url': 'DJztXj2GPfl',
1516 'info_dict': {
1517 'id': 'DJztXj2GPfk',
1518 'ext': 'mp4',
1519 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1520 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1521 'upload_date': '20090125',
1522 'uploader': 'Prochorowka',
1523 'uploader_id': 'Prochorowka',
1524 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1525 'artist': 'Panjabi MC',
1526 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1527 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1528 },
1529 'params': {
1530 'skip_download': True,
1531 },
1532 'skip': 'Video unavailable',
1533 },
1534 {
1535 # empty description results in an empty string
1536 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1537 'info_dict': {
1538 'id': 'x41yOUIvK2k',
1539 'ext': 'mp4',
1540 'title': 'IMG 3456',
1541 'description': '',
1542 'upload_date': '20170613',
1543 'uploader_id': 'ElevageOrVert',
1544 'uploader': 'ElevageOrVert',
1545 },
1546 'params': {
1547 'skip_download': True,
1548 },
1549 },
1550 {
1551 # with '};' inside yt initial data (see [1])
1552 # see [2] for an example with '};' inside ytInitialPlayerResponse
1553 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1554 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1555 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1556 'info_dict': {
1557 'id': 'CHqg6qOn4no',
1558 'ext': 'mp4',
1559 'title': 'Part 77 Sort a list of simple types in c#',
1560 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1561 'upload_date': '20130831',
1562 'uploader_id': 'kudvenkat',
1563 'uploader': 'kudvenkat',
1564 },
1565 'params': {
1566 'skip_download': True,
1567 },
1568 },
1569 {
1570 # another example of '};' in ytInitialData
1571 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1572 'only_matching': True,
1573 },
1574 {
1575 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1576 'only_matching': True,
1577 },
1578 {
1579 # https://github.com/ytdl-org/youtube-dl/pull/28094
1580 'url': 'OtqTfy26tG0',
1581 'info_dict': {
1582 'id': 'OtqTfy26tG0',
1583 'ext': 'mp4',
1584 'title': 'Burn Out',
1585 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1586 'upload_date': '20141120',
1587 'uploader': 'The Cinematic Orchestra - Topic',
1588 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1589 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1590 'artist': 'The Cinematic Orchestra',
1591 'track': 'Burn Out',
1592 'album': 'Every Day',
1593 'release_date': None,
1594 'release_year': None,
1595 },
1596 'params': {
1597 'skip_download': True,
1598 },
1599 },
1600 {
1601 # controversial video, only works with bpctr when authenticated with cookies
1602 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1603 'only_matching': True,
1604 },
1605 {
1606 # controversial video, requires bpctr/contentCheckOk
1607 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1608 'info_dict': {
1609 'id': 'SZJvDhaSDnc',
1610 'ext': 'mp4',
1611 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1612 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1613 'uploader': 'CBS This Morning',
1614 'uploader_id': 'CBSThisMorning',
1615 'upload_date': '20140716',
1616 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1617 }
1618 },
1619 {
1620 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1621 'url': 'cBvYw8_A0vQ',
1622 'info_dict': {
1623 'id': 'cBvYw8_A0vQ',
1624 'ext': 'mp4',
1625 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1626 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1627 'upload_date': '20201120',
1628 'uploader': 'Walk around Japan',
1629 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1630 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1631 },
1632 'params': {
1633 'skip_download': True,
1634 },
1635 }, {
1636 # Has multiple audio streams
1637 'url': 'WaOKSUlf4TM',
1638 'only_matching': True
1639 }, {
1640 # Requires Premium: has format 141 when requested using YTM url
1641 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1642 'only_matching': True
1643 }, {
1644 # multiple subtitles with same lang_code
1645 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1646 'only_matching': True,
1647 }, {
1648 # Force use android client fallback
1649 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1650 'info_dict': {
1651 'id': 'YOelRv7fMxY',
1652 'title': 'DIGGING A SECRET TUNNEL Part 1',
1653 'ext': '3gp',
1654 'upload_date': '20210624',
1655 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1656 'uploader': 'colinfurze',
1657 'uploader_id': 'colinfurze',
1658 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1659 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1660 },
1661 'params': {
1662 'format': '17', # 3gp format available on android
1663 'extractor_args': {'youtube': {'player_client': ['android']}},
1664 },
1665 },
1666 {
1667 # Skip download of additional client configs (remix client config in this case)
1668 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1669 'only_matching': True,
1670 'params': {
1671 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1672 },
1673 }, {
1674 # shorts
1675 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1676 'only_matching': True,
1677 }, {
1678 'note': 'Storyboards',
1679 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
1680 'info_dict': {
1681 'id': '5KLPxDtMqe8',
1682 'ext': 'mhtml',
1683 'format_id': 'sb0',
1684 'title': 'Your Brain is Plastic',
1685 'uploader_id': 'scishow',
1686 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
1687 'upload_date': '20140324',
1688 'uploader': 'SciShow',
1689 }, 'params': {'format': 'mhtml', 'skip_download': True}
1690 }
1691 ]
1692
1693 @classmethod
1694 def suitable(cls, url):
1695 from ..utils import parse_qs
1696
1697 qs = parse_qs(url)
1698 if qs.get('list', [None])[0]:
1699 return False
1700 return super(YoutubeIE, cls).suitable(url)
1701
1702 def __init__(self, *args, **kwargs):
1703 super(YoutubeIE, self).__init__(*args, **kwargs)
1704 self._code_cache = {}
1705 self._player_cache = {}
1706
1707 def _extract_player_url(self, *ytcfgs, webpage=None):
1708 player_url = traverse_obj(
1709 ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
1710 get_all=False, expected_type=compat_str)
1711 if not player_url:
1712 return
1713 if player_url.startswith('//'):
1714 player_url = 'https:' + player_url
1715 elif not re.match(r'https?://', player_url):
1716 player_url = compat_urlparse.urljoin(
1717 'https://www.youtube.com', player_url)
1718 return player_url
1719
1720 def _download_player_url(self, video_id, fatal=False):
1721 res = self._download_webpage(
1722 'https://www.youtube.com/iframe_api',
1723 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
1724 if res:
1725 player_version = self._search_regex(
1726 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
1727 if player_version:
1728 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
1729
1730 def _signature_cache_id(self, example_sig):
1731 """ Return a string representation of a signature """
1732 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1733
1734 @classmethod
1735 def _extract_player_info(cls, player_url):
1736 for player_re in cls._PLAYER_INFO_RE:
1737 id_m = re.search(player_re, player_url)
1738 if id_m:
1739 break
1740 else:
1741 raise ExtractorError('Cannot identify player %r' % player_url)
1742 return id_m.group('id')
1743
1744 def _load_player(self, video_id, player_url, fatal=True):
1745 player_id = self._extract_player_info(player_url)
1746 if player_id not in self._code_cache:
1747 code = self._download_webpage(
1748 player_url, video_id, fatal=fatal,
1749 note='Downloading player ' + player_id,
1750 errnote='Download of %s failed' % player_url)
1751 if code:
1752 self._code_cache[player_id] = code
1753 return self._code_cache.get(player_id)
1754
1755 def _extract_signature_function(self, video_id, player_url, example_sig):
1756 player_id = self._extract_player_info(player_url)
1757
1758 # Read from filesystem cache
1759 func_id = 'js_%s_%s' % (
1760 player_id, self._signature_cache_id(example_sig))
1761 assert os.path.basename(func_id) == func_id
1762
1763 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1764 if cache_spec is not None:
1765 return lambda s: ''.join(s[i] for i in cache_spec)
1766
1767 code = self._load_player(video_id, player_url)
1768 if code:
1769 res = self._parse_sig_js(code)
1770
1771 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1772 cache_res = res(test_string)
1773 cache_spec = [ord(c) for c in cache_res]
1774
1775 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1776 return res
1777
1778 def _print_sig_code(self, func, example_sig):
1779 if not self.get_param('youtube_print_sig_code'):
1780 return
1781
1782 def gen_sig_code(idxs):
1783 def _genslice(start, end, step):
1784 starts = '' if start == 0 else str(start)
1785 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1786 steps = '' if step == 1 else (':%d' % step)
1787 return 's[%s%s%s]' % (starts, ends, steps)
1788
1789 step = None
1790 # Quell pyflakes warnings - start will be set when step is set
1791 start = '(Never used)'
1792 for i, prev in zip(idxs[1:], idxs[:-1]):
1793 if step is not None:
1794 if i - prev == step:
1795 continue
1796 yield _genslice(start, prev, step)
1797 step = None
1798 continue
1799 if i - prev in [-1, 1]:
1800 step = i - prev
1801 start = prev
1802 continue
1803 else:
1804 yield 's[%d]' % prev
1805 if step is None:
1806 yield 's[%d]' % i
1807 else:
1808 yield _genslice(start, i, step)
1809
1810 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1811 cache_res = func(test_string)
1812 cache_spec = [ord(c) for c in cache_res]
1813 expr_code = ' + '.join(gen_sig_code(cache_spec))
1814 signature_id_tuple = '(%s)' % (
1815 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1816 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1817 ' return %s\n') % (signature_id_tuple, expr_code)
1818 self.to_screen('Extracted signature function:\n' + code)
1819
1820 def _parse_sig_js(self, jscode):
1821 funcname = self._search_regex(
1822 (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1823 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1824 r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
1825 r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
1826 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
1827 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1828 r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1829 # Obsolete patterns
1830 r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1831 r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1832 r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1833 r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1834 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1835 r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1836 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1837 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1838 jscode, 'Initial JS player signature function name', group='sig')
1839
1840 jsi = JSInterpreter(jscode)
1841 initial_function = jsi.extract_function(funcname)
1842 return lambda s: initial_function([s])
1843
1844 def _decrypt_signature(self, s, video_id, player_url):
1845 """Turn the encrypted s field into a working signature"""
1846
1847 if player_url is None:
1848 raise ExtractorError('Cannot decrypt signature without player_url')
1849
1850 try:
1851 player_id = (player_url, self._signature_cache_id(s))
1852 if player_id not in self._player_cache:
1853 func = self._extract_signature_function(
1854 video_id, player_url, s
1855 )
1856 self._player_cache[player_id] = func
1857 func = self._player_cache[player_id]
1858 self._print_sig_code(func, s)
1859 return func(s)
1860 except Exception as e:
1861 raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
1862
1863 def _decrypt_nsig(self, s, video_id, player_url):
1864 """Turn the encrypted n field into a working signature"""
1865 if player_url is None:
1866 raise ExtractorError('Cannot decrypt nsig without player_url')
1867 if player_url.startswith('//'):
1868 player_url = 'https:' + player_url
1869 elif not re.match(r'https?://', player_url):
1870 player_url = compat_urlparse.urljoin(
1871 'https://www.youtube.com', player_url)
1872
1873 sig_id = ('nsig_value', s)
1874 if sig_id in self._player_cache:
1875 return self._player_cache[sig_id]
1876
1877 try:
1878 player_id = ('nsig', player_url)
1879 if player_id not in self._player_cache:
1880 self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
1881 func = self._player_cache[player_id]
1882 self._player_cache[sig_id] = func(s)
1883 self.write_debug(f'Decrypted nsig {s} => {self._player_cache[sig_id]}')
1884 return self._player_cache[sig_id]
1885 except Exception as e:
1886 raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
1887
1888 def _extract_n_function_name(self, jscode):
1889 return self._search_regex(
1890 (r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',),
1891 jscode, 'Initial JS player n function name', group='nfunc')
1892
1893 def _extract_n_function(self, video_id, player_url):
1894 player_id = self._extract_player_info(player_url)
1895 func_code = self._downloader.cache.load('youtube-nsig', player_id)
1896
1897 if func_code:
1898 jsi = JSInterpreter(func_code)
1899 else:
1900 jscode = self._load_player(video_id, player_url)
1901 funcname = self._extract_n_function_name(jscode)
1902 jsi = JSInterpreter(jscode)
1903 func_code = jsi.extract_function_code(funcname)
1904 self._downloader.cache.store('youtube-nsig', player_id, func_code)
1905
1906 if self.get_param('youtube_print_sig_code'):
1907 self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
1908
1909 return lambda s: jsi.extract_function_from_code(*func_code)([s])
1910
1911 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1912 """
1913 Extract signatureTimestamp (sts)
1914 Required to tell API what sig/player version is in use.
1915 """
1916 sts = None
1917 if isinstance(ytcfg, dict):
1918 sts = int_or_none(ytcfg.get('STS'))
1919
1920 if not sts:
1921 # Attempt to extract from player
1922 if player_url is None:
1923 error_msg = 'Cannot extract signature timestamp without player_url.'
1924 if fatal:
1925 raise ExtractorError(error_msg)
1926 self.report_warning(error_msg)
1927 return
1928 code = self._load_player(video_id, player_url, fatal=fatal)
1929 if code:
1930 sts = int_or_none(self._search_regex(
1931 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
1932 'JS player signature timestamp', group='sts', fatal=fatal))
1933 return sts
1934
1935 def _mark_watched(self, video_id, player_responses):
1936 playback_url = get_first(
1937 player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
1938 expected_type=url_or_none)
1939 if not playback_url:
1940 self.report_warning('Unable to mark watched')
1941 return
1942 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1943 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1944
1945 # cpn generation algorithm is reverse engineered from base.js.
1946 # In fact it works even with dummy cpn.
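# Sketch of the resulting value (example string is hypothetical): each of the
# 16 characters is picked from the 64-character CPN_ALPHABET below, giving
# something like 'wzN3qPl0-_bXaZ9c'; per the note above, even a dummy value works.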
1947 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1948 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1949
1950 qs.update({
1951 'ver': ['2'],
1952 'cpn': [cpn],
1953 })
1954 playback_url = compat_urlparse.urlunparse(
1955 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1956
1957 self._download_webpage(
1958 playback_url, video_id, 'Marking watched',
1959 'Unable to mark watched', fatal=False)
1960
1961 @staticmethod
1962 def _extract_urls(webpage):
1963 # Embedded YouTube player
1964 entries = [
1965 unescapeHTML(mobj.group('url'))
1966 for mobj in re.finditer(r'''(?x)
1967 (?:
1968 <iframe[^>]+?src=|
1969 data-video-url=|
1970 <embed[^>]+?src=|
1971 embedSWF\(?:\s*|
1972 <object[^>]+data=|
1973 new\s+SWFObject\(
1974 )
1975 (["\'])
1976 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1977 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1978 \1''', webpage)]
1979
1980 # lazyYT YouTube embed
1981 entries.extend(list(map(
1982 unescapeHTML,
1983 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1984
1985 # Wordpress "YouTube Video Importer" plugin
1986 matches = re.findall(r'''(?x)<div[^>]+
1987 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1988 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1989 entries.extend(m[-1] for m in matches)
1990
1991 return entries
1992
1993 @staticmethod
1994 def _extract_url(webpage):
1995 urls = YoutubeIE._extract_urls(webpage)
1996 return urls[0] if urls else None
1997
1998 @classmethod
1999 def extract_id(cls, url):
2000 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2001 if mobj is None:
2002 raise ExtractorError('Invalid URL: %s' % url)
2003 return mobj.group('id')
2004
2005 def _extract_chapters_from_json(self, data, duration):
2006 chapter_list = traverse_obj(
2007 data, (
2008 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2009 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2010 ), expected_type=list)
2011
2012 return self._extract_chapters(
2013 chapter_list,
2014 chapter_time=lambda chapter: float_or_none(
2015 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2016 chapter_title=lambda chapter: traverse_obj(
2017 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2018 duration=duration)
2019
2020 def _extract_chapters_from_engagement_panel(self, data, duration):
2021 content_list = traverse_obj(
2022 data,
2023 ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2024 expected_type=list, default=[])
2025 chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2026 chapter_title = lambda chapter: self._get_text(chapter, 'title')
2027
2028 return next((
2029 filter(None, (
2030 self._extract_chapters(
2031 traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2032 chapter_time, chapter_title, duration)
2033 for contents in content_list
2034 ))), [])
2035
2036 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2037 chapters = []
2038 last_chapter = {'start_time': 0}
2039 for idx, chapter in enumerate(chapter_list or []):
2040 title = chapter_title(chapter)
2041 start_time = chapter_time(chapter)
2042 if start_time is None:
2043 continue
2044 last_chapter['end_time'] = start_time
2045 if start_time < last_chapter['start_time']:
2046 if idx == 1:
2047 chapters.pop()
2048 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2049 else:
2050 self.report_warning(f'Invalid start time for chapter "{title}"')
2051 continue
2052 last_chapter = {'start_time': start_time, 'title': title}
2053 chapters.append(last_chapter)
2054 last_chapter['end_time'] = duration
2055 return chapters
2056
2057 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2058 return self._parse_json(self._search_regex(
2059 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2060 regex), webpage, name, default='{}'), video_id, fatal=False)
2061
2062 @staticmethod
2063 def parse_time_text(time_text):
2064 """
2065 Parse the comment time text
2066 time_text is in the format 'X units ago (edited)'
2067 """
2068 time_text_split = time_text.split(' ')
2069 if len(time_text_split) >= 3:
2070 try:
2071 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2072 except ValueError:
2073 return None
2074
2075 def _extract_comment(self, comment_renderer, parent=None):
2076 comment_id = comment_renderer.get('commentId')
2077 if not comment_id:
2078 return
2079
2080 text = self._get_text(comment_renderer, 'contentText')
2081
2082 # note: timestamp is an estimate calculated from the current time and time_text
2083 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
2084 time_text_dt = self.parse_time_text(time_text)
2085 timestamp = (calendar.timegm(time_text_dt.timetuple())
2086     if isinstance(time_text_dt, datetime.datetime) else None)
2087 author = self._get_text(comment_renderer, 'authorText')
2088 author_id = try_get(comment_renderer,
2089 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2090
2091 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2092 lambda x: x['likeCount']), compat_str)) or 0
2093 author_thumbnail = try_get(comment_renderer,
2094 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2095
2096 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2097 is_favorited = 'creatorHeart' in (try_get(
2098 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2099 return {
2100 'id': comment_id,
2101 'text': text,
2102 'timestamp': timestamp,
2103 'time_text': time_text,
2104 'like_count': votes,
2105 'is_favorited': is_favorited,
2106 'author': author,
2107 'author_id': author_id,
2108 'author_thumbnail': author_thumbnail,
2109 'author_is_uploader': author_is_uploader,
2110 'parent': parent or 'root'
2111 }
2112
2113 def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, comment_counts=None):
2114
2115 def extract_header(contents):
2116 _continuation = None
2117 for content in contents:
2118 comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
2119 expected_comment_count = parse_count(self._get_text(
2120 comments_header_renderer, 'countText', 'commentsCount', max_runs=1))
2121
2122 if expected_comment_count:
2123 comment_counts[1] = expected_comment_count
2124 self.to_screen('Downloading ~%d comments' % expected_comment_count)
2125 sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
2126 comment_sort_index = int(sort_mode_str != 'top') # 1 = new, 0 = top
2127
2128 sort_menu_item = try_get(
2129 comments_header_renderer,
2130 lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
2131 sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}
2132
2133 _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
2134 if not _continuation:
2135 continue
2136
2137 sort_text = sort_menu_item.get('title')
2138 if isinstance(sort_text, compat_str):
2139 sort_text = sort_text.lower()
2140 else:
2141 sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
2142 self.to_screen('Sorting comments by %s' % sort_text)
2143 break
2144 return _continuation
2145
2146 def extract_thread(contents):
2147 if not parent:
2148 comment_counts[2] = 0
2149 for content in contents:
2150 comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
2151 comment_renderer = try_get(
2152 comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
2153 content, (lambda x: x['commentRenderer'], dict))
2154
2155 if not comment_renderer:
2156 continue
2157 comment = self._extract_comment(comment_renderer, parent)
2158 if not comment:
2159 continue
2160 comment_counts[0] += 1
2161 yield comment
2162 # Attempt to get the replies
2163 comment_replies_renderer = try_get(
2164 comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
2165
2166 if comment_replies_renderer:
2167 comment_counts[2] += 1
2168 comment_entries_iter = self._comment_entries(
2169 comment_replies_renderer, ytcfg, video_id,
2170 parent=comment.get('id'), comment_counts=comment_counts)
2171
2172 for reply_comment in comment_entries_iter:
2173 yield reply_comment
2174
2175 # YouTube comments have a max depth of 2
2176 max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
2177 if max_depth == 1 and parent:
2178 return
2179 if not comment_counts:
2180 # comments so far, est. total comments, current comment thread #
2181 comment_counts = [0, 0, 0]
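# Illustrative (numbers are hypothetical): after yielding 150 of an estimated
# 2000 comments and encountering the 12th reply thread, comment_counts == [150, 2000, 12].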
2182
2183 continuation = self._extract_continuation(root_continuation_data)
2184 if continuation and len(continuation['continuation']) < 27:
2185 self.write_debug('Detected old API continuation token. Generating new API compatible token.')
2186 continuation_token = self._generate_comment_continuation(video_id)
2187 continuation = self._build_api_continuation_query(continuation_token, None)
2188
2189 message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
2190 if message and not parent:
2191 self.report_warning(message, video_id=video_id)
2192
2193 visitor_data = None
2194 is_first_continuation = parent is None
2195
2196 for page_num in itertools.count(0):
2197 if not continuation:
2198 break
2199 headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
2200 comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
2201 if page_num == 0:
2202 if is_first_continuation:
2203 note_prefix = 'Downloading comment section API JSON'
2204 else:
2205 note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
2206 comment_counts[2], comment_prog_str)
2207 else:
2208 note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
2209 ' ' if parent else '', ' replies' if parent else '',
2210 page_num, comment_prog_str)
2211
2212 response = self._extract_response(
2213 item_id=None, query=continuation,
2214 ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
2215 check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
2216 if not response:
2217 break
2218 visitor_data = try_get(
2219 response,
2220 lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
2221 compat_str) or visitor_data
2222
2223 continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))
2224
2225 continuation = None
2226 if isinstance(continuation_contents, list):
2227 for continuation_section in continuation_contents:
2228 if not isinstance(continuation_section, dict):
2229 continue
2230 continuation_items = try_get(
2231 continuation_section,
2232 (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
2233 lambda x: x['appendContinuationItemsAction']['continuationItems']),
2234 list) or []
2235 if is_first_continuation:
2236 continuation = extract_header(continuation_items)
2237 is_first_continuation = False
2238 if continuation:
2239 break
2240 continue
2241 count = 0
2242 for count, entry in enumerate(extract_thread(continuation_items)):
2243 yield entry
2244 continuation = self._extract_continuation({'contents': continuation_items})
2245 if continuation:
2246 # Sometimes YouTube provides a continuation without any comments
2247 # In most cases we end up just downloading these with very few comments left to come.
2248 if count == 0:
2249 if not parent:
2250 self.report_warning('No comments received - assuming end of comments')
2251 continuation = None
2252 break
2253
2254 # Deprecated response structure
2255 elif isinstance(continuation_contents, dict):
2256 known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
2257 for key, continuation_renderer in continuation_contents.items():
2258 if key not in known_continuation_renderers:
2259 continue
2260 if not isinstance(continuation_renderer, dict):
2261 continue
2262 if is_first_continuation:
2263 header_continuation_items = [continuation_renderer.get('header') or {}]
2264 continuation = extract_header(header_continuation_items)
2265 is_first_continuation = False
2266 if continuation:
2267 break
2268
2269 # Sometimes YouTube provides a continuation without any comments
2270 # In most cases we end up just downloading these with very few comments left to come.
2271 count = 0
2272 for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
2273 yield entry
2274 continuation = self._extract_continuation(continuation_renderer)
2275 if count == 0:
2276 if not parent:
2277 self.report_warning('No comments received - assuming end of comments')
2278 continuation = None
2279 break
2280
2281 @staticmethod
2282 def _generate_comment_continuation(video_id):
2283 """
2284 Generates initial comment section continuation token from given video id
2285 """
2286 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2287 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2288 new_continuation_intlist = list(itertools.chain.from_iterable(
2289 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2290 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2291
2292 def _get_comments(self, ytcfg, video_id, contents, webpage):
2293 """Entry for comment extraction"""
2294 def _real_comment_extract(contents):
2295 renderer = next((
2296 item for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
2297 if item.get('sectionIdentifier') == 'comment-item-section'), None)
2298 yield from self._comment_entries(renderer, ytcfg, video_id)
2299
2300 max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
2301 # Force English regardless of account setting to prevent parsing issues
2302 # See: https://github.com/yt-dlp/yt-dlp/issues/532
2303 ytcfg = copy.deepcopy(ytcfg)
2304 traverse_obj(
2305 ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})['hl'] = 'en'
2306 return itertools.islice(_real_comment_extract(contents), 0, max_comments)
2307
2308 @staticmethod
2309 def _get_checkok_params():
2310 return {'contentCheckOk': True, 'racyCheckOk': True}
2311
2312 @classmethod
2313 def _generate_player_context(cls, sts=None):
2314 context = {
2315 'html5Preference': 'HTML5_PREF_WANTS',
2316 }
2317 if sts is not None:
2318 context['signatureTimestamp'] = sts
2319 return {
2320 'playbackContext': {
2321 'contentPlaybackContext': context
2322 },
2323 **cls._get_checkok_params()
2324 }
2325
2326 @staticmethod
2327 def _is_agegated(player_response):
2328 if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
2329 return True
2330
2331 reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2332 AGE_GATE_REASONS = (
2333 'confirm your age', 'age-restricted', 'inappropriate', # reason
2334 'age_verification_required', 'age_check_required', # status
2335 )
2336 return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
2337
2338 @staticmethod
2339 def _is_unplayable(player_response):
2340 return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
2341
2342 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
2343
2344 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2345 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2346 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
2347 headers = self.generate_api_headers(
2348 ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
2349
2350 yt_query = {'videoId': video_id}
2351 yt_query.update(self._generate_player_context(sts))
2352 return self._extract_response(
2353 item_id=video_id, ep='player', query=yt_query,
2354 ytcfg=player_ytcfg, headers=headers, fatal=True,
2355 default_client=client,
2356 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2357 ) or None
2358
2359 def _get_requested_clients(self, url, smuggled_data):
2360 requested_clients = []
2361 default = ['android', 'web']
2362 allowed_clients = sorted(
2363 [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
2364 key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
2365 for client in self._configuration_arg('player_client'):
2366 if client in allowed_clients:
2367 requested_clients.append(client)
2368 elif client == 'default':
2369 requested_clients.extend(default)
2370 elif client == 'all':
2371 requested_clients.extend(allowed_clients)
2372 else:
2373 self.report_warning(f'Skipping unsupported client {client}')
2374 if not requested_clients:
2375 requested_clients = default
2376
2377 if smuggled_data.get('is_music_url') or self.is_music_url(url):
2378 requested_clients.extend(
2379 f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
2380
2381 return orderedSet(requested_clients)
2382
2383 def _extract_player_ytcfg(self, client, video_id):
2384 url = {
2385 'web_music': 'https://music.youtube.com',
2386 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2387 }.get(client)
2388 if not url:
2389 return {}
2390 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2391 return self.extract_ytcfg(video_id, webpage) or {}
2392
2393 def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
2394 initial_pr = None
2395 if webpage:
2396 initial_pr = self._extract_yt_initial_variable(
2397 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2398 video_id, 'initial player response')
2399
2400 original_clients = clients
2401 clients = clients[::-1]
2402 prs = []
2403
2404 def append_client(client_name):
2405 if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
2406 clients.append(client_name)
2407
2408 # Android player_response does not have microFormats which are needed for
2409 # extraction of some data. So we return the initial_pr with formats
2410 # stripped out even if not requested by the user
2411 # See: https://github.com/yt-dlp/yt-dlp/issues/501
2412 if initial_pr:
2413 pr = dict(initial_pr)
2414 pr['streamingData'] = None
2415 prs.append(pr)
2416
2417 last_error = None
2418 tried_iframe_fallback = False
2419 player_url = None
2420 while clients:
2421 client = clients.pop()
2422 player_ytcfg = master_ytcfg if client == 'web' else {}
2423 if 'configs' not in self._configuration_arg('player_skip'):
2424 player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg
2425
2426 player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
2427 require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
2428 if 'js' in self._configuration_arg('player_skip'):
2429 require_js_player = False
2430 player_url = None
2431
2432 if not player_url and not tried_iframe_fallback and require_js_player:
2433 player_url = self._download_player_url(video_id)
2434 tried_iframe_fallback = True
2435
2436 try:
2437 pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
2438 client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
2439 except ExtractorError as e:
2440 if last_error:
2441 self.report_warning(last_error)
2442 last_error = e
2443 continue
2444
2445 if pr:
2446 prs.append(pr)
2447
2448 # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
2449 if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
2450 append_client(client.replace('_agegate', '_creator'))
2451 elif self._is_agegated(pr):
2452 append_client(f'{client}_agegate')
2453
2454 if last_error:
2455 if not len(prs):
2456 raise last_error
2457 self.report_warning(last_error)
2458 return prs, player_url
2459
2460 def _extract_formats(self, streaming_data, video_id, player_url, is_live):
2461 itags, stream_ids = {}, []
2462 itag_qualities, res_qualities = {}, {}
2463 q = qualities([
2464 # Normally tiny is the smallest video-only format. But
2465 # audio-only formats with unknown quality may get tagged as tiny
2466 'tiny',
2467 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2468 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2469 ])
2470 streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
2471
2472 for fmt in streaming_formats:
2473 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2474 continue
2475
2476 itag = str_or_none(fmt.get('itag'))
2477 audio_track = fmt.get('audioTrack') or {}
2478 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2479 if stream_id in stream_ids:
2480 continue
2481
2482 quality = fmt.get('quality')
2483 height = int_or_none(fmt.get('height'))
2484 if quality == 'tiny' or not quality:
2485 quality = fmt.get('audioQuality', '').lower() or quality
2486 # The 3gp format (17) in android client has a quality of "small",
2487 # but is actually worse than other formats
2488 if itag == '17':
2489 quality = 'tiny'
2490 if quality:
2491 if itag:
2492 itag_qualities[itag] = quality
2493 if height:
2494 res_qualities[height] = quality
2495 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2496 # (adding `&sq=0` to the URL) and parsing the emsg box to determine the
2497 # number of fragments that would subsequently be requested with (`&sq=N`)
2498 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2499 continue
2500
2501 fmt_url = fmt.get('url')
2502 if not fmt_url:
2503 sc = compat_parse_qs(fmt.get('signatureCipher'))
2504 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2505 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2506 if not (sc and fmt_url and encrypted_sig):
2507 continue
2508 if not player_url:
2509 continue
2510 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2511 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2512 fmt_url += '&' + sp + '=' + signature
2513
2514 query = parse_qs(fmt_url)
2515 throttled = False
2516 if query.get('ratebypass') != ['yes'] and query.get('n'):
2517 try:
2518 fmt_url = update_url_query(fmt_url, {
2519 'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
2520 except ExtractorError as e:
2521 self.report_warning(
2522 f'nsig extraction failed: You may experience throttling for some formats\n'
2523 f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
2524 throttled = True
2525
2526 if itag:
2527 itags[itag] = 'https'
2528 stream_ids.append(stream_id)
2529
2530 tbr = float_or_none(
2531 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2532 dct = {
2533 'asr': int_or_none(fmt.get('audioSampleRate')),
2534 'filesize': int_or_none(fmt.get('contentLength')),
2535 'format_id': itag,
2536 'format_note': join_nonempty(
2537 '%s%s' % (audio_track.get('displayName') or '',
2538 ' (default)' if audio_track.get('audioIsDefault') else ''),
2539 fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
2540 throttled and 'THROTTLED', delim=', '),
2541 'source_preference': -10 if throttled else -1,
2542 'fps': int_or_none(fmt.get('fps')) or None,
2543 'height': height,
2544 'quality': q(quality),
2545 'tbr': tbr,
2546 'url': fmt_url,
2547 'width': int_or_none(fmt.get('width')),
2548 'language': audio_track.get('id', '').split('.')[0],
2549 'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
2550 }
2551 mime_mobj = re.match(
2552 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2553 if mime_mobj:
2554 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2555 dct.update(parse_codecs(mime_mobj.group(2)))
2556 no_audio = dct.get('acodec') == 'none'
2557 no_video = dct.get('vcodec') == 'none'
2558 if no_audio:
2559 dct['vbr'] = tbr
2560 if no_video:
2561 dct['abr'] = tbr
2562 if no_audio or no_video:
2563 dct['downloader_options'] = {
2564 # Youtube throttles chunks >~10M
2565 'http_chunk_size': 10485760,
2566 }
2567 if dct.get('ext'):
2568 dct['container'] = dct['ext'] + '_dash'
2569 yield dct
2570
2571 skip_manifests = self._configuration_arg('skip')
2572 get_dash = (
2573 (not is_live or self._configuration_arg('include_live_dash'))
2574 and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
2575 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2576
2577 def process_manifest_format(f, proto, itag):
2578 if itag in itags:
2579 if itags[itag] == proto or f'{itag}-{proto}' in itags:
2580 return False
2581 itag = f'{itag}-{proto}'
2582 if itag:
2583 f['format_id'] = itag
2584 itags[itag] = proto
2585
2586 f['quality'] = next((
2587 q(qdict[val])
2588 for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
2589 if val in qdict), -1)
2590 return True
2591
2592 for sd in streaming_data:
2593 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2594 if hls_manifest_url:
2595 for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
2596 if process_manifest_format(f, 'hls', self._search_regex(
2597 r'/itag/(\d+)', f['url'], 'itag', default=None)):
2598 yield f
2599
2600 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2601 if dash_manifest_url:
2602 for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
2603 if process_manifest_format(f, 'dash', f['format_id']):
2604 f['filesize'] = int_or_none(self._search_regex(
2605 r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
2606 yield f
2607
2608 def _extract_storyboard(self, player_responses, duration):
2609 spec = get_first(
2610 player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
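# The storyboard spec layout (an assumption inferred from the parsing below) is a
# '|'-separated string: a templated base URL followed by one '#'-separated
# descriptor per storyboard level, where fields 1-5 are width, height, frame
# count, columns and rows, field 6 is unused here, field 7 fills the $N
# placeholder of the URL template and field 8 is appended as the sigh parameter.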
2611 if not spec:
2612 return
2613 base_url = spec.pop()
2614 L = len(spec) - 1
2615 for i, args in enumerate(spec):
2616 args = args.split('#')
2617 counts = list(map(int_or_none, args[:5]))
2618 if len(args) != 8 or not all(counts):
2619 self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
2620 continue
2621 width, height, frame_count, cols, rows = counts
2622 N, sigh = args[6:]
2623
2624 url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
2625 fragment_count = frame_count / (cols * rows)
2626 fragment_duration = duration / fragment_count
2627 yield {
2628 'format_id': f'sb{i}',
2629 'format_note': 'storyboard',
2630 'ext': 'mhtml',
2631 'protocol': 'mhtml',
2632 'acodec': 'none',
2633 'vcodec': 'none',
2634 'url': url,
2635 'width': width,
2636 'height': height,
2637 'fragments': [{
2638 'path': url.replace('$M', str(j)),
2639 'duration': min(fragment_duration, duration - (j * fragment_duration)),
2640 } for j in range(math.ceil(fragment_count))],
2641 }
2642
2643 def _real_extract(self, url):
2644 url, smuggled_data = unsmuggle_url(url, {})
2645 video_id = self._match_id(url)
2646
2647 base_url = self.http_scheme() + '//www.youtube.com/'
2648 webpage_url = base_url + 'watch?v=' + video_id
2649 webpage = None
2650 if 'webpage' not in self._configuration_arg('player_skip'):
2651 webpage = self._download_webpage(
2652 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2653
2654 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2655
2656 player_responses, player_url = self._extract_player_responses(
2657 self._get_requested_clients(url, smuggled_data),
2658 video_id, webpage, master_ytcfg)
2659
2660 playability_statuses = traverse_obj(
2661 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2662
2663 trailer_video_id = get_first(
2664 playability_statuses,
2665 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2666 expected_type=str)
2667 if trailer_video_id:
2668 return self.url_result(
2669 trailer_video_id, self.ie_key(), trailer_video_id)
2670
2671 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2672 if webpage else (lambda x: None))
2673
2674 video_details = traverse_obj(
2675 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2676 microformats = traverse_obj(
2677 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2678 expected_type=dict, default=[])
2679 video_title = (
2680 get_first(video_details, 'title')
2681 or self._get_text(microformats, (..., 'title'))
2682 or search_meta(['og:title', 'twitter:title', 'title']))
2683 video_description = get_first(video_details, 'shortDescription')
2684
2685 multifeed_metadata_list = get_first(
2686 player_responses,
2687 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2688 expected_type=str)
2689 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
2690 if self.get_param('noplaylist'):
2691 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2692 else:
2693 entries = []
2694 feed_ids = []
2695 for feed in multifeed_metadata_list.split(','):
2696 # Unquote should take place before split on comma (,) since textual
2697 # fields may contain comma as well (see
2698 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2699 feed_data = compat_parse_qs(
2700 compat_urllib_parse_unquote_plus(feed))
2701
2702 def feed_entry(name):
2703 return try_get(
2704 feed_data, lambda x: x[name][0], compat_str)
2705
2706 feed_id = feed_entry('id')
2707 if not feed_id:
2708 continue
2709 feed_title = feed_entry('title')
2710 title = video_title
2711 if feed_title:
2712 title += ' (%s)' % feed_title
2713 entries.append({
2714 '_type': 'url_transparent',
2715 'ie_key': 'Youtube',
2716 'url': smuggle_url(
2717 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2718 {'force_singlefeed': True}),
2719 'title': title,
2720 })
2721 feed_ids.append(feed_id)
2722 self.to_screen(
2723 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2724 % (', '.join(feed_ids), video_id))
2725 return self.playlist_result(
2726 entries, video_id, video_title, video_description)
2727
2728 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
2729 is_live = get_first(video_details, 'isLive')
2730 if is_live is None:
2731 is_live = get_first(live_broadcast_details, 'isLiveNow')
2732
2733 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2734 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2735
2736 if not formats:
2737 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2738 self.report_drm(video_id)
2739 pemr = get_first(
2740 playability_statuses,
2741 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2742 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2743 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2744 if subreason:
2745 if subreason == 'The uploader has not made this video available in your country.':
2746 countries = get_first(microformats, 'availableCountries')
2747 if not countries:
2748 regions_allowed = search_meta('regionsAllowed')
2749 countries = regions_allowed.split(',') if regions_allowed else None
2750 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2751 reason += f'. {subreason}'
2752 if reason:
2753 self.raise_no_formats(reason, expected=True)
2754
2755 keywords = get_first(video_details, 'keywords', expected_type=list) or []
2756 if not keywords and webpage:
2757 keywords = [
2758 unescapeHTML(m.group('content'))
2759 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2760 for keyword in keywords:
2761 if keyword.startswith('yt:stretch='):
2762 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2763 if mobj:
2764 # NB: float is intentional for forcing float division
2765 w, h = (float(v) for v in mobj.groups())
2766 if w > 0 and h > 0:
2767 ratio = w / h
2768 for f in formats:
2769 if f.get('vcodec') != 'none':
2770 f['stretched_ratio'] = ratio
2771 break
2772
2773 thumbnails = []
2774 thumbnail_dicts = traverse_obj(
2775 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2776 expected_type=dict, default=[])
2777 for thumbnail in thumbnail_dicts:
2778 thumbnail_url = thumbnail.get('url')
2779 if not thumbnail_url:
2780 continue
2781 # Sometimes youtube gives a wrong thumbnail URL. See:
2782 # https://github.com/yt-dlp/yt-dlp/issues/233
2783 # https://github.com/ytdl-org/youtube-dl/issues/28023
2784 if 'maxresdefault' in thumbnail_url:
2785 thumbnail_url = thumbnail_url.split('?')[0]
2786 thumbnails.append({
2787 'url': thumbnail_url,
2788 'height': int_or_none(thumbnail.get('height')),
2789 'width': int_or_none(thumbnail.get('width')),
2790 })
2791 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2792 if thumbnail_url:
2793 thumbnails.append({
2794 'url': thumbnail_url,
2795 })
2796 original_thumbnails = thumbnails.copy()
2797
2798 # The best resolution thumbnail sometimes does not appear in the webpage
2799 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2800 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2801 thumbnail_names = [
2802 'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
2803 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2804 'mqdefault', 'mq1', 'mq2', 'mq3',
2805 'default', '1', '2', '3'
2806 ]
2807 n_thumbnail_names = len(thumbnail_names)
2808 thumbnails.extend({
2809 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2810 video_id=video_id, name=name, ext=ext,
2811 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2812 } for name in thumbnail_names for ext in ('webp', 'jpg'))
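# e.g. for a non-live video the candidates generated above look like
# https://i.ytimg.com/vi_webp/<video_id>/maxresdefault.webp and
# https://i.ytimg.com/vi/<video_id>/maxresdefault.jpg - an illustration of the
# template only, since not all of these URLs exist (hence the ranking below).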
2813 for thumb in thumbnails:
2814 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2815 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2816 self._remove_duplicate_formats(thumbnails)
2817 self._downloader._sort_thumbnails(original_thumbnails)
2818
2819 category = get_first(microformats, 'category') or search_meta('genre')
2820 channel_id = str_or_none(
2821 get_first(video_details, 'channelId')
2822 or get_first(microformats, 'externalChannelId')
2823 or search_meta('channelId'))
2824 duration = int_or_none(
2825 get_first(video_details, 'lengthSeconds')
2826 or get_first(microformats, 'lengthSeconds')
2827 or parse_duration(search_meta('duration'))) or None
2828 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2829
2830 live_content = get_first(video_details, 'isLiveContent')
2831 is_upcoming = get_first(video_details, 'isUpcoming')
2832 if is_live is None:
2833 if is_upcoming or live_content is False:
2834 is_live = False
2835 if is_upcoming is None and (live_content or is_live):
2836 is_upcoming = False
2837 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2838 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2839 if not duration and live_endtime and live_starttime:
2840 duration = live_endtime - live_starttime
2841
2842 formats.extend(self._extract_storyboard(player_responses, duration))
2843
2844 # Source is given priority since formats that throttle are given lower source_preference
2845 # When throttling issue is fully fixed, remove this
2846 self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
2847
2848 info = {
2849 'id': video_id,
2850 'title': self._live_title(video_title) if is_live else video_title,
2851 'formats': formats,
2852 'thumbnails': thumbnails,
2853 # The best thumbnail that we are sure exists. Prevents unnecessary
2854 # URL checking if the user doesn't care about getting the best possible thumbnail
2855 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
2856 'description': video_description,
2857 'upload_date': unified_strdate(
2858 get_first(microformats, 'uploadDate')
2859 or search_meta('uploadDate')),
2860 'uploader': get_first(video_details, 'author'),
2861 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2862 'uploader_url': owner_profile_url,
2863 'channel_id': channel_id,
2864 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
2865 'duration': duration,
2866 'view_count': int_or_none(
2867 get_first((video_details, microformats), (..., 'viewCount'))
2868 or search_meta('interactionCount')),
2869 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
2870 'age_limit': 18 if (
2871 get_first(microformats, 'isFamilySafe') is False
2872 or search_meta('isFamilyFriendly') == 'false'
2873 or search_meta('og:restrictions:age') == '18+') else 0,
2874 'webpage_url': webpage_url,
2875 'categories': [category] if category else None,
2876 'tags': keywords,
2877 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
2878 'is_live': is_live,
2879 'was_live': (False if is_live or is_upcoming or live_content is False
2880 else None if is_live is None or is_upcoming is None
2881 else live_content),
2882 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2883 'release_timestamp': live_starttime,
2884 }
2885
2886 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2887 if pctr:
2888 def get_lang_code(track):
2889 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
2890 or track.get('languageCode'))
2891
2892 # Converted into dicts to remove duplicates
2893 captions = {
2894 get_lang_code(sub): sub
2895 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2896 translation_languages = {
2897 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
2898 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
2899
2900 def process_language(container, base_url, lang_code, sub_name, query):
2901 lang_subs = container.setdefault(lang_code, [])
2902 for fmt in self._SUBTITLE_FORMATS:
2903 query.update({
2904 'fmt': fmt,
2905 })
2906 lang_subs.append({
2907 'ext': fmt,
2908 'url': update_url_query(base_url, query),
2909 'name': sub_name,
2910 })
2911
2912 subtitles, automatic_captions = {}, {}
2913 for lang_code, caption_track in captions.items():
2914 base_url = caption_track.get('baseUrl')
2915 if not base_url:
2916 continue
2917 lang_name = self._get_text(caption_track, 'name', max_runs=1)
2918 if caption_track.get('kind') != 'asr':
2919 if not lang_code:
2920 continue
2921 process_language(
2922 subtitles, base_url, lang_code, lang_name, {})
2923 if not caption_track.get('isTranslatable'):
2924 continue
2925 for trans_code, trans_name in translation_languages.items():
2926 if not trans_code:
2927 continue
2928 if caption_track.get('kind') != 'asr':
2929 trans_code += f'-{lang_code}'
2930 trans_name += format_field(lang_name, template=' from %s')
2931 process_language(
2932 automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code})
2933 info['automatic_captions'] = automatic_captions
2934 info['subtitles'] = subtitles
2935
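# Pick up start/end times from the URL query or fragment
# ('t' and 'start' map to start_time, 'end' to end_time)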
2936 parsed_url = compat_urllib_parse_urlparse(url)
2937 for component in [parsed_url.fragment, parsed_url.query]:
2938 query = compat_parse_qs(component)
2939 for k, v in query.items():
2940 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2941 d_k += '_time'
2942 if d_k not in info and k in s_ks:
2943 info[d_k] = parse_duration(query[k][0])
2944
2945 # Youtube Music Auto-generated description
2946 if video_description:
2947 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2948 if mobj:
2949 release_year = mobj.group('release_year')
2950 release_date = mobj.group('release_date')
2951 if release_date:
2952 release_date = release_date.replace('-', '')
2953 if not release_year:
2954 release_year = release_date[:4]
2955 info.update({
2956 'album': mobj.group('album').strip(),
2957 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2958 'track': mobj.group('track').strip(),
2959 'release_date': release_date,
2960 'release_year': int_or_none(release_year),
2961 })
2962
2963 initial_data = None
2964 if webpage:
2965 initial_data = self._extract_yt_initial_variable(
2966 webpage, self._YT_INITIAL_DATA_RE, video_id,
2967 'yt initial data')
2968 if not initial_data:
2969 query = {'videoId': video_id}
2970 query.update(self._get_checkok_params())
2971 initial_data = self._extract_response(
2972 item_id=video_id, ep='next', fatal=False,
2973 ytcfg=master_ytcfg, query=query,
2974 headers=self.generate_api_headers(ytcfg=master_ytcfg),
2975 note='Downloading initial data API JSON')
2976
2977 try:
2978 # This will error if there is no livechat
2979 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2980 info.setdefault('subtitles', {})['live_chat'] = [{
2981 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2982 'video_id': video_id,
2983 'ext': 'json',
2984 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
2985 }]
2986 except (KeyError, IndexError, TypeError):
2987 pass
2988
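# Chapters are taken from the initial data, falling back to the engagement panel markers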
2989 if initial_data:
2990 info['chapters'] = (
2991 self._extract_chapters_from_json(initial_data, duration)
2992 or self._extract_chapters_from_engagement_panel(initial_data, duration)
2993 or None)
2994
2995 contents = try_get(
2996 initial_data,
2997 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2998 list) or []
2999 for content in contents:
3000 vpir = content.get('videoPrimaryInfoRenderer')
3001 if vpir:
3002 stl = vpir.get('superTitleLink')
3003 if stl:
3004 stl = self._get_text(stl)
3005 if try_get(
3006 vpir,
3007 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3008 info['location'] = stl
3009 else:
3010 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3011 if mobj:
3012 info.update({
3013 'series': mobj.group(1),
3014 'season_number': int(mobj.group(2)),
3015 'episode_number': int(mobj.group(3)),
3016 })
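# Like/dislike counts come from the accessibility labels of the top-level
# toggle buttons, with the sentiment bar tooltip as a fallback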
3017 for tlb in (try_get(
3018 vpir,
3019 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3020 list) or []):
3021 tbr = tlb.get('toggleButtonRenderer') or {}
3022 for getter, regex in [(
3023 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3024 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3025 lambda x: x['accessibility'],
3026 lambda x: x['accessibilityData']['accessibilityData'],
3027 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3028 label = (try_get(tbr, getter, dict) or {}).get('label')
3029 if label:
3030 mobj = re.match(regex, label)
3031 if mobj:
3032 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3033 break
3034 sbr_tooltip = try_get(
3035 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3036 if sbr_tooltip:
3037 like_count, dislike_count = sbr_tooltip.split(' / ')
3038 info.update({
3039 'like_count': str_to_int(like_count),
3040 'dislike_count': str_to_int(dislike_count),
3041 })
3042 vsir = content.get('videoSecondaryInfoRenderer')
3043 if vsir:
3044 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
3045 rows = try_get(
3046 vsir,
3047 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3048 list) or []
3049 multiple_songs = False
3050 for row in rows:
3051 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3052 multiple_songs = True
3053 break
3054 for row in rows:
3055 mrr = row.get('metadataRowRenderer') or {}
3056 mrr_title = mrr.get('title')
3057 if not mrr_title:
3058 continue
3059 mrr_title = self._get_text(mrr, 'title')
3060 mrr_contents_text = self._get_text(mrr, ('contents', 0))
3061 if mrr_title == 'License':
3062 info['license'] = mrr_contents_text
3063 elif not multiple_songs:
3064 if mrr_title == 'Album':
3065 info['album'] = mrr_contents_text
3066 elif mrr_title == 'Artist':
3067 info['artist'] = mrr_contents_text
3068 elif mrr_title == 'Song':
3069 info['track'] = mrr_contents_text
3070
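# Fill missing channel fields from the corresponding uploader fields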
3071 fallbacks = {
3072 'channel': 'uploader',
3073 'channel_id': 'uploader_id',
3074 'channel_url': 'uploader_url',
3075 }
3076 for to, frm in fallbacks.items():
3077 if not info.get(to):
3078 info[to] = info.get(frm)
3079
3080 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3081 v = info.get(s_k)
3082 if v:
3083 info[d_k] = v
3084
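# Derive availability from the private/unlisted flags, the badge labels
# (members only / premium / unlisted) and the age limit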
3085 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3086 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3087 is_membersonly = None
3088 is_premium = None
3089 if initial_data and is_private is not None:
3090 is_membersonly = False
3091 is_premium = False
3092 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3093 badge_labels = set()
3094 for content in contents:
3095 if not isinstance(content, dict):
3096 continue
3097 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3098 for badge_label in badge_labels:
3099 if badge_label.lower() == 'members only':
3100 is_membersonly = True
3101 elif badge_label.lower() == 'premium':
3102 is_premium = True
3103 elif badge_label.lower() == 'unlisted':
3104 is_unlisted = True
3105
3106 info['availability'] = self._availability(
3107 is_private=is_private,
3108 needs_premium=is_premium,
3109 needs_subscription=is_membersonly,
3110 needs_auth=info['age_limit'] >= 18,
3111 is_unlisted=None if is_private is None else is_unlisted)
3112
3113 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
3114
3115 self.mark_watched(video_id, player_responses)
3116
3117 return info
3118
3119
3120 class YoutubeTabIE(YoutubeBaseInfoExtractor):
3121 IE_DESC = 'YouTube Tabs'
3122 _VALID_URL = r'''(?x)
3123 https?://
3124 (?:\w+\.)?
3125 (?:
3126 youtube(?:kids)?\.com|
3127 %(invidious)s
3128 )/
3129 (?:
3130 (?P<channel_type>channel|c|user|browse)/|
3131 (?P<not_channel>
3132 feed/|hashtag/|
3133 (?:playlist|watch)\?.*?\blist=
3134 )|
3135 (?!(?:%(reserved_names)s)\b) # Direct URLs
3136 )
3137 (?P<id>[^/?\#&]+)
3138 ''' % {
3139 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
3140 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
3141 }
3142 IE_NAME = 'youtube:tab'
3143
3144 _TESTS = [{
3145 'note': 'playlists, multipage',
3146 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3147 'playlist_mincount': 94,
3148 'info_dict': {
3149 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3150 'title': 'Игорь Клейнер - Playlists',
3151 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3152 'uploader': 'Игорь Клейнер',
3153 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3154 },
3155 }, {
3156 'note': 'playlists, multipage, different order',
3157 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3158 'playlist_mincount': 94,
3159 'info_dict': {
3160 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3161 'title': 'Игорь Клейнер - Playlists',
3162 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3163 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3164 'uploader': 'Игорь Клейнер',
3165 },
3166 }, {
3167 'note': 'playlists, series',
3168 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3169 'playlist_mincount': 5,
3170 'info_dict': {
3171 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3172 'title': '3Blue1Brown - Playlists',
3173 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3174 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3175 'uploader': '3Blue1Brown',
3176 },
3177 }, {
3178 'note': 'playlists, singlepage',
3179 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3180 'playlist_mincount': 4,
3181 'info_dict': {
3182 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3183 'title': 'ThirstForScience - Playlists',
3184 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3185 'uploader': 'ThirstForScience',
3186 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3187 }
3188 }, {
3189 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3190 'only_matching': True,
3191 }, {
3192 'note': 'basic, single video playlist',
3193 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3194 'info_dict': {
3195 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3196 'uploader': 'Sergey M.',
3197 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3198 'title': 'youtube-dl public playlist',
3199 },
3200 'playlist_count': 1,
3201 }, {
3202 'note': 'empty playlist',
3203 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3204 'info_dict': {
3205 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3206 'uploader': 'Sergey M.',
3207 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3208 'title': 'youtube-dl empty playlist',
3209 },
3210 'playlist_count': 0,
3211 }, {
3212 'note': 'Home tab',
3213 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3214 'info_dict': {
3215 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3216 'title': 'lex will - Home',
3217 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3218 'uploader': 'lex will',
3219 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3220 },
3221 'playlist_mincount': 2,
3222 }, {
3223 'note': 'Videos tab',
3224 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3225 'info_dict': {
3226 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3227 'title': 'lex will - Videos',
3228 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3229 'uploader': 'lex will',
3230 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3231 },
3232 'playlist_mincount': 975,
3233 }, {
3234 'note': 'Videos tab, sorted by popular',
3235 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3236 'info_dict': {
3237 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3238 'title': 'lex will - Videos',
3239 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3240 'uploader': 'lex will',
3241 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3242 },
3243 'playlist_mincount': 199,
3244 }, {
3245 'note': 'Playlists tab',
3246 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3247 'info_dict': {
3248 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3249 'title': 'lex will - Playlists',
3250 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3251 'uploader': 'lex will',
3252 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3253 },
3254 'playlist_mincount': 17,
3255 }, {
3256 'note': 'Community tab',
3257 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3258 'info_dict': {
3259 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3260 'title': 'lex will - Community',
3261 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3262 'uploader': 'lex will',
3263 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3264 },
3265 'playlist_mincount': 18,
3266 }, {
3267 'note': 'Channels tab',
3268 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3269 'info_dict': {
3270 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3271 'title': 'lex will - Channels',
3272 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3273 'uploader': 'lex will',
3274 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3275 },
3276 'playlist_mincount': 12,
3277 }, {
3278 'note': 'Search tab',
3279 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3280 'playlist_mincount': 40,
3281 'info_dict': {
3282 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3283 'title': '3Blue1Brown - Search - linear algebra',
3284 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3285 'uploader': '3Blue1Brown',
3286 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3287 },
3288 }, {
3289 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3290 'only_matching': True,
3291 }, {
3292 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3293 'only_matching': True,
3294 }, {
3295 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3296 'only_matching': True,
3297 }, {
3298 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 also appears twice in this list.',
3299 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3300 'info_dict': {
3301 'title': '29C3: Not my department',
3302 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3303 'uploader': 'Christiaan008',
3304 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3305 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3306 },
3307 'playlist_count': 96,
3308 }, {
3309 'note': 'Large playlist',
3310 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3311 'info_dict': {
3312 'title': 'Uploads from Cauchemar',
3313 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3314 'uploader': 'Cauchemar',
3315 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3316 },
3317 'playlist_mincount': 1123,
3318 }, {
3319 'note': 'even larger playlist, 8832 videos',
3320 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3321 'only_matching': True,
3322 }, {
3323 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3324 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3325 'info_dict': {
3326 'title': 'Uploads from Interstellar Movie',
3327 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3328 'uploader': 'Interstellar Movie',
3329 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3330 },
3331 'playlist_mincount': 21,
3332 }, {
3333 'note': 'Playlist with "show unavailable videos" button',
3334 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3335 'info_dict': {
3336 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3337 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3338 'uploader': 'Phim Siêu Nhân Nhật Bản',
3339 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3340 },
3341 'playlist_mincount': 200,
3342 }, {
3343 'note': 'Playlist with unavailable videos in page 7',
3344 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3345 'info_dict': {
3346 'title': 'Uploads from BlankTV',
3347 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3348 'uploader': 'BlankTV',
3349 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3350 },
3351 'playlist_mincount': 1000,
3352 }, {
3353 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3354 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3355 'info_dict': {
3356 'title': 'Data Analysis with Dr Mike Pound',
3357 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3358 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3359 'uploader': 'Computerphile',
3360 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3361 },
3362 'playlist_mincount': 11,
3363 }, {
3364 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3365 'only_matching': True,
3366 }, {
3367 'note': 'Playlist URL that does not actually serve a playlist',
3368 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3369 'info_dict': {
3370 'id': 'FqZTN594JQw',
3371 'ext': 'webm',
3372 'title': "Smiley's People 01 detective, Adventure Series, Action",
3373 'uploader': 'STREEM',
3374 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3375 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3376 'upload_date': '20150526',
3377 'license': 'Standard YouTube License',
3378 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3379 'categories': ['People & Blogs'],
3380 'tags': list,
3381 'view_count': int,
3382 'like_count': int,
3383 'dislike_count': int,
3384 },
3385 'params': {
3386 'skip_download': True,
3387 },
3388 'skip': 'This video is not available.',
3389 'add_ie': [YoutubeIE.ie_key()],
3390 }, {
3391 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3392 'only_matching': True,
3393 }, {
3394 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3395 'only_matching': True,
3396 }, {
3397 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3398 'info_dict': {
3399 'id': '3yImotZU3tw', # This will keep changing
3400 'ext': 'mp4',
3401 'title': compat_str,
3402 'uploader': 'Sky News',
3403 'uploader_id': 'skynews',
3404 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3405 'upload_date': r're:\d{8}',
3406 'description': compat_str,
3407 'categories': ['News & Politics'],
3408 'tags': list,
3409 'like_count': int,
3410 'dislike_count': int,
3411 },
3412 'params': {
3413 'skip_download': True,
3414 },
3415 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3416 }, {
3417 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3418 'info_dict': {
3419 'id': 'a48o2S1cPoo',
3420 'ext': 'mp4',
3421 'title': 'The Young Turks - Live Main Show',
3422 'uploader': 'The Young Turks',
3423 'uploader_id': 'TheYoungTurks',
3424 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3425 'upload_date': '20150715',
3426 'license': 'Standard YouTube License',
3427 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3428 'categories': ['News & Politics'],
3429 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3430 'like_count': int,
3431 'dislike_count': int,
3432 },
3433 'params': {
3434 'skip_download': True,
3435 },
3436 'only_matching': True,
3437 }, {
3438 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3439 'only_matching': True,
3440 }, {
3441 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3442 'only_matching': True,
3443 }, {
3444 'note': 'A channel that is not live. Should raise error',
3445 'url': 'https://www.youtube.com/user/numberphile/live',
3446 'only_matching': True,
3447 }, {
3448 'url': 'https://www.youtube.com/feed/trending',
3449 'only_matching': True,
3450 }, {
3451 'url': 'https://www.youtube.com/feed/library',
3452 'only_matching': True,
3453 }, {
3454 'url': 'https://www.youtube.com/feed/history',
3455 'only_matching': True,
3456 }, {
3457 'url': 'https://www.youtube.com/feed/subscriptions',
3458 'only_matching': True,
3459 }, {
3460 'url': 'https://www.youtube.com/feed/watch_later',
3461 'only_matching': True,
3462 }, {
3463 'note': 'Recommended - redirects to home page.',
3464 'url': 'https://www.youtube.com/feed/recommended',
3465 'only_matching': True,
3466 }, {
3467 'note': 'inline playlist with not always working continuations',
3468 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3469 'only_matching': True,
3470 }, {
3471 'url': 'https://www.youtube.com/course',
3472 'only_matching': True,
3473 }, {
3474 'url': 'https://www.youtube.com/zsecurity',
3475 'only_matching': True,
3476 }, {
3477 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3478 'only_matching': True,
3479 }, {
3480 'url': 'https://www.youtube.com/TheYoungTurks/live',
3481 'only_matching': True,
3482 }, {
3483 'url': 'https://www.youtube.com/hashtag/cctv9',
3484 'info_dict': {
3485 'id': 'cctv9',
3486 'title': '#cctv9',
3487 },
3488 'playlist_mincount': 350,
3489 }, {
3490 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3491 'only_matching': True,
3492 }, {
3493 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3494 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3495 'only_matching': True
3496 }, {
3497 'note': '/browse/ should redirect to /channel/',
3498 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3499 'only_matching': True
3500 }, {
3501 'note': 'VLPL, should redirect to playlist?list=PL...',
3502 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3503 'info_dict': {
3504 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3505 'uploader': 'NoCopyrightSounds',
3506 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3507 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3508 'title': 'NCS Releases',
3509 },
3510 'playlist_mincount': 166,
3511 }, {
3512 'note': 'Topic, should redirect to playlist?list=UU...',
3513 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3514 'info_dict': {
3515 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3516 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3517 'title': 'Uploads from Royalty Free Music - Topic',
3518 'uploader': 'Royalty Free Music - Topic',
3519 },
3520 'expected_warnings': [
3521 'A channel/user page was given',
3522 'The URL does not have a videos tab',
3523 ],
3524 'playlist_mincount': 101,
3525 }, {
3526 'note': 'Topic without a UU playlist',
3527 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3528 'info_dict': {
3529 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3530 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3531 },
3532 'expected_warnings': [
3533 'A channel/user page was given',
3534 'The URL does not have a videos tab',
3535 'Falling back to channel URL',
3536 ],
3537 'playlist_mincount': 9,
3538 }, {
3539 'note': 'Youtube music Album',
3540 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3541 'info_dict': {
3542 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3543 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3544 },
3545 'playlist_count': 50,
3546 }, {
3547 'note': 'unlisted single video playlist',
3548 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3549 'info_dict': {
3550 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3551 'uploader': 'colethedj',
3552 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3553 'title': 'yt-dlp unlisted playlist test',
3554 'availability': 'unlisted'
3555 },
3556 'playlist_count': 1,
3557 }, {
3558 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
3559 'url': 'https://www.youtube.com/feed/recommended',
3560 'info_dict': {
3561 'id': 'recommended',
3562 'title': 'recommended',
3563 },
3564 'playlist_mincount': 50,
3565 'params': {
3566 'skip_download': True,
3567 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
3568 },
3569 }, {
3570 'note': 'API Fallback: /videos tab, sorted by oldest first',
3571 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
3572 'info_dict': {
3573 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
3574 'title': 'Cody\'sLab - Videos',
3575 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
3576 'uploader': 'Cody\'sLab',
3577 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
3578 },
3579 'playlist_mincount': 650,
3580 'params': {
3581 'skip_download': True,
3582 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
3583 },
3584 }, {
3585 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
3586 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3587 'info_dict': {
3588 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3589 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3590 'title': 'Uploads from Royalty Free Music - Topic',
3591 'uploader': 'Royalty Free Music - Topic',
3592 },
3593 'expected_warnings': [
3594 'A channel/user page was given',
3595 'The URL does not have a videos tab',
3596 ],
3597 'playlist_mincount': 101,
3598 'params': {
3599 'skip_download': True,
3600 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
3601 },
3602 }]
3603
3604 @classmethod
3605 def suitable(cls, url):
3606 return False if YoutubeIE.suitable(url) else super(
3607 YoutubeTabIE, cls).suitable(url)
3608
3609 def _extract_channel_id(self, webpage):
3610 channel_id = self._html_search_meta(
3611 'channelId', webpage, 'channel id', default=None)
3612 if channel_id:
3613 return channel_id
3614 channel_url = self._html_search_meta(
3615 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3616 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3617 'twitter:app:url:googleplay'), webpage, 'channel url')
3618 return self._search_regex(
3619 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
3620 channel_url, 'channel id')
3621
3622 @staticmethod
3623 def _extract_basic_item_renderer(item):
3624 # Modified from _extract_grid_item_renderer
3625 known_basic_renderers = (
3626 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3627 )
3628 for key, renderer in item.items():
3629 if not isinstance(renderer, dict):
3630 continue
3631 elif key in known_basic_renderers:
3632 return renderer
3633 elif key.startswith('grid') and key.endswith('Renderer'):
3634 return renderer
3635
3636 def _grid_entries(self, grid_renderer):
3637 for item in grid_renderer['items']:
3638 if not isinstance(item, dict):
3639 continue
3640 renderer = self._extract_basic_item_renderer(item)
3641 if not isinstance(renderer, dict):
3642 continue
3643 title = self._get_text(renderer, 'title')
3644
3645 # playlist
3646 playlist_id = renderer.get('playlistId')
3647 if playlist_id:
3648 yield self.url_result(
3649 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3650 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3651 video_title=title)
3652 continue
3653 # video
3654 video_id = renderer.get('videoId')
3655 if video_id:
3656 yield self._extract_video(renderer)
3657 continue
3658 # channel
3659 channel_id = renderer.get('channelId')
3660 if channel_id:
3661 yield self.url_result(
3662 'https://www.youtube.com/channel/%s' % channel_id,
3663 ie=YoutubeTabIE.ie_key(), video_title=title)
3664 continue
3665 # generic endpoint URL support
3666 ep_url = urljoin('https://www.youtube.com/', try_get(
3667 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3668 compat_str))
3669 if ep_url:
3670 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3671 if ie.suitable(ep_url):
3672 yield self.url_result(
3673 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3674 break
3675
3676 def _shelf_entries_from_content(self, shelf_renderer):
3677 content = shelf_renderer.get('content')
3678 if not isinstance(content, dict):
3679 return
3680 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3681 if renderer:
3682 # TODO: add support for nested playlists so each shelf is processed
3683 # as separate playlist
3684 # TODO: this includes only first N items
3685 for entry in self._grid_entries(renderer):
3686 yield entry
3687 renderer = content.get('horizontalListRenderer')
3688 if renderer:
3689 # TODO
3690 pass
3691
3692 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3693 ep = try_get(
3694 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3695 compat_str)
3696 shelf_url = urljoin('https://www.youtube.com', ep)
3697 if shelf_url:
3698 # Skip links to other channels; note that checking for
3699 # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
3700 # will not work
3701 if skip_channels and '/channels?' in shelf_url:
3702 return
3703 title = self._get_text(shelf_renderer, 'title')
3704 yield self.url_result(shelf_url, video_title=title)
3705 # Shelf may not contain shelf URL, fall back to extraction from content
3706 for entry in self._shelf_entries_from_content(shelf_renderer):
3707 yield entry
3708
3709 def _playlist_entries(self, video_list_renderer):
3710 for content in video_list_renderer['contents']:
3711 if not isinstance(content, dict):
3712 continue
3713 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3714 if not isinstance(renderer, dict):
3715 continue
3716 video_id = renderer.get('videoId')
3717 if not video_id:
3718 continue
3719 yield self._extract_video(renderer)
3720
3721 def _rich_entries(self, rich_grid_renderer):
3722 renderer = try_get(
3723 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3724 video_id = renderer.get('videoId')
3725 if not video_id:
3726 return
3727 yield self._extract_video(renderer)
3728
3729 def _video_entry(self, video_renderer):
3730 video_id = video_renderer.get('videoId')
3731 if video_id:
3732 return self._extract_video(video_renderer)
3733
3734 def _post_thread_entries(self, post_thread_renderer):
3735 post_renderer = try_get(
3736 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3737 if not post_renderer:
3738 return
3739 # video attachment
3740 video_renderer = try_get(
3741 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3742 video_id = video_renderer.get('videoId')
3743 if video_id:
3744 entry = self._extract_video(video_renderer)
3745 if entry:
3746 yield entry
3747 # playlist attachment
3748 playlist_id = try_get(
3749 post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3750 if playlist_id:
3751 yield self.url_result(
3752 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3753 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3754 # inline video links
3755 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3756 for run in runs:
3757 if not isinstance(run, dict):
3758 continue
3759 ep_url = try_get(
3760 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3761 if not ep_url:
3762 continue
3763 if not YoutubeIE.suitable(ep_url):
3764 continue
3765 ep_video_id = YoutubeIE._match_id(ep_url)
3766 if video_id == ep_video_id:
3767 continue
3768 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3769
3770 def _post_thread_continuation_entries(self, post_thread_continuation):
3771 contents = post_thread_continuation.get('contents')
3772 if not isinstance(contents, list):
3773 return
3774 for content in contents:
3775 renderer = content.get('backstagePostThreadRenderer')
3776 if not isinstance(renderer, dict):
3777 continue
3778 for entry in self._post_thread_entries(renderer):
3779 yield entry
3780
3781 r''' # unused
3782 def _rich_grid_entries(self, contents):
3783 for content in contents:
3784 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3785 if video_renderer:
3786 entry = self._video_entry(video_renderer)
3787 if entry:
3788 yield entry
3789 '''
3790 def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
3791
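# Yield entries from the tab's sectionListRenderer/richGridRenderer contents,
# then keep following continuation tokens page by page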
3792 def extract_entries(parent_renderer): # this needs to be called again for continuations to work with feeds
3793 contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
3794 for content in contents:
3795 if not isinstance(content, dict):
3796 continue
3797 is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
3798 if not is_renderer:
3799 renderer = content.get('richItemRenderer')
3800 if renderer:
3801 for entry in self._rich_entries(renderer):
3802 yield entry
3803 continuation_list[0] = self._extract_continuation(parent_renderer)
3804 continue
3805 isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
3806 for isr_content in isr_contents:
3807 if not isinstance(isr_content, dict):
3808 continue
3809
3810 known_renderers = {
3811 'playlistVideoListRenderer': self._playlist_entries,
3812 'gridRenderer': self._grid_entries,
3813 'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
3814 'backstagePostThreadRenderer': self._post_thread_entries,
3815 'videoRenderer': lambda x: [self._video_entry(x)],
3816 }
3817 for key, renderer in isr_content.items():
3818 if key not in known_renderers:
3819 continue
3820 for entry in known_renderers[key](renderer):
3821 if entry:
3822 yield entry
3823 continuation_list[0] = self._extract_continuation(renderer)
3824 break
3825
3826 if not continuation_list[0]:
3827 continuation_list[0] = self._extract_continuation(is_renderer)
3828
3829 if not continuation_list[0]:
3830 continuation_list[0] = self._extract_continuation(parent_renderer)
3831
3832 continuation_list = [None] # Python 2 does not support nonlocal
3833 tab_content = try_get(tab, lambda x: x['content'], dict)
3834 if not tab_content:
3835 return
3836 parent_renderer = (
3837 try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
3838 or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
3839 for entry in extract_entries(parent_renderer):
3840 yield entry
3841 continuation = continuation_list[0]
3842
3843 for page_num in itertools.count(1):
3844 if not continuation:
3845 break
3846 headers = self.generate_api_headers(
3847 ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
3848 response = self._extract_response(
3849 item_id='%s page %s' % (item_id, page_num),
3850 query=continuation, headers=headers, ytcfg=ytcfg,
3851 check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
3852
3853 if not response:
3854 break
3855 # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
3856 # See: https://github.com/ytdl-org/youtube-dl/issues/28702
3857 visitor_data = self._extract_visitor_data(response) or visitor_data
3858
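# Legacy continuation responses put their data under continuationContents;
# dispatch on the renderer key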
3859 known_continuation_renderers = {
3860 'playlistVideoListContinuation': self._playlist_entries,
3861 'gridContinuation': self._grid_entries,
3862 'itemSectionContinuation': self._post_thread_continuation_entries,
3863 'sectionListContinuation': extract_entries, # for feeds
3864 }
3865 continuation_contents = try_get(
3866 response, lambda x: x['continuationContents'], dict) or {}
3867 continuation_renderer = None
3868 for key, value in continuation_contents.items():
3869 if key not in known_continuation_renderers:
3870 continue
3871 continuation_renderer = value
3872 continuation_list = [None]
3873 for entry in known_continuation_renderers[key](continuation_renderer):
3874 yield entry
3875 continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
3876 break
3877 if continuation_renderer:
3878 continue
3879
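# Newer continuation responses return items via onResponseReceivedActions/
# onResponseReceivedEndpoints; wrap them so the matching extractor can be reused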
3880 known_renderers = {
3881 'gridPlaylistRenderer': (self._grid_entries, 'items'),
3882 'gridVideoRenderer': (self._grid_entries, 'items'),
3883 'gridChannelRenderer': (self._grid_entries, 'items'),
3884 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
3885 'itemSectionRenderer': (extract_entries, 'contents'), # for feeds
3886 'richItemRenderer': (extract_entries, 'contents'), # for hashtag
3887 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
3888 }
3889 on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
3890 continuation_items = try_get(
3891 on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
3892 continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
3893 video_items_renderer = None
3894 for key, value in continuation_item.items():
3895 if key not in known_renderers:
3896 continue
3897 video_items_renderer = {known_renderers[key][1]: continuation_items}
3898 continuation_list = [None]
3899 for entry in known_renderers[key][0](video_items_renderer):
3900 yield entry
3901 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
3902 break
3903 if video_items_renderer:
3904 continue
3905 break
3906
3907 @staticmethod
3908 def _extract_selected_tab(tabs):
3909 for tab in tabs:
3910 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3911 if renderer.get('selected') is True:
3912 return renderer
3913 else:
3914 raise ExtractorError('Unable to find selected tab')
3915
3916 @classmethod
3917 def _extract_uploader(cls, data):
3918 uploader = {}
3919 renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3920 owner = try_get(
3921 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3922 if owner:
3923 uploader['uploader'] = owner.get('text')
3924 uploader['uploader_id'] = try_get(
3925 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3926 uploader['uploader_url'] = urljoin(
3927 'https://www.youtube.com/',
3928 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3929 return {k: v for k, v in uploader.items() if v is not None}
3930
3931 def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
3932 playlist_id = title = description = channel_url = channel_name = channel_id = None
3933 thumbnails_list = []
3934 tags = []
3935
3936 selected_tab = self._extract_selected_tab(tabs)
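# Channel pages carry their metadata in channelMetadataRenderer,
# playlists in playlistMetadataRenderer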
3937 renderer = try_get(
3938 data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
3939 if renderer:
3940 channel_name = renderer.get('title')
3941 channel_url = renderer.get('channelUrl')
3942 channel_id = renderer.get('externalId')
3943 else:
3944 renderer = try_get(
3945 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
3946
3947 if renderer:
3948 title = renderer.get('title')
3949 description = renderer.get('description', '')
3950 playlist_id = channel_id
3951 tags = renderer.get('keywords', '').split()
3952 thumbnails_list = (
3953 try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
3954 or try_get(
3955 self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
3956 lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
3957 list)
3958 or [])
3959
3960 thumbnails = []
3961 for t in thumbnails_list:
3962 if not isinstance(t, dict):
3963 continue
3964 thumbnail_url = url_or_none(t.get('url'))
3965 if not thumbnail_url:
3966 continue
3967 thumbnails.append({
3968 'url': thumbnail_url,
3969 'width': int_or_none(t.get('width')),
3970 'height': int_or_none(t.get('height')),
3971 })
3972 if playlist_id is None:
3973 playlist_id = item_id
3974 if title is None:
3975 title = (
3976 try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
3977 or playlist_id)
3978 title += format_field(selected_tab, 'title', ' - %s')
3979 title += format_field(selected_tab, 'expandedText', ' - %s')
3980 metadata = {
3981 'playlist_id': playlist_id,
3982 'playlist_title': title,
3983 'playlist_description': description,
3984 'uploader': channel_name,
3985 'uploader_id': channel_id,
3986 'uploader_url': channel_url,
3987 'thumbnails': thumbnails,
3988 'tags': tags,
3989 }
3990 availability = self._extract_availability(data)
3991 if availability:
3992 metadata['availability'] = availability
3993 if not channel_id:
3994 metadata.update(self._extract_uploader(data))
3995 metadata.update({
3996 'channel': metadata['uploader'],
3997 'channel_id': metadata['uploader_id'],
3998 'channel_url': metadata['uploader_url']})
3999 return self.playlist_result(
4000 self._entries(
4001 selected_tab, playlist_id, ytcfg,
4002 self._extract_account_syncid(ytcfg, data),
4003 self._extract_visitor_data(data, ytcfg)),
4004 **metadata)
4005
4006 def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
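# Mix playlists have no fixed end; keep requesting the next page from the last
# video's watchEndpoint and stop once the first video reappears or nothing new is returned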
4007 first_id = last_id = response = None
4008 for page_num in itertools.count(1):
4009 videos = list(self._playlist_entries(playlist))
4010 if not videos:
4011 return
4012 start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
4013 if start >= len(videos):
4014 return
4015 for video in videos[start:]:
4016 if video['id'] == first_id:
4017 self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
4018 return
4019 yield video
4020 first_id = first_id or videos[0]['id']
4021 last_id = videos[-1]['id']
4022 watch_endpoint = try_get(
4023 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
4024 headers = self.generate_api_headers(
4025 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
4026 visitor_data=self._extract_visitor_data(response, data, ytcfg))
4027 query = {
4028 'playlistId': playlist_id,
4029 'videoId': watch_endpoint.get('videoId') or last_id,
4030 'index': watch_endpoint.get('index') or len(videos),
4031 'params': watch_endpoint.get('params') or 'OAE%3D'
4032 }
4033 response = self._extract_response(
4034 item_id='%s page %d' % (playlist_id, page_num),
4035 query=query, ep='next', headers=headers, ytcfg=ytcfg,
4036 check_get_keys='contents'
4037 )
4038 playlist = try_get(
4039 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4040
4041 def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
4042 title = playlist.get('title') or try_get(
4043 data, lambda x: x['titleText']['simpleText'], compat_str)
4044 playlist_id = playlist.get('playlistId') or item_id
4045
4046 # Delegating everything except mix playlists to regular tab-based playlist URL
4047 playlist_url = urljoin(url, try_get(
4048 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
4049 compat_str))
4050 if playlist_url and playlist_url != url:
4051 return self.url_result(
4052 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4053 video_title=title)
4054
4055 return self.playlist_result(
4056 self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
4057 playlist_id=playlist_id, playlist_title=title)
4058
4059 def _extract_availability(self, data):
4060 """
4061 Gets the availability of a given playlist/tab.
4062 Note: Unless YouTube tells us explicitly, we do not assume it is public
4063 @param data: response
4064 """
4065 is_private = is_unlisted = None
4066 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
4067 badge_labels = self._extract_badges(renderer)
4068
4069 # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
4070 privacy_dropdown_entries = try_get(
4071 renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
4072 for renderer_dict in privacy_dropdown_entries:
4073 is_selected = try_get(
4074 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
4075 if not is_selected:
4076 continue
4077 label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
4078 if label:
4079 badge_labels.add(label.lower())
4080 break
4081
4082 for badge_label in badge_labels:
4083 if badge_label == 'unlisted':
4084 is_unlisted = True
4085 elif badge_label == 'private':
4086 is_private = True
4087 elif badge_label == 'public':
4088 is_unlisted = is_private = False
4089 return self._availability(is_private, False, False, False, is_unlisted)
4090
4091 @staticmethod
4092 def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
4093 sidebar_renderer = try_get(
4094 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
4095 for item in sidebar_renderer:
4096 renderer = try_get(item, lambda x: x[info_renderer], expected_type)
4097 if renderer:
4098 return renderer
4099
4100 def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
4101 """
4102 Get playlist with unavailable videos if the 'show unavailable videos' button exists.
4103 """
4104 browse_id = params = None
4105 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
4106 if not renderer:
4107 return
4108 menu_renderer = try_get(
4109 renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
4110 for menu_item in menu_renderer:
4111 if not isinstance(menu_item, dict):
4112 continue
4113 nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
4114 text = try_get(
4115 nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
4116 if not text or text.lower() != 'show unavailable videos':
4117 continue
4118 browse_endpoint = try_get(
4119 nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
4120 browse_id = browse_endpoint.get('browseId')
4121 params = browse_endpoint.get('params')
4122 break
4123
4124 headers = self.generate_api_headers(
4125 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
4126 visitor_data=self._extract_visitor_data(data, ytcfg))
4127 query = {
4128 'params': params or 'wgYCCAA=',
4129 'browseId': browse_id or 'VL%s' % item_id
4130 }
4131 return self._extract_response(
4132 item_id=item_id, headers=headers, query=query,
4133 check_get_keys='contents', fatal=False, ytcfg=ytcfg,
4134 note='Downloading API JSON with unavailable videos')
4135
4136 def _extract_webpage(self, url, item_id, fatal=True):
4137 retries = self.get_param('extractor_retries', 3)
4138 count = -1
4139 webpage = data = last_error = None
4140 while count < retries:
4141 count += 1
4142 # Sometimes youtube returns a webpage with incomplete ytInitialData
4143 # See: https://github.com/yt-dlp/yt-dlp/issues/116
4144 if last_error:
4145 self.report_warning('%s. Retrying ...' % last_error)
4146 try:
4147 webpage = self._download_webpage(
4148 url, item_id,
4149 note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
4150 data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
4151 except ExtractorError as e:
4152 if isinstance(e.cause, network_exceptions):
4153 if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
4154 last_error = error_to_compat_str(e.cause or e.msg)
4155 if count < retries:
4156 continue
4157 if fatal:
4158 raise
4159 self.report_warning(error_to_compat_str(e))
4160 break
4161 else:
4162 try:
4163 self._extract_and_report_alerts(data)
4164 except ExtractorError as e:
4165 if fatal:
4166 raise
4167 self.report_warning(error_to_compat_str(e))
4168 break
4169
4170 if dict_get(data, ('contents', 'currentVideoEndpoint')):
4171 break
4172
4173 last_error = 'Incomplete yt initial data received'
4174 if count >= retries:
4175 if fatal:
4176 raise ExtractorError(last_error)
4177 self.report_warning(last_error)
4178 break
4179
4180 return webpage, data
4181
4182 def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
4183 data = None
4184 if 'webpage' not in self._configuration_arg('skip'):
4185 webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
4186 ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
4187 if not data:
4188 if not ytcfg and self.is_authenticated:
4189 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
4190 if 'authcheck' not in self._configuration_arg('skip') and fatal:
4191 raise ExtractorError(
4192 msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
4193 ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
4194 expected=True)
4195 self.report_warning(msg, only_once=True)
4196 data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
4197 return data, ytcfg
4198
4199 def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
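# Resolve the URL via the navigation/resolve_url endpoint, then call the
# browse or next endpoint it points at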
4200 headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
4201 resolve_response = self._extract_response(
4202 item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
4203 ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)
4204 endpoints = {'browseEndpoint': 'browse', 'watchEndpoint': 'next'}
4205 for ep_key, ep in endpoints.items():
4206 params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
4207 if params:
4208 return self._extract_response(
4209 item_id=item_id, query=params, ep=ep, headers=headers,
4210 ytcfg=ytcfg, fatal=fatal, default_client=default_client,
4211 check_get_keys=('contents', 'currentVideoEndpoint'))
4212 err_note = 'Failed to resolve url (does the playlist exist?)'
4213 if fatal:
4214 raise ExtractorError(err_note, expected=True)
4215 self.report_warning(err_note, item_id)
4216
4217 @staticmethod
4218 def _smuggle_data(entries, data):
4219 for entry in entries:
4220 if data:
4221 entry['url'] = smuggle_url(entry['url'], data)
4222 yield entry
4223
4224 def _real_extract(self, url):
4225 url, smuggled_data = unsmuggle_url(url, {})
4226 if self.is_music_url(url):
4227 smuggled_data['is_music_url'] = True
4228 info_dict = self.__real_extract(url, smuggled_data)
4229 if info_dict.get('entries'):
4230 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4231 return info_dict
4232
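# Splits a matched URL into pre (base URL), tab (e.g. /videos, only present for
# channel-type URLs) and post (the remainder)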
4233 _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4234
4235 def __real_extract(self, url, smuggled_data):
4236 item_id = self._match_id(url)
4237 url = compat_urlparse.urlunparse(
4238 compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
4239 compat_opts = self.get_param('compat_opts', [])
4240
4241 def get_mobj(url):
4242 mobj = self._url_re.match(url).groupdict()
4243 mobj.update((k, '') for k, v in mobj.items() if v is None)
4244 return mobj
4245
4246 mobj = get_mobj(url)
4247 # Youtube returns incomplete data if the tab name is not lowercase
4248 pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
4249 if is_channel:
4250 if smuggled_data.get('is_music_url'):
4251 if item_id[:2] == 'VL':
4252 # Youtube music VL channels have an equivalent playlist
4253 item_id = item_id[2:]
4254 pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
4255 elif item_id[:2] == 'MP':
4256 # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
4257 mdata = self._extract_tab_endpoint(
4258 'https://music.youtube.com/channel/%s' % item_id, item_id, default_client='web_music')
4259 murl = traverse_obj(
4260 mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'), get_all=False, expected_type=compat_str)
4261 if not murl:
4262 raise ExtractorError('Failed to resolve album to playlist.')
4263 return self.url_result(murl, ie=YoutubeTabIE.ie_key())
4264 elif mobj['channel_type'] == 'browse':
4265 # Youtube music /browse/ should be changed to /channel/
4266 pre = 'https://www.youtube.com/channel/%s' % item_id
4267 if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
4268 # Home URLs should redirect to /videos/
4269 self.report_warning(
4270 'A channel/user page was given. All the channel\'s videos will be downloaded. '
4271 'To download only the videos in the home page, add a "/featured" to the URL')
4272 tab = '/videos'
4273
4274 url = ''.join((pre, tab, post))
4275 mobj = get_mobj(url)
4276
4277 # Handle both video/playlist URLs
4278 qs = parse_qs(url)
4279 video_id = qs.get('v', [None])[0]
4280 playlist_id = qs.get('list', [None])[0]
4281
4282 if not video_id and mobj['not_channel'].startswith('watch'):
4283 if not playlist_id:
4284 # If there are neither video nor playlist IDs, youtube redirects to the home page, which is undesirable
4285 raise ExtractorError('Unable to recognize tab page')
4286 # Common mistake: https://www.youtube.com/watch?list=playlist_id
4287 self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
4288 url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
4289 mobj = get_mobj(url)
4290
4291 if video_id and playlist_id:
4292 if self.get_param('noplaylist'):
4293 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
4294 return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
4295 self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))
4296
4297 data, ytcfg = self._extract_data(url, item_id)
4298
4299 tabs = try_get(
4300 data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
4301 if tabs:
4302 selected_tab = self._extract_selected_tab(tabs)
4303 tab_name = selected_tab.get('title', '')
4304 if 'no-youtube-channel-redirect' not in compat_opts:
4305 if mobj['tab'] == '/live':
4306 # Live tab should have redirected to the video
4307 raise ExtractorError('The channel is not currently live', expected=True)
4308 if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
4309 if not mobj['not_channel'] and item_id[:2] == 'UC':
4310 # Topic channels don't have /videos. Use the equivalent playlist instead
4311 self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
4312 pl_id = 'UU%s' % item_id[2:]
4313 pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
4314 try:
4315 data, ytcfg, item_id, url = *self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True), pl_id, pl_url
4316 except ExtractorError:
4317 self.report_warning('The playlist gave error. Falling back to channel URL')
4318 else:
4319 self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))
4320
4321 self.write_debug('Final URL: %s' % url)
4322
4323 # YouTube sometimes provides a button to reload playlist with unavailable videos.
4324 if 'no-youtube-unavailable-videos' not in compat_opts:
4325 data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
4326 self._extract_and_report_alerts(data, only_once=True)
4327 tabs = try_get(
4328 data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
4329 if tabs:
4330 return self._extract_from_tabs(item_id, ytcfg, data, tabs)
4331
4332 playlist = try_get(
4333 data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4334 if playlist:
4335 return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)
4336
4337 video_id = try_get(
4338 data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
4339 compat_str) or video_id
4340 if video_id:
4341 if mobj['tab'] != '/live': # live tab is expected to redirect to video
4342 self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
4343 return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
4344
4345 raise ExtractorError('Unable to recognize tab page')
4346
4347
4348 class YoutubePlaylistIE(InfoExtractor):
4349 IE_DESC = 'YouTube playlists'
4350 _VALID_URL = r'''(?x)(?:
4351 (?:https?://)?
4352 (?:\w+\.)?
4353 (?:
4354 (?:
4355 youtube(?:kids)?\.com|
4356 %(invidious)s
4357 )
4358 /.*?\?.*?\blist=
4359 )?
4360 (?P<id>%(playlist_id)s)
4361 )''' % {
4362 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
4363 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
4364 }
4365 IE_NAME = 'youtube:playlist'
4366 _TESTS = [{
4367 'note': 'issue #673',
4368 'url': 'PLBB231211A4F62143',
4369 'info_dict': {
4370 'title': '[OLD]Team Fortress 2 (Class-based LP)',
4371 'id': 'PLBB231211A4F62143',
4372 'uploader': 'Wickydoo',
4373 'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
4374 'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
4375 },
4376 'playlist_mincount': 29,
4377 }, {
4378 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
4379 'info_dict': {
4380 'title': 'YDL_safe_search',
4381 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
4382 },
4383 'playlist_count': 2,
4384 'skip': 'This playlist is private',
4385 }, {
4386 'note': 'embedded',
4387 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
4388 'playlist_count': 4,
4389 'info_dict': {
4390 'title': 'JODA15',
4391 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
4392 'uploader': 'milan',
4393 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
4394 }
4395 }, {
4396 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
4397 'playlist_mincount': 654,
4398 'info_dict': {
4399 'title': '2018 Chinese New Singles (11/6 updated)',
4400 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
4401 'uploader': 'LBK',
4402 'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
4403 'description': 'md5:da521864744d60a198e3a88af4db0d9d',
4404 }
4405 }, {
4406 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
4407 'only_matching': True,
4408 }, {
4409 # music album playlist
4410 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
4411 'only_matching': True,
4412 }]
4413
4414 @classmethod
4415 def suitable(cls, url):
4416 if YoutubeTabIE.suitable(url):
4417 return False
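# parse_qs is also imported at module level; the local import here presumably keeps `suitable` self-contained (e.g. for the generated lazy extractors)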
4418 from ..utils import parse_qs
4419 qs = parse_qs(url)
4420 if qs.get('v', [None])[0]:
4421 return False
4422 return super(YoutubePlaylistIE, cls).suitable(url)
4423
4424 def _real_extract(self, url):
4425 playlist_id = self._match_id(url)
4426 is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
4427 url = update_url_query(
4428 'https://www.youtube.com/playlist',
4429 parse_qs(url) or {'list': playlist_id})
4430 if is_music_url:
4431 url = smuggle_url(url, {'is_music_url': True})
4432 return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4433
4434
4435 class YoutubeYtBeIE(InfoExtractor):
4436 IE_DESC = 'youtu.be'
4437 _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
4438 _TESTS = [{
4439 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
4440 'info_dict': {
4441 'id': 'yeWKywCrFtk',
4442 'ext': 'mp4',
4443 'title': 'Small Scale Baler and Braiding Rugs',
4444 'uploader': 'Backus-Page House Museum',
4445 'uploader_id': 'backuspagemuseum',
4446 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
4447 'upload_date': '20161008',
4448 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
4449 'categories': ['Nonprofits & Activism'],
4450 'tags': list,
4451 'like_count': int,
4452 'dislike_count': int,
4453 },
4454 'params': {
4455 'noplaylist': True,
4456 'skip_download': True,
4457 },
4458 }, {
4459 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
4460 'only_matching': True,
4461 }]
4462
4463 def _real_extract(self, url):
4464 mobj = self._match_valid_url(url)
4465 video_id = mobj.group('id')
4466 playlist_id = mobj.group('playlist_id')
4467 return self.url_result(
4468 update_url_query('https://www.youtube.com/watch', {
4469 'v': video_id,
4470 'list': playlist_id,
4471 'feature': 'youtu.be',
4472 }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4473
4474
4475 class YoutubeYtUserIE(InfoExtractor):
4476 IE_DESC = 'YouTube user videos; "ytuser:" prefix'
4477 _VALID_URL = r'ytuser:(?P<id>.+)'
4478 _TESTS = [{
4479 'url': 'ytuser:phihag',
4480 'only_matching': True,
4481 }]
4482
4483 def _real_extract(self, url):
4484 user_id = self._match_id(url)
4485 return self.url_result(
4486 'https://www.youtube.com/user/%s/videos' % user_id,
4487 ie=YoutubeTabIE.ie_key(), video_id=user_id)
4488
4489
4490 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
4491 IE_NAME = 'youtube:favorites'
4492 IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
4493 _VALID_URL = r':ytfav(?:ou?rite)?s?'
4494 _LOGIN_REQUIRED = True
4495 _TESTS = [{
4496 'url': ':ytfav',
4497 'only_matching': True,
4498 }, {
4499 'url': ':ytfavorites',
4500 'only_matching': True,
4501 }]
4502
4503 def _real_extract(self, url):
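# 'LL' is the authenticated user's liked-videos playlist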
4504 return self.url_result(
4505 'https://www.youtube.com/playlist?list=LL',
4506 ie=YoutubeTabIE.ie_key())
4507
4508
4509 class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
4510 IE_DESC = 'YouTube searches'
4511 IE_NAME = 'youtube:search'
4512 _SEARCH_KEY = 'ytsearch'
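# Invoked as "ytsearchN:<query>" (e.g. "ytsearch5:yt-dlp") to fetch the first N results; a bare "ytsearch:<query>" fetches one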
4513 _SEARCH_PARAMS = None
4514 _TESTS = []
4515
4516 def _search_results(self, query):
4517 data = {'query': query}
4518 if self._SEARCH_PARAMS:
4519 data['params'] = self._SEARCH_PARAMS
4520 continuation = {}
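# `continuation` carries the token for the next page of results and is merged into the request data on each iteration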
4521 for page_num in itertools.count(1):
4522 data.update(continuation)
4523 search = self._extract_response(
4524 item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
4525 check_get_keys=('contents', 'onResponseReceivedCommands')
4526 )
4527 if not search:
4528 break
4529 slr_contents = try_get(
4530 search,
4531 (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
4532 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
4533 list)
4534 if not slr_contents:
4535 break
4536
4537 # YouTube sometimes adds promoted content to search results,
4538 # shifting the position of the videos and the continuation token,
4539 # so we search through all entries until we find them.
4540 continuation = None
4541 for slr_content in slr_contents:
4542 if not continuation:
4543 continuation = self._extract_continuation({'contents': [slr_content]})
4544
4545 isr_contents = try_get(
4546 slr_content,
4547 lambda x: x['itemSectionRenderer']['contents'],
4548 list)
4549 if not isr_contents:
4550 continue
4551 for content in isr_contents:
4552 if not isinstance(content, dict):
4553 continue
4554 video = content.get('videoRenderer')
4555 if not isinstance(video, dict):
4556 continue
4557 video_id = video.get('videoId')
4558 if not video_id:
4559 continue
4560
4561 yield self._extract_video(video)
4562
4563 if not continuation:
4564 break
4565
4566
4567 class YoutubeSearchDateIE(YoutubeSearchIE):
4568 IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
4569 _SEARCH_KEY = 'ytsearchdate'
4570 IE_DESC = 'YouTube searches, newest videos first'
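# URL-encoded search filter ('CAI=') that sorts results by upload date (newest first)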
4571 _SEARCH_PARAMS = 'CAI%3D'
4572
4573
4574 class YoutubeSearchURLIE(YoutubeSearchIE):
4575 IE_DESC = 'YouTube search URLs with sorting and filter support'
4576 IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
4577 _SEARCH_KEY = None
4578 _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(?:.*?&)?(?:search_query|q)=[^&]+(?:&|$)'
4579 # _MAX_RESULTS = 100
4580 _TESTS = [{
4581 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
4582 'playlist_mincount': 5,
4583 'info_dict': {
4584 'id': 'youtube-dl test video',
4585 'title': 'youtube-dl test video',
4586 }
4587 }, {
4588 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
4589 'only_matching': True,
4590 }]
4591
4592 @classmethod
4593 def _make_valid_url(cls):
4594 return cls._VALID_URL
4595
4596 def _real_extract(self, url):
4597 qs = parse_qs(url)
4598 query = (qs.get('search_query') or qs.get('q'))[0]
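# Forward any 'sp' filter from the URL so sorting/filtering is preserved in the search request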
4599 self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
4600 return self._get_n_results(query, self._MAX_RESULTS)
4601
4602
4603 class YoutubeFeedsInfoExtractor(YoutubeTabIE):
4604 """
4605 Base class for feed extractors.
4606 Subclasses must define the _FEED_NAME property.
4607 """
4608 _LOGIN_REQUIRED = True
4609 _TESTS = []
4610
4611 @property
4612 def IE_NAME(self):
4613 return 'youtube:%s' % self._FEED_NAME
4614
4615 def _real_extract(self, url):
4616 return self.url_result(
4617 'https://www.youtube.com/feed/%s' % self._FEED_NAME,
4618 ie=YoutubeTabIE.ie_key())
4619
4620
4621 class YoutubeWatchLaterIE(InfoExtractor):
4622 IE_NAME = 'youtube:watchlater'
4623 IE_DESC = 'YouTube watch later list; ":ytwatchlater" keyword (requires cookies)'
4624 _VALID_URL = r':ytwatchlater'
4625 _TESTS = [{
4626 'url': ':ytwatchlater',
4627 'only_matching': True,
4628 }]
4629
4630 def _real_extract(self, url):
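# 'WL' is the authenticated user's Watch Later playlist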
4631 return self.url_result(
4632 'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())
4633
4634
4635 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
4636 IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
4637 _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
4638 _FEED_NAME = 'recommended'
4639 _LOGIN_REQUIRED = False
4640 _TESTS = [{
4641 'url': ':ytrec',
4642 'only_matching': True,
4643 }, {
4644 'url': ':ytrecommended',
4645 'only_matching': True,
4646 }, {
4647 'url': 'https://youtube.com',
4648 'only_matching': True,
4649 }]
4650
4651
4652 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
4653 IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
4654 _VALID_URL = r':ytsub(?:scription)?s?'
4655 _FEED_NAME = 'subscriptions'
4656 _TESTS = [{
4657 'url': ':ytsubs',
4658 'only_matching': True,
4659 }, {
4660 'url': ':ytsubscriptions',
4661 'only_matching': True,
4662 }]
4663
4664
4665 class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
4666 IE_DESC = 'YouTube watch history; ":ythis" keyword (requires cookies)'
4667 _VALID_URL = r':ythis(?:tory)?'
4668 _FEED_NAME = 'history'
4669 _TESTS = [{
4670 'url': ':ythistory',
4671 'only_matching': True,
4672 }]
4673
4674
4675 class YoutubeTruncatedURLIE(InfoExtractor):
4676 IE_NAME = 'youtube:truncated_url'
4677 IE_DESC = False # Do not list
4678 _VALID_URL = r'''(?x)
4679 (?:https?://)?
4680 (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
4681 (?:watch\?(?:
4682 feature=[a-z_]+|
4683 annotation_id=annotation_[^&]+|
4684 x-yt-cl=[0-9]+|
4685 hl=[^&]*|
4686 t=[0-9]+
4687 )?
4688 |
4689 attribution_link\?a=[^&]+
4690 )
4691 $
4692 '''
4693
4694 _TESTS = [{
4695 'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
4696 'only_matching': True,
4697 }, {
4698 'url': 'https://www.youtube.com/watch?',
4699 'only_matching': True,
4700 }, {
4701 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
4702 'only_matching': True,
4703 }, {
4704 'url': 'https://www.youtube.com/watch?feature=foo',
4705 'only_matching': True,
4706 }, {
4707 'url': 'https://www.youtube.com/watch?hl=en-GB',
4708 'only_matching': True,
4709 }, {
4710 'url': 'https://www.youtube.com/watch?t=2372',
4711 'only_matching': True,
4712 }]
4713
4714 def _real_extract(self, url):
4715 raise ExtractorError(
4716 'Did you forget to quote the URL? Remember that & is a meta '
4717 'character in most shells, so you want to put the URL in quotes, '
4718 'like yt-dlp '
4719 '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
4720 'or simply yt-dlp BaW_jenozKc .',
4721 expected=True)
4722
4723
4724 class YoutubeClipIE(InfoExtractor):
4725 IE_NAME = 'youtube:clip'
4726 IE_DESC = False # Do not list
4727 _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'
4728
4729 def _real_extract(self, url):
4730 self.report_warning('YouTube clips are not currently supported. The entire video will be downloaded instead')
4731 return self.url_result(url, 'Generic')
4732
4733
4734 class YoutubeTruncatedIDIE(InfoExtractor):
4735 IE_NAME = 'youtube:truncated_id'
4736 IE_DESC = False # Do not list
4737 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
4738
4739 _TESTS = [{
4740 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
4741 'only_matching': True,
4742 }]
4743
4744 def _real_extract(self, url):
4745 video_id = self._match_id(url)
4746 raise ExtractorError(
4747 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
4748 expected=True)