yt_dlp/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import calendar
   6 import copy
   7 import datetime
   8 import functools
   9 import hashlib
  10 import itertools
  11 import json
  12 import math
  13 import os.path
  14 import random
  15 import re
  16 import sys
  17 import time
  18 import traceback
  19 import threading
  20
  21 from .common import InfoExtractor, SearchInfoExtractor
  22 from ..compat import (
  23     compat_chr,
  24     compat_HTTPError,
  25     compat_parse_qs,
  26     compat_str,
  27     compat_urllib_parse_unquote_plus,
  28     compat_urllib_parse_urlencode,
  29     compat_urllib_parse_urlparse,
  30     compat_urlparse,
  31 )
  32 from ..jsinterp import JSInterpreter
  33 from ..utils import (
  34     bug_reports_message,
  35     clean_html,
  36     datetime_from_str,
  37     dict_get,
  38     error_to_compat_str,
  39     ExtractorError,
  40     float_or_none,
  41     format_field,
  42     int_or_none,
  43     is_html,
  44     join_nonempty,
  45     mimetype2ext,
  46     network_exceptions,
  47     NO_DEFAULT,
  48     orderedSet,
  49     parse_codecs,
  50     parse_count,
  51     parse_duration,
  52     parse_iso8601,
  53     parse_qs,
  54     qualities,
  55     remove_end,
  56     remove_start,
  57     smuggle_url,
  58     str_or_none,
  59     str_to_int,
  60     strftime_or_none,
  61     traverse_obj,
  62     try_get,
  63     unescapeHTML,
  64     unified_strdate,
  65     unsmuggle_url,
  66     update_url_query,
  67     url_or_none,
  68     urljoin,
  69     variadic,
  70 )
  71
  72
  73 def get_first(obj, keys, **kwargs):
  74     return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
  75
  76
# Built-in Innertube client configurations, keyed by client name.
# Each entry carries the request context ('INNERTUBE_CONTEXT'), the numeric
# 'INNERTUBE_CONTEXT_CLIENT_NAME' and, optionally, an API key, an API host and
# a 'REQUIRE_JS_PLAYER' flag. Entries that omit the optional keys receive
# defaults in build_innertube_clients().
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20211221.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20211215.00.01',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20211213.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20211220.02.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.49',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.49',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.57',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.47',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.46',
                'deviceModel': 'iPhone14,3',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.46',
                'deviceModel': 'iPhone14,3',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.57',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.47',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20211221.01.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    }
}
 223
 224
 225 def build_innertube_clients():
 226     third_party = {
 227         'embedUrl': 'https://google.com',  # Can be any valid URL
 228     }
 229     base_clients = ('android', 'web', 'ios', 'mweb')
 230     priority = qualities(base_clients[::-1])
 231
 232     for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
 233         ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
 234         ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
 235         ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
 236         ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
 237         ytcfg['priority'] = 10 * priority(client.split('_', 1)[0])
 238
 239         if client in base_clients:
 240             INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
 241             agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
 242             agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
 243             agegate_ytcfg['priority'] -= 1
 244         elif client.endswith('_embedded'):
 245             ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
 246             ytcfg['priority'] -= 2
 247         else:
 248             ytcfg['priority'] -= 3
 249
 250
 251 build_innertube_clients()
 252
 253
 254 class YoutubeBaseInfoExtractor(InfoExtractor):
 255     """Provide base functions for Youtube extractors"""
 256
    # Regex alternation of path names that have a fixed meaning on the site.
    # NOTE(review): presumably used by URL patterns elsewhere so these are not
    # mistaken for channel/user names - confirm against the users of this value
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
        r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
        r'browse|oembed|get_video_info|iframe_api|s/player|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    # Playlist ID pattern: a known prefix followed by at least 10 ID characters,
    # or one of the bare IDs RDMM, WL, LL, LM
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    # NOTE(review): presumably the machine name used for .netrc credential lookup
    # by the base extractor - confirm in InfoExtractor
    _NETRC_MACHINE = 'youtube'

    # If True, _login() raises a login-required error when neither the
    # 'cookiefile' nor the 'cookiesfrombrowser' parameter is set
    _LOGIN_REQUIRED = False
 269
    # Hostname regex fragments for Invidious (and similar) front-end instances.
    # Each entry is a regex, so literal dots are escaped.
    # NOTE(review): presumably joined into the URL patterns of the extractors
    # so that links to these mirrors are recognised - confirm against the users
    _INVIDIOUS_SITES = (
        # invidious-redirect websites
        r'(?:www\.)?redirect\.invidious\.io',
        r'(?:(?:www|dev)\.)?invidio\.us',
        # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
        r'(?:www\.)?invidious\.pussthecat\.org',
        r'(?:www\.)?invidious\.zee\.li',
        r'(?:www\.)?invidious\.ethibox\.fr',
        r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
        # youtube-dl invidious instances list
        r'(?:(?:www|no)\.)?invidiou\.sh',
        r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
        r'(?:www\.)?invidious\.kabi\.tk',
        r'(?:www\.)?invidious\.mastodon\.host',
        r'(?:www\.)?invidious\.zapashcanon\.fr',
        r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
        r'(?:www\.)?invidious\.tinfoil-hat\.net',
        r'(?:www\.)?invidious\.himiko\.cloud',
        r'(?:www\.)?invidious\.reallyancient\.tech',
        r'(?:www\.)?invidious\.tube',
        r'(?:www\.)?invidiou\.site',
        r'(?:www\.)?invidious\.site',
        r'(?:www\.)?invidious\.xyz',
        r'(?:www\.)?invidious\.nixnet\.xyz',
        r'(?:www\.)?invidious\.048596\.xyz',
        r'(?:www\.)?invidious\.drycat\.fr',
        r'(?:www\.)?inv\.skyn3t\.in',
        r'(?:www\.)?tube\.poal\.co',
        r'(?:www\.)?tube\.connect\.cafe',
        r'(?:www\.)?vid\.wxzm\.sx',
        r'(?:www\.)?vid\.mint\.lgbt',
        r'(?:www\.)?vid\.puffyan\.us',
        r'(?:www\.)?yewtu\.be',
        r'(?:www\.)?yt\.elukerio\.org',
        r'(?:www\.)?yt\.lelux\.fi',
        r'(?:www\.)?invidious\.ggc-project\.de',
        r'(?:www\.)?yt\.maisputain\.ovh',
        r'(?:www\.)?ytprivate\.com',
        r'(?:www\.)?invidious\.13ad\.de',
        r'(?:www\.)?invidious\.toot\.koeln',
        r'(?:www\.)?invidious\.fdn\.fr',
        r'(?:www\.)?watch\.nettohikari\.com',
        r'(?:www\.)?invidious\.namazso\.eu',
        r'(?:www\.)?invidious\.silkky\.cloud',
        r'(?:www\.)?invidious\.exonip\.de',
        r'(?:www\.)?invidious\.riverside\.rocks',
        r'(?:www\.)?invidious\.blamefran\.net',
        r'(?:www\.)?invidious\.moomoo\.de',
        r'(?:www\.)?ytb\.trom\.tf',
        r'(?:www\.)?yt\.cyberhost\.uk',
        r'(?:www\.)?kgg2m7yk5aybusll\.onion',
        r'(?:www\.)?qklhadlycap4cnod\.onion',
        r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
        r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
        r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
        r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
        r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
        r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
        r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
        r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
        r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
        r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
    )
 333
 334     def _login(self):
 335         """
 336         Attempt to log in to YouTube.
 337         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
 338         """
 339
 340         if (self._LOGIN_REQUIRED
 341                 and self.get_param('cookiefile') is None
 342                 and self.get_param('cookiesfrombrowser') is None):
 343             self.raise_login_required(
 344                 'Login details are needed to download this content', method='cookies')
 345         username, password = self._get_login_info()
 346         if username:
 347             self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
 348
 349     def _initialize_consent(self):
 350         cookies = self._get_cookies('https://www.youtube.com/')
 351         if cookies.get('__Secure-3PSID'):
 352             return
 353         consent_id = None
 354         consent = cookies.get('CONSENT')
 355         if consent:
 356             if 'YES' in consent.value:
 357                 return
 358             consent_id = self._search_regex(
 359                 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
 360         if not consent_id:
 361             consent_id = random.randint(100, 999)
 362         self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
 363
 364     def _initialize_pref(self):
 365         cookies = self._get_cookies('https://www.youtube.com/')
 366         pref_cookie = cookies.get('PREF')
 367         pref = {}
 368         if pref_cookie:
 369             try:
 370                 pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
 371             except ValueError:
 372                 self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
 373         pref.update({'hl': 'en'})
 374         self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))
 375
    def _real_initialize(self):
        """
        Run the initialisation steps of the extractor in order:
        set the PREF cookie language, ensure a CONSENT cookie, then check login.
        """
        self._initialize_pref()
        self._initialize_consent()
        self._login()
 380
    # Matches the `ytInitialData = {...};` (or `window["ytInitialData"] = {...};`)
    # assignment; group 1 is the non-greedy JSON object text
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    # Same for the `ytInitialPlayerResponse = {...};` assignment
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Text expected right after such an assignment; appended to the patterns
    # above (see extract_yt_initial_data) to anchor the end of the non-greedy match
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
 384
 385     def _get_default_ytcfg(self, client='web'):
 386         return copy.deepcopy(INNERTUBE_CLIENTS[client])
 387
 388     def _get_innertube_host(self, client='web'):
 389         return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
 390
 391     def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
 392         # try_get but with fallback to default ytcfg client values when present
 393         _func = lambda y: try_get(y, getter, expected_type)
 394         return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
 395
 396     def _extract_client_name(self, ytcfg, default_client='web'):
 397         return self._ytcfg_get_safe(
 398             ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
 399                     lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
 400
 401     def _extract_client_version(self, ytcfg, default_client='web'):
 402         return self._ytcfg_get_safe(
 403             ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
 404                     lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
 405
 406     def _extract_api_key(self, ytcfg=None, default_client='web'):
 407         return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
 408
 409     def _extract_context(self, ytcfg=None, default_client='web'):
 410         context = get_first(
 411             (ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
 412         # Enforce language for extraction
 413         traverse_obj(context, 'client', expected_type=dict, default={})['hl'] = 'en'
 414         return context
 415
    # Cached SAPISID cookie value used by _generate_sapisidhash_header():
    # None = not looked up yet, False = no usable cookie found, else the value
    _SAPISID = None
 417
 418     def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
 419         time_now = round(time.time())
 420         if self._SAPISID is None:
 421             yt_cookies = self._get_cookies('https://www.youtube.com')
 422             # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
 423             # See: https://github.com/yt-dlp/yt-dlp/issues/393
 424             sapisid_cookie = dict_get(
 425                 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
 426             if sapisid_cookie and sapisid_cookie.value:
 427                 self._SAPISID = sapisid_cookie.value
 428                 self.write_debug('Extracted SAPISID cookie')
 429                 # SAPISID cookie is required if not already present
 430                 if not yt_cookies.get('SAPISID'):
 431                     self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
 432                     self._set_cookie(
 433                         '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
 434             else:
 435                 self._SAPISID = False
 436         if not self._SAPISID:
 437             return None
 438         # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
 439         sapisidhash = hashlib.sha1(
 440             f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
 441         return f'SAPISIDHASH {time_now}_{sapisidhash}'
 442
 443     def _call_api(self, ep, query, video_id, fatal=True, headers=None,
 444                   note='Downloading API JSON', errnote='Unable to download API page',
 445                   context=None, api_key=None, api_hostname=None, default_client='web'):
 446
 447         data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
 448         data.update(query)
 449         real_headers = self.generate_api_headers(default_client=default_client)
 450         real_headers.update({'content-type': 'application/json'})
 451         if headers:
 452             real_headers.update(headers)
 453         return self._download_json(
 454             'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
 455             video_id=video_id, fatal=fatal, note=note, errnote=errnote,
 456             data=json.dumps(data).encode('utf8'), headers=real_headers,
 457             query={'key': api_key or self._extract_api_key()})
 458
 459     def extract_yt_initial_data(self, item_id, webpage, fatal=True):
 460         data = self._search_regex(
 461             (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
 462              self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
 463         if data:
 464             return self._parse_json(data, item_id, fatal=fatal)
 465
 466     @staticmethod
 467     def _extract_session_index(*data):
 468         """
 469         Index of current account in account list.
 470         See: https://github.com/yt-dlp/yt-dlp/pull/519
 471         """
 472         for ytcfg in data:
 473             session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
 474             if session_index is not None:
 475                 return session_index
 476
 477     # Deprecated?
 478     def _extract_identity_token(self, ytcfg=None, webpage=None):
 479         if ytcfg:
 480             token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
 481             if token:
 482                 return token
 483         if webpage:
 484             return self._search_regex(
 485                 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
 486                 'identity token', default=None, fatal=False)
 487
 488     @staticmethod
 489     def _extract_account_syncid(*args):
 490         """
 491         Extract syncId required to download private playlists of secondary channels
 492         @params response and/or ytcfg
 493         """
 494         for data in args:
 495             # ytcfg includes channel_syncid if on secondary channel
 496             delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
 497             if delegated_sid:
 498                 return delegated_sid
 499             sync_ids = (try_get(
 500                 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
 501                        lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
 502             if len(sync_ids) >= 2 and sync_ids[1]:
 503                 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
 504                 # and just "user_syncid||" for primary channel. We only want the channel_syncid
 505                 return sync_ids[0]
 506
 507     @staticmethod
 508     def _extract_visitor_data(*args):
 509         """
 510         Extracts visitorData from an API response or ytcfg
 511         Appears to be used to track session state
 512         """
 513         return get_first(
 514             args, (('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))),
 515             expected_type=str)
 516
    @property
    def is_authenticated(self):
        """
        True when a SAPISIDHASH authorization header can be generated,
        i.e. when a SAPISID cookie value is available.
        """
        return bool(self._generate_sapisidhash_header())
 520
 521     def extract_ytcfg(self, video_id, webpage):
 522         if not webpage:
 523             return {}
 524         return self._parse_json(
 525             self._search_regex(
 526                 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
 527                 default='{}'), video_id, fatal=False) or {}
 528
 529     def generate_api_headers(
 530             self, *, ytcfg=None, account_syncid=None, session_index=None,
 531             visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
 532
 533         origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
 534         headers = {
 535             'X-YouTube-Client-Name': compat_str(
 536                 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
 537             'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
 538             'Origin': origin,
 539             'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
 540             'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
 541             'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
 542         }
 543         if session_index is None:
 544             session_index = self._extract_session_index(ytcfg)
 545         if account_syncid or session_index is not None:
 546             headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
 547
 548         auth = self._generate_sapisidhash_header(origin)
 549         if auth is not None:
 550             headers['Authorization'] = auth
 551             headers['X-Origin'] = origin
 552         return {h: v for h, v in headers.items() if v is not None}
 553
 554     @staticmethod
 555     def _build_api_continuation_query(continuation, ctp=None):
 556         query = {
 557             'continuation': continuation
 558         }
 559         # TODO: Inconsistency with clickTrackingParams.
 560         # Currently we have a fixed ctp contained within context (from ytcfg)
 561         # and a ctp in root query for continuation.
 562         if ctp:
 563             query['clickTracking'] = {'clickTrackingParams': ctp}
 564         return query
 565
 566     @classmethod
 567     def _extract_next_continuation_data(cls, renderer):
 568         next_continuation = try_get(
 569             renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
 570                        lambda x: x['continuation']['reloadContinuationData']), dict)
 571         if not next_continuation:
 572             return
 573         continuation = next_continuation.get('continuation')
 574         if not continuation:
 575             return
 576         ctp = next_continuation.get('clickTrackingParams')
 577         return cls._build_api_continuation_query(continuation, ctp)
 578
 579     @classmethod
 580     def _extract_continuation_ep_data(cls, continuation_ep: dict):
 581         if isinstance(continuation_ep, dict):
 582             continuation = try_get(
 583                 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
 584             if not continuation:
 585                 return
 586             ctp = continuation_ep.get('clickTrackingParams')
 587             return cls._build_api_continuation_query(continuation, ctp)
 588
 589     @classmethod
 590     def _extract_continuation(cls, renderer):
 591         next_continuation = cls._extract_next_continuation_data(renderer)
 592         if next_continuation:
 593             return next_continuation
 594
 595         contents = []
 596         for key in ('contents', 'items'):
 597             contents.extend(try_get(renderer, lambda x: x[key], list) or [])
 598
 599         for content in contents:
 600             if not isinstance(content, dict):
 601                 continue
 602             continuation_ep = try_get(
 603                 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
 604                           lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
 605                 dict)
 606             continuation = cls._extract_continuation_ep_data(continuation_ep)
 607             if continuation:
 608                 return continuation
 609
 610     @classmethod
 611     def _extract_alerts(cls, data):
 612         for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
 613             if not isinstance(alert_dict, dict):
 614                 continue
 615             for alert in alert_dict.values():
 616                 alert_type = alert.get('type')
 617                 if not alert_type:
 618                     continue
 619                 message = cls._get_text(alert, 'text')
 620                 if message:
 621                     yield alert_type, message
 622
 623     def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
 624         errors = []
 625         warnings = []
 626         for alert_type, alert_message in alerts:
 627             if alert_type.lower() == 'error' and fatal:
 628                 errors.append([alert_type, alert_message])
 629             else:
 630                 warnings.append([alert_type, alert_message])
 631
 632         for alert_type, alert_message in (warnings + errors[:-1]):
 633             self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
 634         if errors:
 635             raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
 636
 637     def _extract_and_report_alerts(self, data, *args, **kwargs):
 638         return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
 639
 640     def _extract_badges(self, renderer: dict):
 641         badges = set()
 642         for badge in try_get(renderer, lambda x: x['badges'], list) or []:
 643             label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
 644             if label:
 645                 badges.add(label.lower())
 646         return badges
 647
    @staticmethod
    def _get_text(data, *path_list, max_runs=None):
        """
        Return the first non-empty text found in data, or None.

        Each path of path_list is looked up with traverse_obj; without any path,
        data itself is inspected. A candidate yields its 'simpleText' string if it
        has one, otherwise the 'text' values of its 'runs' list (or of the
        candidate itself when it is a list) joined together.
        @param max_runs  if truthy, the runs are sliced to the first max_runs
                         entries before joining
        """
        for path in path_list or [None]:
            if path is None:
                obj = [data]
            else:
                obj = traverse_obj(data, path, default=[])
                # A path without `...` or a list/tuple key gives a single
                # object; wrap it so both cases can be iterated below
                if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                    obj = [obj]
            for item in obj:
                text = try_get(item, lambda x: x['simpleText'], compat_str)
                if text:
                    return text
                runs = try_get(item, lambda x: x['runs'], list) or []
                # The item may itself be the list of runs
                if not runs and isinstance(item, list):
                    runs = item

                runs = runs[:min(len(runs), max_runs or len(runs))]
                text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
                if text:
                    return text
 669
 670     @staticmethod
 671     def _extract_thumbnails(data, *path_list):
 672         """
 673         Extract thumbnails from thumbnails dict
 674         @param path_list: path list to level that contains 'thumbnails' key
 675         """
 676         thumbnails = []
 677         for path in path_list or [()]:
 678             for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[]):
 679                 thumbnail_url = url_or_none(thumbnail.get('url'))
 680                 if not thumbnail_url:
 681                     continue
 682                 # Sometimes youtube gives a wrong thumbnail URL. See:
 683                 # https://github.com/yt-dlp/yt-dlp/issues/233
 684                 # https://github.com/ytdl-org/youtube-dl/issues/28023
 685                 if 'maxresdefault' in thumbnail_url:
 686                     thumbnail_url = thumbnail_url.split('?')[0]
 687                 thumbnails.append({
 688                     'url': thumbnail_url,
 689                     'height': int_or_none(thumbnail.get('height')),
 690                     'width': int_or_none(thumbnail.get('width')),
 691                 })
 692         return thumbnails
 693
 694     @staticmethod
 695     def extract_relative_time(relative_time_text):
 696         """
 697         Extracts a relative time from string and converts to dt object
 698         e.g. 'streamed 6 days ago', '5 seconds ago (edited)'
 699         """
 700         mobj = re.search(r'(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
 701         if mobj:
 702             try:
 703                 return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')), precision='auto')
 704             except ValueError:
 705                 return None
 706
 707     def _extract_time_text(self, renderer, *path_list):
 708         text = self._get_text(renderer, *path_list) or ''
 709         dt = self.extract_relative_time(text)
 710         timestamp = None
 711         if isinstance(dt, datetime.datetime):
 712             timestamp = calendar.timegm(dt.timetuple())
 713         if text and timestamp is None:
 714             self.report_warning('Cannot parse localized time text' + bug_reports_message(), only_once=True)
 715         return timestamp, text
 716
 717     def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
 718                           ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
 719                           default_client='web'):
 720         response = None
 721         last_error = None
 722         count = -1
 723         retries = self.get_param('extractor_retries', 3)
 724         if check_get_keys is None:
 725             check_get_keys = []
 726         while count < retries:
 727             count += 1
 728             if last_error:
 729                 self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
 730             try:
 731                 response = self._call_api(
 732                     ep=ep, fatal=True, headers=headers,
 733                     video_id=item_id, query=query,
 734                     context=self._extract_context(ytcfg, default_client),
 735                     api_key=self._extract_api_key(ytcfg, default_client),
 736                     api_hostname=api_hostname, default_client=default_client,
 737                     note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
 738             except ExtractorError as e:
 739                 if isinstance(e.cause, network_exceptions):
 740                     if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
 741                         e.cause.seek(0)
 742                         yt_error = try_get(
 743                             self._parse_json(e.cause.read().decode(), item_id, fatal=False),
 744                             lambda x: x['error']['message'], compat_str)
 745                         if yt_error:
 746                             self._report_alerts([('ERROR', yt_error)], fatal=False)
 747                     # Downloading page may result in intermittent 5xx HTTP error
 748                     # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
 749                     # We also want to catch all other network exceptions since errors in later pages can be troublesome
 750                     # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
 751                     if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
 752                         last_error = error_to_compat_str(e.cause or e.msg)
 753                         if count < retries:
 754                             continue
 755                 if fatal:
 756                     raise
 757                 else:
 758                     self.report_warning(error_to_compat_str(e))
 759                     return
 760
 761             else:
 762                 try:
 763                     self._extract_and_report_alerts(response, only_once=True)
 764                 except ExtractorError as e:
 765                     # YouTube servers may return errors we want to retry on in a 200 OK response
 766                     # See: https://github.com/yt-dlp/yt-dlp/issues/839
 767                     if 'unknown error' in e.msg.lower():
 768                         last_error = e.msg
 769                         continue
 770                     if fatal:
 771                         raise
 772                     self.report_warning(error_to_compat_str(e))
 773                     return
 774                 if not check_get_keys or dict_get(response, check_get_keys):
 775                     break
 776                 # Youtube sometimes sends incomplete data
 777                 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
 778                 last_error = 'Incomplete data received'
 779                 if count >= retries:
 780                     if fatal:
 781                         raise ExtractorError(last_error)
 782                     else:
 783                         self.report_warning(last_error)
 784                         return
 785         return response
 786
 787     @staticmethod
 788     def is_music_url(url):
 789         return re.match(r'https?://music\.youtube\.com/', url) is not None
 790
 791     def _extract_video(self, renderer):
 792         video_id = renderer.get('videoId')
 793         title = self._get_text(renderer, 'title')
 794         description = self._get_text(renderer, 'descriptionSnippet')
 795         duration = parse_duration(self._get_text(
 796             renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
 797         view_count_text = self._get_text(renderer, 'viewCountText') or ''
 798         view_count = str_to_int(self._search_regex(
 799             r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
 800             'view count', default=None))
 801
 802         uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
 803         channel_id = traverse_obj(
 804             renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'), expected_type=str, get_all=False)
 805         timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
 806         scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
 807         overlay_style = traverse_obj(
 808             renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), get_all=False, expected_type=str)
 809         badges = self._extract_badges(renderer)
 810         thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
 811
 812         return {
 813             '_type': 'url',
 814             'ie_key': YoutubeIE.ie_key(),
 815             'id': video_id,
 816             'url': f'https://www.youtube.com/watch?v={video_id}',
 817             'title': title,
 818             'description': description,
 819             'duration': duration,
 820             'view_count': view_count,
 821             'uploader': uploader,
 822             'channel_id': channel_id,
 823             'thumbnails': thumbnails,
 824             'upload_date': strftime_or_none(timestamp, '%Y%m%d'),
 825             'live_status': ('is_upcoming' if scheduled_timestamp is not None
 826                             else 'was_live' if 'streamed' in time_text.lower()
 827                             else 'is_live' if overlay_style is not None and overlay_style == 'LIVE' or 'live now' in badges
 828                             else None),
 829             'release_timestamp': scheduled_timestamp,
 830             'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
 831         }
 832
 833
 834 class YoutubeIE(YoutubeBaseInfoExtractor):
 835     IE_DESC = 'YouTube'
 836     _VALID_URL = r"""(?x)^
 837                      (
 838                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 839                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
 840                             (?:www\.)?deturl\.com/www\.youtube\.com|
 841                             (?:www\.)?pwnyoutube\.com|
 842                             (?:www\.)?hooktube\.com|
 843                             (?:www\.)?yourepeat\.com|
 844                             tube\.majestyc\.net|
 845                             %(invidious)s|
 846                             youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
 847                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 848                          (?:                                                  # the various things that can precede the ID:
 849                              (?:(?:v|embed|e|shorts)/(?!videoseries))         # v/ or embed/ or e/ or shorts/
 850                              |(?:                                             # or the v= param in all its forms
 851                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 852                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 853                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 854                                  v=
 855                              )
 856                          ))
 857                          |(?:
 858                             youtu\.be|                                        # just youtu.be/xxxx
 859                             vid\.plus|                                        # or vid.plus/xxxx
 860                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 861                             %(invidious)s
 862                          )/
 863                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 864                          )
 865                      )?                                                       # all until now is optional -> you can pass the naked ID
 866                      (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
 867                      (?(1).+)?                                                # if we found the ID, everything can follow
 868                      (?:\#|$)""" % {
 869         'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
 870     }
 871     _PLAYER_INFO_RE = (
 872         r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
 873         r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
 874         r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
 875     )
 876     _formats = {
 877         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 878         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 879         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 880         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 881         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 882         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 883         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 884         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 885         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 886         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 887         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 888         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 889         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 890         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 891         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 892         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 893         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 894         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 895
 896
 897         # 3D videos
 898         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 899         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 900         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 901         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 902         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
 903         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 904         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 905
 906         # Apple HTTP Live Streaming
 907         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 908         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 909         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 910         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 911         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 912         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 913         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 914         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
 915
 916         # DASH mp4 video
 917         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
 918         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
 919         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 920         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
 921         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
 922         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
 923         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
 924         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 925         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
 926         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 927         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 928         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
 929
 930         # Dash mp4 audio
 931         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
 932         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
 933         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
 934         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 935         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 936         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
 937         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
 938
 939         # Dash webm
 940         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 941         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 942         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 943         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 944         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 945         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 946         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
 947         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 948         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 949         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 950         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 951         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 952         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 953         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 954         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 955         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
 956         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 957         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 958         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 959         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 960         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 961         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 962
 963         # Dash webm audio
 964         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
 965         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
 966
 967         # Dash webm audio with opus inside
 968         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
 969         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
 970         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
 971
 972         # RTMP (unnamed)
 973         '_rtmp': {'protocol': 'rtmp'},
 974
 975         # av01 video only formats sometimes served with "unknown" codecs
 976         '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
 977         '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
 978         '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
 979         '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
 980         '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
 981         '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
 982         '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
 983         '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
 984     }
 985     _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
 986
 987     _GEO_BYPASS = False
 988
 989     IE_NAME = 'youtube'
 990     _TESTS = [
 991         {
 992             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
 993             'info_dict': {
 994                 'id': 'BaW_jenozKc',
 995                 'ext': 'mp4',
 996                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 997                 'uploader': 'Philipp Hagemeister',
 998                 'uploader_id': 'phihag',
 999                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1000                 'channel': 'Philipp Hagemeister',
1001                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1002                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1003                 'upload_date': '20121002',
1004                 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1005                 'categories': ['Science & Technology'],
1006                 'tags': ['youtube-dl'],
1007                 'duration': 10,
1008                 'view_count': int,
1009                 'like_count': int,
1010                 # 'dislike_count': int,
1011                 'availability': 'public',
1012                 'playable_in_embed': True,
1013                 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1014                 'live_status': 'not_live',
1015                 'age_limit': 0,
1016                 'start_time': 1,
1017                 'end_time': 9,
1018             }
1019         },
1020         {
1021             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1022             'note': 'Embed-only video (#1746)',
1023             'info_dict': {
1024                 'id': 'yZIXLfi8CZQ',
1025                 'ext': 'mp4',
1026                 'upload_date': '20120608',
1027                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1028                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1029                 'uploader': 'SET India',
1030                 'uploader_id': 'setindia',
1031                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1032                 'age_limit': 18,
1033             },
1034             'skip': 'Private video',
1035         },
1036         {
1037             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1038             'note': 'Use the first video ID in the URL',
1039             'info_dict': {
1040                 'id': 'BaW_jenozKc',
1041                 'ext': 'mp4',
1042                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1043                 'uploader': 'Philipp Hagemeister',
1044                 'uploader_id': 'phihag',
1045                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1046                 'upload_date': '20121002',
1047                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1048                 'categories': ['Science & Technology'],
1049                 'tags': ['youtube-dl'],
1050                 'duration': 10,
1051                 'view_count': int,
1052                 'like_count': int,
1053                 'dislike_count': int,
1054             },
1055             'params': {
1056                 'skip_download': True,
1057             },
1058         },
1059         {
1060             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1061             'note': '256k DASH audio (format 141) via DASH manifest',
1062             'info_dict': {
1063                 'id': 'a9LDPn-MO4I',
1064                 'ext': 'm4a',
1065                 'upload_date': '20121002',
1066                 'uploader_id': '8KVIDEO',
1067                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1068                 'description': '',
1069                 'uploader': '8KVIDEO',
1070                 'title': 'UHDTV TEST 8K VIDEO.mp4'
1071             },
1072             'params': {
1073                 'youtube_include_dash_manifest': True,
1074                 'format': '141',
1075             },
1076             'skip': 'format 141 not served anymore',
1077         },
1078         # DASH manifest with encrypted signature
1079         {
1080             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1081             'info_dict': {
1082                 'id': 'IB3lcPjvWLA',
1083                 'ext': 'm4a',
1084                 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1085                 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1086                 'duration': 244,
1087                 'uploader': 'AfrojackVEVO',
1088                 'uploader_id': 'AfrojackVEVO',
1089                 'upload_date': '20131011',
1090                 'abr': 129.495,
1091             },
1092             'params': {
1093                 'youtube_include_dash_manifest': True,
1094                 'format': '141/bestaudio[ext=m4a]',
1095             },
1096         },
1097         # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1098         {
1099             'note': 'Embed allowed age-gate video',
1100             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1101             'info_dict': {
1102                 'id': 'HtVdAasjOgU',
1103                 'ext': 'mp4',
1104                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1105                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1106                 'duration': 142,
1107                 'uploader': 'The Witcher',
1108                 'uploader_id': 'WitcherGame',
1109                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1110                 'upload_date': '20140605',
1111                 'age_limit': 18,
1112             },
1113         },
1114         {
1115             'note': 'Age-gate video with embed allowed in public site',
1116             'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1117             'info_dict': {
1118                 'id': 'HsUATh_Nc2U',
1119                 'ext': 'mp4',
1120                 'title': 'Godzilla 2 (Official Video)',
1121                 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1122                 'upload_date': '20200408',
1123                 'uploader_id': 'FlyingKitty900',
1124                 'uploader': 'FlyingKitty',
1125                 'age_limit': 18,
1126             },
1127         },
1128         {
1129             'note': 'Age-gate video embedable only with clientScreen=EMBED',
1130             'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1131             'info_dict': {
1132                 'id': 'Tq92D6wQ1mg',
1133                 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1134                 'ext': 'mp4',
1135                 'upload_date': '20191227',
1136                 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1137                 'uploader': 'Projekt Melody',
1138                 'description': 'md5:17eccca93a786d51bc67646756894066',
1139                 'age_limit': 18,
1140             },
1141         },
1142         {
1143             'note': 'Non-Agegated non-embeddable video',
1144             'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1145             'info_dict': {
1146                 'id': 'MeJVWBSsPAY',
1147                 'ext': 'mp4',
1148                 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1149                 'uploader': 'Herr Lurik',
1150                 'uploader_id': 'st3in234',
1151                 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1152                 'upload_date': '20130730',
1153             },
1154         },
1155         {
1156             'note': 'Non-bypassable age-gated video',
1157             'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1158             'only_matching': True,
1159         },
1160         # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1161         # YouTube Red ad is not captured for creator
1162         {
1163             'url': '__2ABJjxzNo',
1164             'info_dict': {
1165                 'id': '__2ABJjxzNo',
1166                 'ext': 'mp4',
1167                 'duration': 266,
1168                 'upload_date': '20100430',
1169                 'uploader_id': 'deadmau5',
1170                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1171                 'creator': 'deadmau5',
1172                 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1173                 'uploader': 'deadmau5',
1174                 'title': 'Deadmau5 - Some Chords (HD)',
1175                 'alt_title': 'Some Chords',
1176             },
1177             'expected_warnings': [
1178                 'DASH manifest missing',
1179             ]
1180         },
1181         # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1182         {
1183             'url': 'lqQg6PlCWgI',
1184             'info_dict': {
1185                 'id': 'lqQg6PlCWgI',
1186                 'ext': 'mp4',
1187                 'duration': 6085,
1188                 'upload_date': '20150827',
1189                 'uploader_id': 'olympic',
1190                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1191                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1192                 'uploader': 'Olympics',
1193                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
1194             },
1195             'params': {
1196                 'skip_download': 'requires avconv',
1197             }
1198         },
1199         # Non-square pixels
1200         {
1201             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1202             'info_dict': {
1203                 'id': '_b-2C3KPAM0',
1204                 'ext': 'mp4',
1205                 'stretched_ratio': 16 / 9.,
1206                 'duration': 85,
1207                 'upload_date': '20110310',
1208                 'uploader_id': 'AllenMeow',
1209                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1210                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1211                 'uploader': '孫ᄋᄅ',
1212                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1213             },
1214         },
1215         # url_encoded_fmt_stream_map is empty string
1216         {
1217             'url': 'qEJwOuvDf7I',
1218             'info_dict': {
1219                 'id': 'qEJwOuvDf7I',
1220                 'ext': 'webm',
1221                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1222                 'description': '',
1223                 'upload_date': '20150404',
1224                 'uploader_id': 'spbelect',
1225                 'uploader': 'Наблюдатели Петербурга',
1226             },
1227             'params': {
1228                 'skip_download': 'requires avconv',
1229             },
1230             'skip': 'This live event has ended.',
1231         },
1232         # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1233         {
1234             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1235             'info_dict': {
1236                 'id': 'FIl7x6_3R5Y',
1237                 'ext': 'webm',
1238                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1239                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1240                 'duration': 220,
1241                 'upload_date': '20150625',
1242                 'uploader_id': 'dorappi2000',
1243                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1244                 'uploader': 'dorappi2000',
1245                 'formats': 'mincount:31',
1246             },
1247             'skip': 'not actual anymore',
1248         },
1249         # DASH manifest with segment_list
1250         {
1251             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1252             'md5': '8ce563a1d667b599d21064e982ab9e31',
1253             'info_dict': {
1254                 'id': 'CsmdDsKjzN8',
1255                 'ext': 'mp4',
1256                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1257                 'uploader': 'Airtek',
1258                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1259                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1260                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1261             },
1262             'params': {
1263                 'youtube_include_dash_manifest': True,
1264                 'format': '135',  # bestvideo
1265             },
1266             'skip': 'This live event has ended.',
1267         },
1268         {
1269             # Multifeed videos (multiple cameras), URL is for Main Camera
1270             'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1271             'info_dict': {
1272                 'id': 'jvGDaLqkpTg',
1273                 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1274                 'description': 'md5:e03b909557865076822aa169218d6a5d',
1275             },
1276             'playlist': [{
1277                 'info_dict': {
1278                     'id': 'jvGDaLqkpTg',
1279                     'ext': 'mp4',
1280                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1281                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1282                     'duration': 10643,
1283                     'upload_date': '20161111',
1284                     'uploader': 'Team PGP',
1285                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1286                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1287                 },
1288             }, {
1289                 'info_dict': {
1290                     'id': '3AKt1R1aDnw',
1291                     'ext': 'mp4',
1292                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1293                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1294                     'duration': 10991,
1295                     'upload_date': '20161111',
1296                     'uploader': 'Team PGP',
1297                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1298                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1299                 },
1300             }, {
1301                 'info_dict': {
1302                     'id': 'RtAMM00gpVc',
1303                     'ext': 'mp4',
1304                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1305                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1306                     'duration': 10995,
1307                     'upload_date': '20161111',
1308                     'uploader': 'Team PGP',
1309                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1310                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1311                 },
1312             }, {
1313                 'info_dict': {
1314                     'id': '6N2fdlP3C5U',
1315                     'ext': 'mp4',
1316                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1317                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1318                     'duration': 10990,
1319                     'upload_date': '20161111',
1320                     'uploader': 'Team PGP',
1321                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1322                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1323                 },
1324             }],
1325             'params': {
1326                 'skip_download': True,
1327             },
1328             'skip': 'Not multifeed anymore',
1329         },
1330         {
1331             # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1332             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1333             'info_dict': {
1334                 'id': 'gVfLd0zydlo',
1335                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1336             },
1337             'playlist_count': 2,
1338             'skip': 'Not multifeed anymore',
1339         },
1340         {
1341             'url': 'https://vid.plus/FlRa-iH7PGw',
1342             'only_matching': True,
1343         },
1344         {
1345             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1346             'only_matching': True,
1347         },
1348         {
1349             # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1350             # Also tests cut-off URL expansion in video description (see
1351             # https://github.com/ytdl-org/youtube-dl/issues/1892,
1352             # https://github.com/ytdl-org/youtube-dl/issues/8164)
1353             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1354             'info_dict': {
1355                 'id': 'lsguqyKfVQg',
1356                 'ext': 'mp4',
1357                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1358                 'alt_title': 'Dark Walk',
1359                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1360                 'duration': 133,
1361                 'upload_date': '20151119',
1362                 'uploader_id': 'IronSoulElf',
1363                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1364                 'uploader': 'IronSoulElf',
1365                 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1366                 'track': 'Dark Walk',
1367                 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1368                 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1369             },
1370             'params': {
1371                 'skip_download': True,
1372             },
1373         },
1374         {
1375             # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1376             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1377             'only_matching': True,
1378         },
1379         {
1380             # Video with yt:stretch=17:0
1381             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1382             'info_dict': {
1383                 'id': 'Q39EVAstoRM',
1384                 'ext': 'mp4',
1385                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1386                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1387                 'upload_date': '20151107',
1388                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1389                 'uploader': 'CH GAMER DROID',
1390             },
1391             'params': {
1392                 'skip_download': True,
1393             },
1394             'skip': 'This video does not exist.',
1395         },
1396         {
1397             # Video with incomplete 'yt:stretch=16:'
1398             'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1399             'only_matching': True,
1400         },
1401         {
1402             # Video licensed under Creative Commons
1403             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1404             'info_dict': {
1405                 'id': 'M4gD1WSo5mA',
1406                 'ext': 'mp4',
1407                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1408                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1409                 'duration': 721,
1410                 'upload_date': '20150127',
1411                 'uploader_id': 'BerkmanCenter',
1412                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1413                 'uploader': 'The Berkman Klein Center for Internet & Society',
1414                 'license': 'Creative Commons Attribution license (reuse allowed)',
1415             },
1416             'params': {
1417                 'skip_download': True,
1418             },
1419         },
1420         {
1421             # Channel-like uploader_url
1422             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1423             'info_dict': {
1424                 'id': 'eQcmzGIKrzg',
1425                 'ext': 'mp4',
1426                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1427                 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1428                 'duration': 4060,
1429                 'upload_date': '20151119',
1430                 'uploader': 'Bernie Sanders',
1431                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1432                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1433                 'license': 'Creative Commons Attribution license (reuse allowed)',
1434             },
1435             'params': {
1436                 'skip_download': True,
1437             },
1438         },
1439         {
1440             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1441             'only_matching': True,
1442         },
1443         {
1444             # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1445             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1446             'only_matching': True,
1447         },
1448         {
1449             # Rental video preview
1450             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1451             'info_dict': {
1452                 'id': 'uGpuVWrhIzE',
1453                 'ext': 'mp4',
1454                 'title': 'Piku - Trailer',
1455                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1456                 'upload_date': '20150811',
1457                 'uploader': 'FlixMatrix',
1458                 'uploader_id': 'FlixMatrixKaravan',
1459                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1460                 'license': 'Standard YouTube License',
1461             },
1462             'params': {
1463                 'skip_download': True,
1464             },
1465             'skip': 'This video is not available.',
1466         },
1467         {
1468             # YouTube Red video with episode data
1469             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1470             'info_dict': {
1471                 'id': 'iqKdEhx-dD4',
1472                 'ext': 'mp4',
1473                 'title': 'Isolation - Mind Field (Ep 1)',
1474                 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1475                 'duration': 2085,
1476                 'upload_date': '20170118',
1477                 'uploader': 'Vsauce',
1478                 'uploader_id': 'Vsauce',
1479                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1480                 'series': 'Mind Field',
1481                 'season_number': 1,
1482                 'episode_number': 1,
1483             },
1484             'params': {
1485                 'skip_download': True,
1486             },
1487             'expected_warnings': [
1488                 'Skipping DASH manifest',
1489             ],
1490         },
1491         {
1492             # The following content has been identified by the YouTube community
1493             # as inappropriate or offensive to some audiences.
1494             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1495             'info_dict': {
1496                 'id': '6SJNVb0GnPI',
1497                 'ext': 'mp4',
1498                 'title': 'Race Differences in Intelligence',
1499                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1500                 'duration': 965,
1501                 'upload_date': '20140124',
1502                 'uploader': 'New Century Foundation',
1503                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1504                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1505             },
1506             'params': {
1507                 'skip_download': True,
1508             },
1509             'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1510         },
1511         {
1512             # itag 212
1513             'url': '1t24XAntNCY',
1514             'only_matching': True,
1515         },
1516         {
1517             # geo restricted to JP
1518             'url': 'sJL6WA-aGkQ',
1519             'only_matching': True,
1520         },
1521         {
1522             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1523             'only_matching': True,
1524         },
1525         {
1526             'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1527             'only_matching': True,
1528         },
1529         {
1530             # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1531             'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1532             'only_matching': True,
1533         },
1534         {
1535             # DRM protected
1536             'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1537             'only_matching': True,
1538         },
1539         {
1540             # Video with unsupported adaptive stream type formats
1541             'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1542             'info_dict': {
1543                 'id': 'Z4Vy8R84T1U',
1544                 'ext': 'mp4',
1545                 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1546                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1547                 'duration': 433,
1548                 'upload_date': '20130923',
1549                 'uploader': 'Amelia Putri Harwita',
1550                 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1551                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1552                 'formats': 'maxcount:10',
1553             },
1554             'params': {
1555                 'skip_download': True,
1556                 'youtube_include_dash_manifest': False,
1557             },
1558             'skip': 'not actual anymore',
1559         },
1560         {
1561             # Youtube Music Auto-generated description
1562             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1563             'info_dict': {
1564                 'id': 'MgNrAu2pzNs',
1565                 'ext': 'mp4',
1566                 'title': 'Voyeur Girl',
1567                 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1568                 'upload_date': '20190312',
1569                 'uploader': 'Stephen - Topic',
1570                 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1571                 'artist': 'Stephen',
1572                 'track': 'Voyeur Girl',
1573                 'album': 'it\'s too much love to know my dear',
1574                 'release_date': '20190313',
1575                 'release_year': 2019,
1576             },
1577             'params': {
1578                 'skip_download': True,
1579             },
1580         },
1581         {
1582             'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1583             'only_matching': True,
1584         },
1585         {
1586             # invalid -> valid video id redirection
1587             'url': 'DJztXj2GPfl',
1588             'info_dict': {
1589                 'id': 'DJztXj2GPfk',
1590                 'ext': 'mp4',
1591                 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1592                 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1593                 'upload_date': '20090125',
1594                 'uploader': 'Prochorowka',
1595                 'uploader_id': 'Prochorowka',
1596                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1597                 'artist': 'Panjabi MC',
1598                 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1599                 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1600             },
1601             'params': {
1602                 'skip_download': True,
1603             },
1604             'skip': 'Video unavailable',
1605         },
1606         {
1607             # empty description results in an empty string
1608             'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1609             'info_dict': {
1610                 'id': 'x41yOUIvK2k',
1611                 'ext': 'mp4',
1612                 'title': 'IMG 3456',
1613                 'description': '',
1614                 'upload_date': '20170613',
1615                 'uploader_id': 'ElevageOrVert',
1616                 'uploader': 'ElevageOrVert',
1617             },
1618             'params': {
1619                 'skip_download': True,
1620             },
1621         },
1622         {
1623             # with '};' inside yt initial data (see [1])
1624             # see [2] for an example with '};' inside ytInitialPlayerResponse
1625             # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1626             # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1627             'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1628             'info_dict': {
1629                 'id': 'CHqg6qOn4no',
1630                 'ext': 'mp4',
1631                 'title': 'Part 77   Sort a list of simple types in c#',
1632                 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1633                 'upload_date': '20130831',
1634                 'uploader_id': 'kudvenkat',
1635                 'uploader': 'kudvenkat',
1636             },
1637             'params': {
1638                 'skip_download': True,
1639             },
1640         },
1641         {
1642             # another example of '};' in ytInitialData
1643             'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1644             'only_matching': True,
1645         },
1646         {
1647             'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1648             'only_matching': True,
1649         },
1650         {
1651             # https://github.com/ytdl-org/youtube-dl/pull/28094
1652             'url': 'OtqTfy26tG0',
1653             'info_dict': {
1654                 'id': 'OtqTfy26tG0',
1655                 'ext': 'mp4',
1656                 'title': 'Burn Out',
1657                 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1658                 'upload_date': '20141120',
1659                 'uploader': 'The Cinematic Orchestra - Topic',
1660                 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1661                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1662                 'artist': 'The Cinematic Orchestra',
1663                 'track': 'Burn Out',
1664                 'album': 'Every Day',
1665                 'release_data': None,
1666                 'release_year': None,
1667             },
1668             'params': {
1669                 'skip_download': True,
1670             },
1671         },
1672         {
1673             # controversial video, only works with bpctr when authenticated with cookies
1674             'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1675             'only_matching': True,
1676         },
1677         {
1678             # controversial video, requires bpctr/contentCheckOk
1679             'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1680             'info_dict': {
1681                 'id': 'SZJvDhaSDnc',
1682                 'ext': 'mp4',
1683                 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1684                 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1685                 'uploader': 'CBS This Morning',
1686                 'uploader_id': 'CBSThisMorning',
1687                 'upload_date': '20140716',
1688                 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1689             }
1690         },
1691         {
1692             # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1693             'url': 'cBvYw8_A0vQ',
1694             'info_dict': {
1695                 'id': 'cBvYw8_A0vQ',
1696                 'ext': 'mp4',
1697                 'title': '4K Ueno Okachimachi  Street  Scenes  上野御徒町歩き',
1698                 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1699                 'upload_date': '20201120',
1700                 'uploader': 'Walk around Japan',
1701                 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1702                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1703             },
1704             'params': {
1705                 'skip_download': True,
1706             },
1707         }, {
1708             # Has multiple audio streams
1709             'url': 'WaOKSUlf4TM',
1710             'only_matching': True
1711         }, {
1712             # Requires Premium: has format 141 when requested using YTM url
1713             'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1714             'only_matching': True
1715         }, {
1716             # multiple subtitles with same lang_code
1717             'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1718             'only_matching': True,
1719         }, {
1720             # Force use android client fallback
1721             'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1722             'info_dict': {
1723                 'id': 'YOelRv7fMxY',
1724                 'title': 'DIGGING A SECRET TUNNEL Part 1',
1725                 'ext': '3gp',
1726                 'upload_date': '20210624',
1727                 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1728                 'uploader': 'colinfurze',
1729                 'uploader_id': 'colinfurze',
1730                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1731                 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1732             },
1733             'params': {
1734                 'format': '17',  # 3gp format available on android
1735                 'extractor_args': {'youtube': {'player_client': ['android']}},
1736             },
1737         },
1738         {
1739             # Skip download of additional client configs (remix client config in this case)
1740             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1741             'only_matching': True,
1742             'params': {
1743                 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1744             },
1745         }, {
1746             # shorts
1747             'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1748             'only_matching': True,
1749         }, {
1750             'note': 'Storyboards',
1751             'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
1752             'info_dict': {
1753                 'id': '5KLPxDtMqe8',
1754                 'ext': 'mhtml',
1755                 'format_id': 'sb0',
1756                 'title': 'Your Brain is Plastic',
1757                 'uploader_id': 'scishow',
1758                 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
1759                 'upload_date': '20140324',
1760                 'uploader': 'SciShow',
1761             }, 'params': {'format': 'mhtml', 'skip_download': True}
1762         }
1763     ]
1764
1765     @classmethod
1766     def suitable(cls, url):
1767         from ..utils import parse_qs
1768
1769         qs = parse_qs(url)
1770         if qs.get('list', [None])[0]:
1771             return False
1772         return super(YoutubeIE, cls).suitable(url)
1773
1774     def __init__(self, *args, **kwargs):
1775         super(YoutubeIE, self).__init__(*args, **kwargs)
1776         self._code_cache = {}
1777         self._player_cache = {}
1778
1779     def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
1780         lock = threading.Lock()
1781
1782         is_live = True
1783         start_time = time.time()
1784         formats = [f for f in formats if f.get('is_from_start')]
1785
1786         def refetch_manifest(format_id, delay):
1787             nonlocal formats, start_time, is_live
1788             if time.time() <= start_time + delay:
1789                 return
1790
1791             _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
1792             video_details = traverse_obj(
1793                 prs, (..., 'videoDetails'), expected_type=dict, default=[])
1794             microformats = traverse_obj(
1795                 prs, (..., 'microformat', 'playerMicroformatRenderer'),
1796                 expected_type=dict, default=[])
1797             _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
1798             start_time = time.time()
1799
1800         def mpd_feed(format_id, delay):
1801             """
1802             @returns (manifest_url, manifest_stream_number, is_live) or None
1803             """
1804             with lock:
1805                 refetch_manifest(format_id, delay)
1806
1807             f = next((f for f in formats if f['format_id'] == format_id), None)
1808             if not f:
1809                 if not is_live:
1810                     self.to_screen(f'{video_id}: Video is no longer live')
1811                 else:
1812                     self.report_warning(
1813                         f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
1814                 return None
1815             return f['manifest_url'], f['manifest_stream_number'], is_live
1816
1817         for f in formats:
1818             f['protocol'] = 'http_dash_segments_generator'
1819             f['fragments'] = functools.partial(
1820                 self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)
1821
1822     def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
1823         FETCH_SPAN, MAX_DURATION = 5, 432000
1824
1825         mpd_url, stream_number, is_live = None, None, True
1826
1827         begin_index = 0
1828         download_start_time = ctx.get('start') or time.time()
1829
1830         lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
1831         if lack_early_segments:
1832             self.report_warning(bug_reports_message(
1833                 'Starting download from the last 120 hours of the live stream since '
1834                 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
1835             lack_early_segments = True
1836
1837         known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
1838         fragments, fragment_base_url = None, None
1839
1840         def _extract_sequence_from_mpd(refresh_sequence):
1841             nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
1842             # Obtain from MPD's maximum seq value
1843             old_mpd_url = mpd_url
1844             last_error = ctx.pop('last_error', None)
1845             expire_fast = last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403
1846             mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
1847                                                or (mpd_url, stream_number, False))
1848             if not refresh_sequence:
1849                 if expire_fast and not is_live:
1850                     return False, last_seq
1851                 elif old_mpd_url == mpd_url:
1852                     return True, last_seq
1853             try:
1854                 fmts, _ = self._extract_mpd_formats_and_subtitles(
1855                     mpd_url, None, note=False, errnote=False, fatal=False)
1856             except ExtractorError:
1857                 fmts = None
1858             if not fmts:
1859                 no_fragment_score += 1
1860                 return False, last_seq
1861             fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
1862             fragments = fmt_info['fragments']
1863             fragment_base_url = fmt_info['fragment_base_url']
1864             assert fragment_base_url
1865
1866             _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
1867             return True, _last_seq
1868
1869         while is_live:
1870             fetch_time = time.time()
1871             if no_fragment_score > 30:
1872                 return
1873             if last_segment_url:
1874                 # Obtain from "X-Head-Seqnum" header value from each segment
1875                 try:
1876                     urlh = self._request_webpage(
1877                         last_segment_url, None, note=False, errnote=False, fatal=False)
1878                 except ExtractorError:
1879                     urlh = None
1880                 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
1881                 if last_seq is None:
1882                     no_fragment_score += 1
1883                     last_segment_url = None
1884                     continue
1885             else:
1886                 should_continue, last_seq = _extract_sequence_from_mpd(True)
1887                 if not should_continue:
1888                     continue
1889
1890             if known_idx > last_seq:
1891                 last_segment_url = None
1892                 continue
1893
1894             last_seq += 1
1895
1896             if begin_index < 0 and known_idx < 0:
1897                 # skip from the start when it's negative value
1898                 known_idx = last_seq + begin_index
1899             if lack_early_segments:
1900                 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
1901             try:
1902                 for idx in range(known_idx, last_seq):
1903                     # do not update sequence here or you'll get skipped some part of it
1904                     should_continue, _ = _extract_sequence_from_mpd(False)
1905                     if not should_continue:
1906                         known_idx = idx - 1
1907                         raise ExtractorError('breaking out of outer loop')
1908                     last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
1909                     yield {
1910                         'url': last_segment_url,
1911                     }
1912                 if known_idx == last_seq:
1913                     no_fragment_score += 5
1914                 else:
1915                     no_fragment_score = 0
1916                 known_idx = last_seq
1917             except ExtractorError:
1918                 continue
1919
1920             time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
1921
1922     def _extract_player_url(self, *ytcfgs, webpage=None):
1923         player_url = traverse_obj(
1924             ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
1925             get_all=False, expected_type=compat_str)
1926         if not player_url:
1927             return
1928         if player_url.startswith('//'):
1929             player_url = 'https:' + player_url
1930         elif not re.match(r'https?://', player_url):
1931             player_url = compat_urlparse.urljoin(
1932                 'https://www.youtube.com', player_url)
1933         return player_url
1934
1935     def _download_player_url(self, video_id, fatal=False):
1936         res = self._download_webpage(
1937             'https://www.youtube.com/iframe_api',
1938             note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
1939         if res:
1940             player_version = self._search_regex(
1941                 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
1942             if player_version:
1943                 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
1944
1945     def _signature_cache_id(self, example_sig):
1946         """ Return a string representation of a signature """
1947         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1948
1949     @classmethod
1950     def _extract_player_info(cls, player_url):
1951         for player_re in cls._PLAYER_INFO_RE:
1952             id_m = re.search(player_re, player_url)
1953             if id_m:
1954                 break
1955         else:
1956             raise ExtractorError('Cannot identify player %r' % player_url)
1957         return id_m.group('id')
1958
1959     def _load_player(self, video_id, player_url, fatal=True):
1960         player_id = self._extract_player_info(player_url)
1961         if player_id not in self._code_cache:
1962             code = self._download_webpage(
1963                 player_url, video_id, fatal=fatal,
1964                 note='Downloading player ' + player_id,
1965                 errnote='Download of %s failed' % player_url)
1966             if code:
1967                 self._code_cache[player_id] = code
1968         return self._code_cache.get(player_id)
1969
1970     def _extract_signature_function(self, video_id, player_url, example_sig):
1971         player_id = self._extract_player_info(player_url)
1972
1973         # Read from filesystem cache
1974         func_id = 'js_%s_%s' % (
1975             player_id, self._signature_cache_id(example_sig))
1976         assert os.path.basename(func_id) == func_id
1977
1978         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1979         if cache_spec is not None:
1980             return lambda s: ''.join(s[i] for i in cache_spec)
1981
1982         code = self._load_player(video_id, player_url)
1983         if code:
1984             res = self._parse_sig_js(code)
1985
1986             test_string = ''.join(map(compat_chr, range(len(example_sig))))
1987             cache_res = res(test_string)
1988             cache_spec = [ord(c) for c in cache_res]
1989
1990             self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1991             return res
1992
    def _print_sig_code(self, func, example_sig):
        """Print Python code equivalent to the signature function `func`.

        Does nothing unless the 'youtube_print_sig_code' param is set.
        `func` is run on a probe string whose characters encode their own
        positions; the resulting index list is rendered as a sum of slice and
        index expressions on `s` and written out with `to_screen`.
        """
        if not self.get_param('youtube_print_sig_code'):
            return

        def gen_sig_code(idxs):
            # Yields 's[...]' expressions covering `idxs`, merging runs whose
            # consecutive indices differ by +1 or -1 into a single slice
            def _genslice(start, end, step):
                # Render the run start..end (inclusive) as a slice of `s`
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            # step is None while no run is in progress
            step = None
            # Squelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        continue
                    # The run ended at prev; emit it and start over
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Emit whatever is left after the last pair
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Probe string: character k has code point k
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
2034
2035     def _parse_sig_js(self, jscode):
2036         funcname = self._search_regex(
2037             (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2038              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2039              r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
2040              r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
2041              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
2042              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
2043              r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
2044              # Obsolete patterns
2045              r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2046              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
2047              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2048              r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2049              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2050              r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2051              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2052              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
2053             jscode, 'Initial JS player signature function name', group='sig')
2054
2055         jsi = JSInterpreter(jscode)
2056         initial_function = jsi.extract_function(funcname)
2057         return lambda s: initial_function([s])
2058
2059     def _decrypt_signature(self, s, video_id, player_url):
2060         """Turn the encrypted s field into a working signature"""
2061
2062         if player_url is None:
2063             raise ExtractorError('Cannot decrypt signature without player_url')
2064
2065         try:
2066             player_id = (player_url, self._signature_cache_id(s))
2067             if player_id not in self._player_cache:
2068                 func = self._extract_signature_function(
2069                     video_id, player_url, s
2070                 )
2071                 self._player_cache[player_id] = func
2072             func = self._player_cache[player_id]
2073             self._print_sig_code(func, s)
2074             return func(s)
2075         except Exception as e:
2076             raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
2077
2078     def _decrypt_nsig(self, s, video_id, player_url):
2079         """Turn the encrypted n field into a working signature"""
2080         if player_url is None:
2081             raise ExtractorError('Cannot decrypt nsig without player_url')
2082         if player_url.startswith('//'):
2083             player_url = 'https:' + player_url
2084         elif not re.match(r'https?://', player_url):
2085             player_url = compat_urlparse.urljoin(
2086                 'https://www.youtube.com', player_url)
2087
2088         sig_id = ('nsig_value', s)
2089         if sig_id in self._player_cache:
2090             return self._player_cache[sig_id]
2091
2092         try:
2093             player_id = ('nsig', player_url)
2094             if player_id not in self._player_cache:
2095                 self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
2096             func = self._player_cache[player_id]
2097             self._player_cache[sig_id] = func(s)
2098             self.write_debug(f'Decrypted nsig {s} => {self._player_cache[sig_id]}')
2099             return self._player_cache[sig_id]
2100         except Exception as e:
2101             raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
2102
2103     def _extract_n_function_name(self, jscode):
2104         return self._search_regex(
2105             (r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',),
2106             jscode, 'Initial JS player n function name', group='nfunc')
2107
2108     def _extract_n_function(self, video_id, player_url):
2109         player_id = self._extract_player_info(player_url)
2110         func_code = self._downloader.cache.load('youtube-nsig', player_id)
2111
2112         if func_code:
2113             jsi = JSInterpreter(func_code)
2114         else:
2115             jscode = self._load_player(video_id, player_url)
2116             funcname = self._extract_n_function_name(jscode)
2117             jsi = JSInterpreter(jscode)
2118             func_code = jsi.extract_function_code(funcname)
2119             self._downloader.cache.store('youtube-nsig', player_id, func_code)
2120
2121         if self.get_param('youtube_print_sig_code'):
2122             self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
2123
2124         return lambda s: jsi.extract_function_from_code(*func_code)([s])
2125
2126     def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
2127         """
2128         Extract signatureTimestamp (sts)
2129         Required to tell API what sig/player version is in use.
2130         """
2131         sts = None
2132         if isinstance(ytcfg, dict):
2133             sts = int_or_none(ytcfg.get('STS'))
2134
2135         if not sts:
2136             # Attempt to extract from player
2137             if player_url is None:
2138                 error_msg = 'Cannot extract signature timestamp without player_url.'
2139                 if fatal:
2140                     raise ExtractorError(error_msg)
2141                 self.report_warning(error_msg)
2142                 return
2143             code = self._load_player(video_id, player_url, fatal=fatal)
2144             if code:
2145                 sts = int_or_none(self._search_regex(
2146                     r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
2147                     'JS player signature timestamp', group='sts', fatal=fatal))
2148         return sts
2149
2150     def _mark_watched(self, video_id, player_responses):
2151         playback_url = get_first(
2152             player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
2153             expected_type=url_or_none)
2154         if not playback_url:
2155             self.report_warning('Unable to mark watched')
2156             return
2157         parsed_playback_url = compat_urlparse.urlparse(playback_url)
2158         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
2159
2160         # cpn generation algorithm is reverse engineered from base.js.
2161         # In fact it works even with dummy cpn.
2162         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
2163         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
2164
2165         qs.update({
2166             'ver': ['2'],
2167             'cpn': [cpn],
2168         })
2169         playback_url = compat_urlparse.urlunparse(
2170             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
2171
2172         self._download_webpage(
2173             playback_url, video_id, 'Marking watched',
2174             'Unable to mark watched', fatal=False)
2175
2176     @staticmethod
2177     def _extract_urls(webpage):
2178         # Embedded YouTube player
2179         entries = [
2180             unescapeHTML(mobj.group('url'))
2181             for mobj in re.finditer(r'''(?x)
2182             (?:
2183                 <iframe[^>]+?src=|
2184                 data-video-url=|
2185                 <embed[^>]+?src=|
2186                 embedSWF\(?:\s*|
2187                 <object[^>]+data=|
2188                 new\s+SWFObject\(
2189             )
2190             (["\'])
2191                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
2192                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
2193             \1''', webpage)]
2194
2195         # lazyYT YouTube embed
2196         entries.extend(list(map(
2197             unescapeHTML,
2198             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
2199
2200         # Wordpress "YouTube Video Importer" plugin
2201         matches = re.findall(r'''(?x)<div[^>]+
2202             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
2203             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
2204         entries.extend(m[-1] for m in matches)
2205
2206         return entries
2207
2208     @staticmethod
2209     def _extract_url(webpage):
2210         urls = YoutubeIE._extract_urls(webpage)
2211         return urls[0] if urls else None
2212
2213     @classmethod
2214     def extract_id(cls, url):
2215         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2216         if mobj is None:
2217             raise ExtractorError('Invalid URL: %s' % url)
2218         return mobj.group('id')
2219
2220     def _extract_chapters_from_json(self, data, duration):
2221         chapter_list = traverse_obj(
2222             data, (
2223                 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2224                 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2225             ), expected_type=list)
2226
2227         return self._extract_chapters(
2228             chapter_list,
2229             chapter_time=lambda chapter: float_or_none(
2230                 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2231             chapter_title=lambda chapter: traverse_obj(
2232                 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2233             duration=duration)
2234
2235     def _extract_chapters_from_engagement_panel(self, data, duration):
2236         content_list = traverse_obj(
2237             data,
2238             ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2239             expected_type=list, default=[])
2240         chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2241         chapter_title = lambda chapter: self._get_text(chapter, 'title')
2242
2243         return next((
2244             filter(None, (
2245                 self._extract_chapters(
2246                     traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2247                     chapter_time, chapter_title, duration)
2248                 for contents in content_list
2249             ))), [])
2250
2251     def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2252         chapters = []
2253         last_chapter = {'start_time': 0}
2254         for idx, chapter in enumerate(chapter_list or []):
2255             title = chapter_title(chapter)
2256             start_time = chapter_time(chapter)
2257             if start_time is None:
2258                 continue
2259             last_chapter['end_time'] = start_time
2260             if start_time < last_chapter['start_time']:
2261                 if idx == 1:
2262                     chapters.pop()
2263                     self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2264                 else:
2265                     self.report_warning(f'Invalid start time for chapter "{title}"')
2266                     continue
2267             last_chapter = {'start_time': start_time, 'title': title}
2268             chapters.append(last_chapter)
2269         last_chapter['end_time'] = duration
2270         return chapters
2271
2272     def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2273         return self._parse_json(self._search_regex(
2274             (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2275              regex), webpage, name, default='{}'), video_id, fatal=False)
2276
2277     def _extract_comment(self, comment_renderer, parent=None):
2278         comment_id = comment_renderer.get('commentId')
2279         if not comment_id:
2280             return
2281
2282         text = self._get_text(comment_renderer, 'contentText')
2283
2284         # note: timestamp is an estimate calculated from the current time and time_text
2285         timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
2286         author = self._get_text(comment_renderer, 'authorText')
2287         author_id = try_get(comment_renderer,
2288                             lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2289
2290         votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2291                                                        lambda x: x['likeCount']), compat_str)) or 0
2292         author_thumbnail = try_get(comment_renderer,
2293                                    lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2294
2295         author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2296         is_favorited = 'creatorHeart' in (try_get(
2297             comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2298         return {
2299             'id': comment_id,
2300             'text': text,
2301             'timestamp': timestamp,
2302             'time_text': time_text,
2303             'like_count': votes,
2304             'is_favorited': is_favorited,
2305             'author': author,
2306             'author_id': author_id,
2307             'author_thumbnail': author_thumbnail,
2308             'author_is_uploader': author_is_uploader,
2309             'parent': parent or 'root'
2310         }
2311
    def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
        """Generate comment dicts by following comment continuations page by page.

        Called without `parent` for the top-level comment section, and
        recursively with `parent` set to a comment id for that comment's
        replies. `tracker` is a dict of running counters shared across the
        recursive calls; it is created here when not passed in.

        The 'max_comments' extractor argument supplies up to four limits
        (missing or unparsable entries default to sys.maxsize); the parent,
        reply and replies-per-thread limits are applied in `extract_thread`.
        """

        get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

        def extract_header(contents):
            # Reads the comments header: records the estimated comment count in
            # the tracker and returns the continuation of the requested sort
            # order (None if no item provides one)
            _continuation = None
            for content in contents:
                comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
                expected_comment_count = parse_count(self._get_text(
                    comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

                if expected_comment_count:
                    tracker['est_total'] = expected_comment_count
                    self.to_screen(f'Downloading ~{expected_comment_count} comments')
                comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    continue

                sort_text = str_or_none(sort_menu_item.get('title'))
                if not sort_text:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text.lower())
                break
            return _continuation

        def extract_thread(contents):
            # Yields each comment of a page followed by its replies. A bare
            # `yield` (None) signals that the parent-comment limit was reached;
            # the caller stops iterating when it receives it.
            if not parent:
                tracker['current_page_thread'] = 0
            for content in contents:
                if not parent and tracker['total_parent_comments'] >= max_parents:
                    yield
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                # The comment renderer is either nested in the thread renderer
                # or sits directly in the item
                comment_renderer = get_first(
                    (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                    expected_type=dict, default={})

                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue

                tracker['running_total'] += 1
                tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
                yield comment

                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    tracker['current_page_thread'] += 1
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, ytcfg, video_id,
                        parent=comment.get('id'), tracker=tracker)
                    # Cap replies by the per-thread limit and by what is left
                    # of the overall reply limit
                    for reply_comment in itertools.islice(comment_entries_iter, min(max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))):
                        yield reply_comment

        # Keeps track of counts across recursive calls
        if not tracker:
            tracker = dict(
                running_total=0,
                est_total=0,
                current_page_thread=0,
                total_parent_comments=0,
                total_reply_comments=0)

        # TODO: Deprecated
        # YouTube comments have a max depth of 2
        max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
        if max_depth:
            self._downloader.deprecation_warning(
                '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
        if max_depth == 1 and parent:
            return

        # Pad the argument list so four values can always be unpacked.
        # max_comments itself is not used further in this function.
        max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
            lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

        continuation = self._extract_continuation(root_continuation_data)
        message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
        if message and not parent:
            self.report_warning(message, video_id=video_id)

        response = None
        # Only the top-level call starts with a header page
        is_first_continuation = parent is None

        for page_num in itertools.count(0):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = '    Downloading comment API JSON reply thread %d %s' % (
                        tracker['current_page_thread'], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    '       ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys='onResponseReceivedEndpoints')

            continuation_contents = traverse_obj(
                response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

            # Reset; the loop ends unless this page supplies a new continuation
            continuation = None
            for continuation_section in continuation_contents:
                continuation_items = traverse_obj(
                    continuation_section,
                    (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                    get_all=False, expected_type=list) or []
                if is_first_continuation:
                    # The first section is treated as the header; it provides
                    # the continuation for the selected sort order
                    continuation = extract_header(continuation_items)
                    is_first_continuation = False
                    if continuation:
                        break
                    continue

                for entry in extract_thread(continuation_items):
                    # A None entry is the limit sentinel from extract_thread
                    if not entry:
                        return
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    break
2448
2449     def _get_comments(self, ytcfg, video_id, contents, webpage):
2450         """Entry for comment extraction"""
2451         def _real_comment_extract(contents):
2452             renderer = next((
2453                 item for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
2454                 if item.get('sectionIdentifier') == 'comment-item-section'), None)
2455             yield from self._comment_entries(renderer, ytcfg, video_id)
2456
2457         max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
2458         return itertools.islice(_real_comment_extract(contents), 0, max_comments)
2459
2460     @staticmethod
2461     def _get_checkok_params():
2462         return {'contentCheckOk': True, 'racyCheckOk': True}
2463
2464     @classmethod
2465     def _generate_player_context(cls, sts=None):
2466         context = {
2467             'html5Preference': 'HTML5_PREF_WANTS',
2468         }
2469         if sts is not None:
2470             context['signatureTimestamp'] = sts
2471         return {
2472             'playbackContext': {
2473                 'contentPlaybackContext': context
2474             },
2475             **cls._get_checkok_params()
2476         }
2477
2478     @staticmethod
2479     def _is_agegated(player_response):
2480         if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
2481             return True
2482
2483         reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2484         AGE_GATE_REASONS = (
2485             'confirm your age', 'age-restricted', 'inappropriate',  # reason
2486             'age_verification_required', 'age_check_required',  # status
2487         )
2488         return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
2489
2490     @staticmethod
2491     def _is_unplayable(player_response):
2492         return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
2493
2494     def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
2495
2496         session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2497         syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2498         sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
2499         headers = self.generate_api_headers(
2500             ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
2501
2502         yt_query = {'videoId': video_id}
2503         yt_query.update(self._generate_player_context(sts))
2504         return self._extract_response(
2505             item_id=video_id, ep='player', query=yt_query,
2506             ytcfg=player_ytcfg, headers=headers, fatal=True,
2507             default_client=client,
2508             note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2509         ) or None
2510
2511     def _get_requested_clients(self, url, smuggled_data):
2512         requested_clients = []
2513         default = ['android', 'web']
2514         allowed_clients = sorted(
2515             [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
2516             key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
2517         for client in self._configuration_arg('player_client'):
2518             if client in allowed_clients:
2519                 requested_clients.append(client)
2520             elif client == 'default':
2521                 requested_clients.extend(default)
2522             elif client == 'all':
2523                 requested_clients.extend(allowed_clients)
2524             else:
2525                 self.report_warning(f'Skipping unsupported client {client}')
2526         if not requested_clients:
2527             requested_clients = default
2528
2529         if smuggled_data.get('is_music_url') or self.is_music_url(url):
2530             requested_clients.extend(
2531                 f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
2532
2533         return orderedSet(requested_clients)
2534
2535     def _extract_player_ytcfg(self, client, video_id):
2536         url = {
2537             'web_music': 'https://music.youtube.com',
2538             'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2539         }.get(client)
2540         if not url:
2541             return {}
2542         webpage = self._download_webpage(url, video_id, fatal=False, note='Downloading %s config' % client.replace('_', ' ').strip())
2543         return self.extract_ytcfg(video_id, webpage) or {}
2544
2545     def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
2546         initial_pr = None
2547         if webpage:
2548             initial_pr = self._extract_yt_initial_variable(
2549                 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2550                 video_id, 'initial player response')
2551
2552         original_clients = clients
2553         clients = clients[::-1]
2554         prs = []
2555
2556         def append_client(client_name):
2557             if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
2558                 clients.append(client_name)
2559
2560         # Android player_response does not have microFormats which are needed for
2561         # extraction of some data. So we return the initial_pr with formats
2562         # stripped out even if not requested by the user
2563         # See: https://github.com/yt-dlp/yt-dlp/issues/501
2564         if initial_pr:
2565             pr = dict(initial_pr)
2566             pr['streamingData'] = None
2567             prs.append(pr)
2568
2569         last_error = None
2570         tried_iframe_fallback = False
2571         player_url = None
2572         while clients:
2573             client = clients.pop()
2574             player_ytcfg = master_ytcfg if client == 'web' else {}
2575             if 'configs' not in self._configuration_arg('player_skip'):
2576                 player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg
2577
2578             player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
2579             require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
2580             if 'js' in self._configuration_arg('player_skip'):
2581                 require_js_player = False
2582                 player_url = None
2583
2584             if not player_url and not tried_iframe_fallback and require_js_player:
2585                 player_url = self._download_player_url(video_id)
2586                 tried_iframe_fallback = True
2587
2588             try:
2589                 pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
2590                     client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
2591             except ExtractorError as e:
2592                 if last_error:
2593                     self.report_warning(last_error)
2594                 last_error = e
2595                 continue
2596
2597             if pr:
2598                 prs.append(pr)
2599
2600             # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
2601             if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
2602                 append_client(client.replace('_agegate', '_creator'))
2603             elif self._is_agegated(pr):
2604                 append_client(f'{client}_agegate')
2605
2606         if last_error:
2607             if not len(prs):
2608                 raise last_error
2609             self.report_warning(last_error)
2610         return prs, player_url
2611
2612     def _extract_formats(self, streaming_data, video_id, player_url, is_live):
2613         itags, stream_ids = {}, []
2614         itag_qualities, res_qualities = {}, {}
2615         q = qualities([
2616             # Normally tiny is the smallest video-only formats. But
2617             # audio-only formats with unknown quality may get tagged as tiny
2618             'tiny',
2619             'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
2620             'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2621         ])
2622         streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
2623
2624         for fmt in streaming_formats:
2625             if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2626                 continue
2627
2628             itag = str_or_none(fmt.get('itag'))
2629             audio_track = fmt.get('audioTrack') or {}
2630             stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2631             if stream_id in stream_ids:
2632                 continue
2633
2634             quality = fmt.get('quality')
2635             height = int_or_none(fmt.get('height'))
2636             if quality == 'tiny' or not quality:
2637                 quality = fmt.get('audioQuality', '').lower() or quality
2638             # The 3gp format (17) in android client has a quality of "small",
2639             # but is actually worse than other formats
2640             if itag == '17':
2641                 quality = 'tiny'
2642             if quality:
2643                 if itag:
2644                     itag_qualities[itag] = quality
2645                 if height:
2646                     res_qualities[height] = quality
2647             # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2648             # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2649             # number of fragment that would subsequently requested with (`&sq=N`)
2650             if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2651                 continue
2652
2653             fmt_url = fmt.get('url')
2654             if not fmt_url:
2655                 sc = compat_parse_qs(fmt.get('signatureCipher'))
2656                 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2657                 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2658                 if not (sc and fmt_url and encrypted_sig):
2659                     continue
2660                 if not player_url:
2661                     continue
2662                 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2663                 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2664                 fmt_url += '&' + sp + '=' + signature
2665
2666             query = parse_qs(fmt_url)
2667             throttled = False
2668             if query.get('n'):
2669                 try:
2670                     fmt_url = update_url_query(fmt_url, {
2671                         'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
2672                 except ExtractorError as e:
2673                     self.report_warning(
2674                         f'nsig extraction failed: You may experience throttling for some formats\n'
2675                         f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
2676                     throttled = True
2677
2678             if itag:
2679                 itags[itag] = 'https'
2680                 stream_ids.append(stream_id)
2681
2682             tbr = float_or_none(
2683                 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2684             dct = {
2685                 'asr': int_or_none(fmt.get('audioSampleRate')),
2686                 'filesize': int_or_none(fmt.get('contentLength')),
2687                 'format_id': itag,
2688                 'format_note': join_nonempty(
2689                     '%s%s' % (audio_track.get('displayName') or '',
2690                               ' (default)' if audio_track.get('audioIsDefault') else ''),
2691                     fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
2692                     throttled and 'THROTTLED', delim=', '),
2693                 'source_preference': -10 if throttled else -1,
2694                 'fps': int_or_none(fmt.get('fps')) or None,
2695                 'height': height,
2696                 'quality': q(quality),
2697                 'tbr': tbr,
2698                 'url': fmt_url,
2699                 'width': int_or_none(fmt.get('width')),
2700                 'language': audio_track.get('id', '').split('.')[0],
2701                 'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
2702             }
2703             mime_mobj = re.match(
2704                 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2705             if mime_mobj:
2706                 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2707                 dct.update(parse_codecs(mime_mobj.group(2)))
2708             no_audio = dct.get('acodec') == 'none'
2709             no_video = dct.get('vcodec') == 'none'
2710             if no_audio:
2711                 dct['vbr'] = tbr
2712             if no_video:
2713                 dct['abr'] = tbr
2714             if no_audio or no_video:
2715                 dct['downloader_options'] = {
2716                     # Youtube throttles chunks >~10M
2717                     'http_chunk_size': 10485760,
2718                 }
2719                 if dct.get('ext'):
2720                     dct['container'] = dct['ext'] + '_dash'
2721             yield dct
2722
2723         live_from_start = is_live and self.get_param('live_from_start')
2724         skip_manifests = self._configuration_arg('skip')
2725         if not self.get_param('youtube_include_hls_manifest', True):
2726             skip_manifests.append('hls')
2727         get_dash = 'dash' not in skip_manifests and (
2728             not is_live or live_from_start or self._configuration_arg('include_live_dash'))
2729         get_hls = not live_from_start and 'hls' not in skip_manifests
2730
2731         def process_manifest_format(f, proto, itag):
2732             if itag in itags:
2733                 if itags[itag] == proto or f'{itag}-{proto}' in itags:
2734                     return False
2735                 itag = f'{itag}-{proto}'
2736             if itag:
2737                 f['format_id'] = itag
2738                 itags[itag] = proto
2739
2740             f['quality'] = next((
2741                 q(qdict[val])
2742                 for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
2743                 if val in qdict), -1)
2744             return True
2745
2746         for sd in streaming_data:
2747             hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2748             if hls_manifest_url:
2749                 for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
2750                     if process_manifest_format(f, 'hls', self._search_regex(
2751                             r'/itag/(\d+)', f['url'], 'itag', default=None)):
2752                         yield f
2753
2754             dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2755             if dash_manifest_url:
2756                 for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
2757                     if process_manifest_format(f, 'dash', f['format_id']):
2758                         f['filesize'] = int_or_none(self._search_regex(
2759                             r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
2760                         if live_from_start:
2761                             f['is_from_start'] = True
2762
2763                         yield f
2764
2765     def _extract_storyboard(self, player_responses, duration):
2766         spec = get_first(
2767             player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
2768         if not spec:
2769             return
2770         base_url = spec.pop()
2771         L = len(spec) - 1
2772         for i, args in enumerate(spec):
2773             args = args.split('#')
2774             counts = list(map(int_or_none, args[:5]))
2775             if len(args) != 8 or not all(counts):
2776                 self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
2777                 continue
2778             width, height, frame_count, cols, rows = counts
2779             N, sigh = args[6:]
2780
2781             url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
2782             fragment_count = frame_count / (cols * rows)
2783             fragment_duration = duration / fragment_count
2784             yield {
2785                 'format_id': f'sb{i}',
2786                 'format_note': 'storyboard',
2787                 'ext': 'mhtml',
2788                 'protocol': 'mhtml',
2789                 'acodec': 'none',
2790                 'vcodec': 'none',
2791                 'url': url,
2792                 'width': width,
2793                 'height': height,
2794                 'fragments': [{
2795                     'path': url.replace('$M', str(j)),
2796                     'duration': min(fragment_duration, duration - (j * fragment_duration)),
2797                 } for j in range(math.ceil(fragment_count))],
2798             }
2799
2800     def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
2801         webpage = None
2802         if 'webpage' not in self._configuration_arg('player_skip'):
2803             webpage = self._download_webpage(
2804                 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2805
2806         master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2807
2808         player_responses, player_url = self._extract_player_responses(
2809             self._get_requested_clients(url, smuggled_data),
2810             video_id, webpage, master_ytcfg)
2811
2812         return webpage, master_ytcfg, player_responses, player_url
2813
2814     def _list_formats(self, video_id, microformats, video_details, player_responses, player_url):
2815         live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
2816         is_live = get_first(video_details, 'isLive')
2817         if is_live is None:
2818             is_live = get_first(live_broadcast_details, 'isLiveNow')
2819
2820         streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2821         formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2822
2823         return live_broadcast_details, is_live, streaming_data, formats
2824
2825     def _real_extract(self, url):
2826         url, smuggled_data = unsmuggle_url(url, {})
2827         video_id = self._match_id(url)
2828
2829         base_url = self.http_scheme() + '//www.youtube.com/'
2830         webpage_url = base_url + 'watch?v=' + video_id
2831
2832         webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
2833
2834         playability_statuses = traverse_obj(
2835             player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2836
2837         trailer_video_id = get_first(
2838             playability_statuses,
2839             ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2840             expected_type=str)
2841         if trailer_video_id:
2842             return self.url_result(
2843                 trailer_video_id, self.ie_key(), trailer_video_id)
2844
2845         search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2846                        if webpage else (lambda x: None))
2847
2848         video_details = traverse_obj(
2849             player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2850         microformats = traverse_obj(
2851             player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2852             expected_type=dict, default=[])
2853         video_title = (
2854             get_first(video_details, 'title')
2855             or self._get_text(microformats, (..., 'title'))
2856             or search_meta(['og:title', 'twitter:title', 'title']))
2857         video_description = get_first(video_details, 'shortDescription')
2858
2859         multifeed_metadata_list = get_first(
2860             player_responses,
2861             ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2862             expected_type=str)
2863         if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
2864             if self.get_param('noplaylist'):
2865                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2866             else:
2867                 entries = []
2868                 feed_ids = []
2869                 for feed in multifeed_metadata_list.split(','):
2870                     # Unquote should take place before split on comma (,) since textual
2871                     # fields may contain comma as well (see
2872                     # https://github.com/ytdl-org/youtube-dl/issues/8536)
2873                     feed_data = compat_parse_qs(
2874                         compat_urllib_parse_unquote_plus(feed))
2875
2876                     def feed_entry(name):
2877                         return try_get(
2878                             feed_data, lambda x: x[name][0], compat_str)
2879
2880                     feed_id = feed_entry('id')
2881                     if not feed_id:
2882                         continue
2883                     feed_title = feed_entry('title')
2884                     title = video_title
2885                     if feed_title:
2886                         title += ' (%s)' % feed_title
2887                     entries.append({
2888                         '_type': 'url_transparent',
2889                         'ie_key': 'Youtube',
2890                         'url': smuggle_url(
2891                             '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2892                             {'force_singlefeed': True}),
2893                         'title': title,
2894                     })
2895                     feed_ids.append(feed_id)
2896                 self.to_screen(
2897                     'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2898                     % (', '.join(feed_ids), video_id))
2899                 return self.playlist_result(
2900                     entries, video_id, video_title, video_description)
2901
2902         live_broadcast_details, is_live, streaming_data, formats = self._list_formats(video_id, microformats, video_details, player_responses, player_url)
2903
2904         if not formats:
2905             if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2906                 self.report_drm(video_id)
2907             pemr = get_first(
2908                 playability_statuses,
2909                 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2910             reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2911             subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2912             if subreason:
2913                 if subreason == 'The uploader has not made this video available in your country.':
2914                     countries = get_first(microformats, 'availableCountries')
2915                     if not countries:
2916                         regions_allowed = search_meta('regionsAllowed')
2917                         countries = regions_allowed.split(',') if regions_allowed else None
2918                     self.raise_geo_restricted(subreason, countries, metadata_available=True)
2919                 reason += f'. {subreason}'
2920             if reason:
2921                 self.raise_no_formats(reason, expected=True)
2922
2923         keywords = get_first(video_details, 'keywords', expected_type=list) or []
2924         if not keywords and webpage:
2925             keywords = [
2926                 unescapeHTML(m.group('content'))
2927                 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2928         for keyword in keywords:
2929             if keyword.startswith('yt:stretch='):
2930                 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2931                 if mobj:
2932                     # NB: float is intentional for forcing float division
2933                     w, h = (float(v) for v in mobj.groups())
2934                     if w > 0 and h > 0:
2935                         ratio = w / h
2936                         for f in formats:
2937                             if f.get('vcodec') != 'none':
2938                                 f['stretched_ratio'] = ratio
2939                         break
2940         thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
2941         thumbnail_url = search_meta(['og:image', 'twitter:image'])
2942         if thumbnail_url:
2943             thumbnails.append({
2944                 'url': thumbnail_url,
2945             })
2946         original_thumbnails = thumbnails.copy()
2947
2948         # The best resolution thumbnails sometimes does not appear in the webpage
2949         # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2950         # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2951         thumbnail_names = [
2952             'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
2953             'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2954             'mqdefault', 'mq1', 'mq2', 'mq3',
2955             'default', '1', '2', '3'
2956         ]
2957         n_thumbnail_names = len(thumbnail_names)
2958         thumbnails.extend({
2959             'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2960                 video_id=video_id, name=name, ext=ext,
2961                 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2962         } for name in thumbnail_names for ext in ('webp', 'jpg'))
2963         for thumb in thumbnails:
2964             i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2965             thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2966         self._remove_duplicate_formats(thumbnails)
2967         self._downloader._sort_thumbnails(original_thumbnails)
2968
2969         category = get_first(microformats, 'category') or search_meta('genre')
2970         channel_id = str_or_none(
2971             get_first(video_details, 'channelId')
2972             or get_first(microformats, 'externalChannelId')
2973             or search_meta('channelId'))
2974         duration = int_or_none(
2975             get_first(video_details, 'lengthSeconds')
2976             or get_first(microformats, 'lengthSeconds')
2977             or parse_duration(search_meta('duration'))) or None
2978         owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2979
2980         live_content = get_first(video_details, 'isLiveContent')
2981         is_upcoming = get_first(video_details, 'isUpcoming')
2982         if is_live is None:
2983             if is_upcoming or live_content is False:
2984                 is_live = False
2985         if is_upcoming is None and (live_content or is_live):
2986             is_upcoming = False
2987         live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2988         live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2989         if not duration and live_end_time and live_start_time:
2990             duration = live_end_time - live_start_time
2991
2992         if is_live and self.get_param('live_from_start'):
2993             self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)
2994
2995         formats.extend(self._extract_storyboard(player_responses, duration))
2996
2997         # Source is given priority since formats that throttle are given lower source_preference
2998         # When throttling issue is fully fixed, remove this
2999         self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
3000
3001         info = {
3002             'id': video_id,
3003             'title': video_title,
3004             'formats': formats,
3005             'thumbnails': thumbnails,
3006             # The best thumbnail that we are sure exists. Prevents unnecessary
3007             # URL checking if user don't care about getting the best possible thumbnail
3008             'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
3009             'description': video_description,
3010             'upload_date': unified_strdate(
3011                 get_first(microformats, 'uploadDate')
3012                 or search_meta('uploadDate')),
3013             'uploader': get_first(video_details, 'author'),
3014             'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
3015             'uploader_url': owner_profile_url,
3016             'channel_id': channel_id,
3017             'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
3018             'duration': duration,
3019             'view_count': int_or_none(
3020                 get_first((video_details, microformats), (..., 'viewCount'))
3021                 or search_meta('interactionCount')),
3022             'average_rating': float_or_none(get_first(video_details, 'averageRating')),
3023             'age_limit': 18 if (
3024                 get_first(microformats, 'isFamilySafe') is False
3025                 or search_meta('isFamilyFriendly') == 'false'
3026                 or search_meta('og:restrictions:age') == '18+') else 0,
3027             'webpage_url': webpage_url,
3028             'categories': [category] if category else None,
3029             'tags': keywords,
3030             'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
3031             'is_live': is_live,
3032             'was_live': (False if is_live or is_upcoming or live_content is False
3033                          else None if is_live is None or is_upcoming is None
3034                          else live_content),
3035             'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
3036             'release_timestamp': live_start_time,
3037         }
3038
3039         pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
3040         if pctr:
3041             def get_lang_code(track):
3042                 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
3043                         or track.get('languageCode'))
3044
3045             # Converted into dicts to remove duplicates
3046             captions = {
3047                 get_lang_code(sub): sub
3048                 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
3049             translation_languages = {
3050                 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
3051                 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
3052
3053             def process_language(container, base_url, lang_code, sub_name, query):
3054                 lang_subs = container.setdefault(lang_code, [])
3055                 for fmt in self._SUBTITLE_FORMATS:
3056                     query.update({
3057                         'fmt': fmt,
3058                     })
3059                     lang_subs.append({
3060                         'ext': fmt,
3061                         'url': update_url_query(base_url, query),
3062                         'name': sub_name,
3063                     })
3064
3065             subtitles, automatic_captions = {}, {}
3066             for lang_code, caption_track in captions.items():
3067                 base_url = caption_track.get('baseUrl')
3068                 if not base_url:
3069                     continue
3070                 lang_name = self._get_text(caption_track, 'name', max_runs=1)
3071                 if caption_track.get('kind') != 'asr':
3072                     if not lang_code:
3073                         continue
3074                     process_language(
3075                         subtitles, base_url, lang_code, lang_name, {})
3076                     if not caption_track.get('isTranslatable'):
3077                         continue
3078                 for trans_code, trans_name in translation_languages.items():
3079                     if not trans_code:
3080                         continue
3081                     if caption_track.get('kind') != 'asr':
3082                         trans_code += f'-{lang_code}'
3083                         trans_name += format_field(lang_name, template=' from %s')
3084                     process_language(
3085                         automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code})
3086             info['automatic_captions'] = automatic_captions
3087             info['subtitles'] = subtitles
3088
3089         parsed_url = compat_urllib_parse_urlparse(url)
3090         for component in [parsed_url.fragment, parsed_url.query]:
3091             query = compat_parse_qs(component)
3092             for k, v in query.items():
3093                 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
3094                     d_k += '_time'
3095                     if d_k not in info and k in s_ks:
3096                         info[d_k] = parse_duration(query[k][0])
3097
3098         # Youtube Music Auto-generated description
3099         if video_description:
3100             mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
3101             if mobj:
3102                 release_year = mobj.group('release_year')
3103                 release_date = mobj.group('release_date')
3104                 if release_date:
3105                     release_date = release_date.replace('-', '')
3106                     if not release_year:
3107                         release_year = release_date[:4]
3108                 info.update({
3109                     'album': mobj.group('album'.strip()),
3110                     'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3111                     'track': mobj.group('track').strip(),
3112                     'release_date': release_date,
3113                     'release_year': int_or_none(release_year),
3114                 })
3115
3116         initial_data = None
3117         if webpage:
3118             initial_data = self._extract_yt_initial_variable(
3119                 webpage, self._YT_INITIAL_DATA_RE, video_id,
3120                 'yt initial data')
3121         if not initial_data:
3122             query = {'videoId': video_id}
3123             query.update(self._get_checkok_params())
3124             initial_data = self._extract_response(
3125                 item_id=video_id, ep='next', fatal=False,
3126                 ytcfg=master_ytcfg, query=query,
3127                 headers=self.generate_api_headers(ytcfg=master_ytcfg),
3128                 note='Downloading initial data API JSON')
3129
3130         try:
3131             # This will error if there is no livechat
3132             initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
3133             info.setdefault('subtitles', {})['live_chat'] = [{
3134                 'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
3135                 'video_id': video_id,
3136                 'ext': 'json',
3137                 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
3138             }]
3139         except (KeyError, IndexError, TypeError):
3140             pass
3141
3142         if initial_data:
3143             info['chapters'] = (
3144                 self._extract_chapters_from_json(initial_data, duration)
3145                 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3146                 or None)
3147
3148             contents = try_get(
3149                 initial_data,
3150                 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3151                 list) or []
3152             for content in contents:
3153                 vpir = content.get('videoPrimaryInfoRenderer')
3154                 if vpir:
3155                     stl = vpir.get('superTitleLink')
3156                     if stl:
3157                         stl = self._get_text(stl)
3158                         if try_get(
3159                                 vpir,
3160                                 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3161                             info['location'] = stl
3162                         else:
3163                             mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3164                             if mobj:
3165                                 info.update({
3166                                     'series': mobj.group(1),
3167                                     'season_number': int(mobj.group(2)),
3168                                     'episode_number': int(mobj.group(3)),
3169                                 })
3170                     for tlb in (try_get(
3171                             vpir,
3172                             lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3173                             list) or []):
3174                         tbr = tlb.get('toggleButtonRenderer') or {}
3175                         for getter, regex in [(
3176                                 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3177                                 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3178                                     lambda x: x['accessibility'],
3179                                     lambda x: x['accessibilityData']['accessibilityData'],
3180                                 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3181                             label = (try_get(tbr, getter, dict) or {}).get('label')
3182                             if label:
3183                                 mobj = re.match(regex, label)
3184                                 if mobj:
3185                                     info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3186                                     break
3187                     sbr_tooltip = try_get(
3188                         vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3189                     if sbr_tooltip:
3190                         like_count, dislike_count = sbr_tooltip.split(' / ')
3191                         info.update({
3192                             'like_count': str_to_int(like_count),
3193                             'dislike_count': str_to_int(dislike_count),
3194                         })
3195                 vsir = content.get('videoSecondaryInfoRenderer')
3196                 if vsir:
3197                     info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
3198                     rows = try_get(
3199                         vsir,
3200                         lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3201                         list) or []
3202                     multiple_songs = False
3203                     for row in rows:
3204                         if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3205                             multiple_songs = True
3206                             break
3207                     for row in rows:
3208                         mrr = row.get('metadataRowRenderer') or {}
3209                         mrr_title = mrr.get('title')
3210                         if not mrr_title:
3211                             continue
3212                         mrr_title = self._get_text(mrr, 'title')
3213                         mrr_contents_text = self._get_text(mrr, ('contents', 0))
3214                         if mrr_title == 'License':
3215                             info['license'] = mrr_contents_text
3216                         elif not multiple_songs:
3217                             if mrr_title == 'Album':
3218                                 info['album'] = mrr_contents_text
3219                             elif mrr_title == 'Artist':
3220                                 info['artist'] = mrr_contents_text
3221                             elif mrr_title == 'Song':
3222                                 info['track'] = mrr_contents_text
3223
3224         fallbacks = {
3225             'channel': 'uploader',
3226             'channel_id': 'uploader_id',
3227             'channel_url': 'uploader_url',
3228         }
3229         for to, frm in fallbacks.items():
3230             if not info.get(to):
3231                 info[to] = info.get(frm)
3232
3233         for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3234             v = info.get(s_k)
3235             if v:
3236                 info[d_k] = v
3237
3238         is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3239         is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3240         is_membersonly = None
3241         is_premium = None
3242         if initial_data and is_private is not None:
3243             is_membersonly = False
3244             is_premium = False
3245             contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3246             badge_labels = set()
3247             for content in contents:
3248                 if not isinstance(content, dict):
3249                     continue
3250                 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3251             for badge_label in badge_labels:
3252                 if badge_label.lower() == 'members only':
3253                     is_membersonly = True
3254                 elif badge_label.lower() == 'premium':
3255                     is_premium = True
3256                 elif badge_label.lower() == 'unlisted':
3257                     is_unlisted = True
3258
3259         info['availability'] = self._availability(
3260             is_private=is_private,
3261             needs_premium=is_premium,
3262             needs_subscription=is_membersonly,
3263             needs_auth=info['age_limit'] >= 18,
3264             is_unlisted=None if is_private is None else is_unlisted)
3265
3266         info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
3267
3268         self.mark_watched(video_id, player_responses)
3269
3270         return info
3271
3272
3273 class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
3274
3275     def _extract_channel_id(self, webpage):
3276         channel_id = self._html_search_meta(
3277             'channelId', webpage, 'channel id', default=None)
3278         if channel_id:
3279             return channel_id
3280         channel_url = self._html_search_meta(
3281             ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3282              'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3283              'twitter:app:url:googleplay'), webpage, 'channel url')
3284         return self._search_regex(
3285             r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3286             channel_url, 'channel id')
3287
3288     @staticmethod
3289     def _extract_basic_item_renderer(item):
3290         # Modified from _extract_grid_item_renderer
3291         known_basic_renderers = (
3292             'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3293         )
3294         for key, renderer in item.items():
3295             if not isinstance(renderer, dict):
3296                 continue
3297             elif key in known_basic_renderers:
3298                 return renderer
3299             elif key.startswith('grid') and key.endswith('Renderer'):
3300                 return renderer
3301
3302     def _grid_entries(self, grid_renderer):
3303         for item in grid_renderer['items']:
3304             if not isinstance(item, dict):
3305                 continue
3306             renderer = self._extract_basic_item_renderer(item)
3307             if not isinstance(renderer, dict):
3308                 continue
3309             title = self._get_text(renderer, 'title')
3310
3311             # playlist
3312             playlist_id = renderer.get('playlistId')
3313             if playlist_id:
3314                 yield self.url_result(
3315                     'https://www.youtube.com/playlist?list=%s' % playlist_id,
3316                     ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3317                     video_title=title)
3318                 continue
3319             # video
3320             video_id = renderer.get('videoId')
3321             if video_id:
3322                 yield self._extract_video(renderer)
3323                 continue
3324             # channel
3325             channel_id = renderer.get('channelId')
3326             if channel_id:
3327                 yield self.url_result(
3328                     'https://www.youtube.com/channel/%s' % channel_id,
3329                     ie=YoutubeTabIE.ie_key(), video_title=title)
3330                 continue
3331             # generic endpoint URL support
3332             ep_url = urljoin('https://www.youtube.com/', try_get(
3333                 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3334                 compat_str))
3335             if ep_url:
3336                 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3337                     if ie.suitable(ep_url):
3338                         yield self.url_result(
3339                             ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3340                         break
3341
3342     def _shelf_entries_from_content(self, shelf_renderer):
3343         content = shelf_renderer.get('content')
3344         if not isinstance(content, dict):
3345             return
3346         renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3347         if renderer:
3348             # TODO: add support for nested playlists so each shelf is processed
3349             # as separate playlist
3350             # TODO: this includes only first N items
3351             for entry in self._grid_entries(renderer):
3352                 yield entry
3353         renderer = content.get('horizontalListRenderer')
3354         if renderer:
3355             # TODO
3356             pass
3357
3358     def _shelf_entries(self, shelf_renderer, skip_channels=False):
3359         ep = try_get(
3360             shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3361             compat_str)
3362         shelf_url = urljoin('https://www.youtube.com', ep)
3363         if shelf_url:
3364             # Skipping links to another channels, note that checking for
3365             # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3366             # will not work
3367             if skip_channels and '/channels?' in shelf_url:
3368                 return
3369             title = self._get_text(shelf_renderer, 'title')
3370             yield self.url_result(shelf_url, video_title=title)
3371         # Shelf may not contain shelf URL, fallback to extraction from content
3372         for entry in self._shelf_entries_from_content(shelf_renderer):
3373             yield entry
3374
3375     def _playlist_entries(self, video_list_renderer):
3376         for content in video_list_renderer['contents']:
3377             if not isinstance(content, dict):
3378                 continue
3379             renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3380             if not isinstance(renderer, dict):
3381                 continue
3382             video_id = renderer.get('videoId')
3383             if not video_id:
3384                 continue
3385             yield self._extract_video(renderer)
3386
3387     def _rich_entries(self, rich_grid_renderer):
3388         renderer = try_get(
3389             rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3390         video_id = renderer.get('videoId')
3391         if not video_id:
3392             return
3393         yield self._extract_video(renderer)
3394
3395     def _video_entry(self, video_renderer):
3396         video_id = video_renderer.get('videoId')
3397         if video_id:
3398             return self._extract_video(video_renderer)
3399
3400     def _post_thread_entries(self, post_thread_renderer):
3401         post_renderer = try_get(
3402             post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3403         if not post_renderer:
3404             return
3405         # video attachment
3406         video_renderer = try_get(
3407             post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3408         video_id = video_renderer.get('videoId')
3409         if video_id:
3410             entry = self._extract_video(video_renderer)
3411             if entry:
3412                 yield entry
3413         # playlist attachment
3414         playlist_id = try_get(
3415             post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3416         if playlist_id:
3417             yield self.url_result(
3418                 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3419                 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3420         # inline video links
3421         runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3422         for run in runs:
3423             if not isinstance(run, dict):
3424                 continue
3425             ep_url = try_get(
3426                 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3427             if not ep_url:
3428                 continue
3429             if not YoutubeIE.suitable(ep_url):
3430                 continue
3431             ep_video_id = YoutubeIE._match_id(ep_url)
3432             if video_id == ep_video_id:
3433                 continue
3434             yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3435
3436     def _post_thread_continuation_entries(self, post_thread_continuation):
3437         contents = post_thread_continuation.get('contents')
3438         if not isinstance(contents, list):
3439             return
3440         for content in contents:
3441             renderer = content.get('backstagePostThreadRenderer')
3442             if not isinstance(renderer, dict):
3443                 continue
3444             for entry in self._post_thread_entries(renderer):
3445                 yield entry
3446
3447     r''' # unused
3448     def _rich_grid_entries(self, contents):
3449         for content in contents:
3450             video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3451             if video_renderer:
3452                 entry = self._video_entry(video_renderer)
3453                 if entry:
3454                     yield entry
3455     '''
    def _extract_entries(self, parent_renderer, continuation_list):
        """
        Yield the entries found in parent_renderer['contents'].
        Handles 'itemSectionRenderer' items (dispatching their contents to the
        matching entry generator) and 'richItemRenderer' items.
        @param parent_renderer: dict with a 'contents' list
        @param continuation_list: one-element list used as an output parameter;
                                  after the generator is exhausted, its first
                                  item holds the result of _extract_continuation
                                  for the last renderer that provided one
        """
        # continuation_list is modified in-place with continuation_list = [continuation_token]
        continuation_list[:] = [None]
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                # Not an item section: only rich items are supported here, and
                # their continuation is looked up on the parent renderer
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                # Maps the renderer name to the generator producing its entries
                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                    'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
                    'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
                }
                # Only the first known renderer of each content item is processed
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        # _video_entry may return None
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            # Fall back to the continuation of the item section itself ...
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        # ... and finally to the continuation of the parent renderer
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)
3499
    def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
        """
        Yield all entries of a tab, following continuations page by page.
        @param tab: dict of the tab; entries are read from its 'content'
        @param item_id: ID used to label the page downloads
        @param ytcfg: passed on to generate_api_headers and _extract_response
        @param account_syncid: passed on to generate_api_headers
        @param visitor_data: initial visitor data; replaced by the visitor
                             data of a response whenever one is found
        """
        continuation_list = [None]
        # The lambda looks `continuation_list` up at call time, so it always
        # fills the list the name is currently bound to (it is rebound below)
        extract_entries = lambda x: self._extract_entries(x, continuation_list)
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]

        for page_num in itertools.count(1):
            if not continuation:
                break
            headers = self.generate_api_headers(
                ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=continuation, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
            # See: https://github.com/ytdl-org/youtube-dl/issues/28702
            visitor_data = self._extract_visitor_data(response) or visitor_data

            # First response shape: the entries are in response['continuationContents']
            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                # Fresh list, so a token of the previous page is never reused
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                # Only extract_entries fills continuation_list; for the other
                # generators the continuation is read from the renderer
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # Second response shape: the items are appended through
            # onResponseReceivedActions/onResponseReceivedEndpoints.
            # Each value is (entry generator, key the items are wrapped in)
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'gridChannelRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            # The type of the first item decides how the whole list is processed
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                # Wrap the items so that they look like a regular renderer
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            # Neither shape matched: there is nothing more to extract
            break
3576
3577     @staticmethod
3578     def _extract_selected_tab(tabs):
3579         for tab in tabs:
3580             renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3581             if renderer.get('selected') is True:
3582                 return renderer
3583         else:
3584             raise ExtractorError('Unable to find selected tab')
3585
3586     @classmethod
3587     def _extract_uploader(cls, data):
3588         uploader = {}
3589         renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3590         owner = try_get(
3591             renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3592         if owner:
3593             uploader['uploader'] = owner.get('text')
3594             uploader['uploader_id'] = try_get(
3595                 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3596             uploader['uploader_url'] = urljoin(
3597                 'https://www.youtube.com/',
3598                 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3599         return {k: v for k, v in uploader.items() if v is not None}
3600
    def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
        """
        Build the playlist result for a tab-based page (channel or playlist).
        @param item_id: ID used as playlist ID when the page metadata has none
        @param ytcfg: passed on to the entry extraction
        @param data: response the metadata is read from
        @param tabs: list of tabs; the entries are taken from the selected one
        @returns: result of playlist_result with the entries of the selected
                  tab and the collected metadata
        """
        playlist_id = title = description = channel_url = channel_name = channel_id = None
        tags = []

        selected_tab = self._extract_selected_tab(tabs)
        # Channel metadata is preferred; playlist metadata is the fallback
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        if renderer:
            channel_name = renderer.get('title')
            channel_url = renderer.get('channelUrl')
            channel_id = renderer.get('externalId')
        else:
            renderer = try_get(
                data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

        if renderer:
            title = renderer.get('title')
            description = renderer.get('description', '')
            # channel_id is still None when the playlist metadata was used
            playlist_id = channel_id
            tags = renderer.get('keywords', '').split()

        thumbnails = (
            self._extract_thumbnails(renderer, 'avatar')
            or self._extract_thumbnails(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                ('thumbnailRenderer', 'playlistVideoThumbnailRenderer', 'thumbnail')))

        if playlist_id is None:
            playlist_id = item_id
        if title is None:
            # Pages without a metadata title: use the hashtag of the header, if any
            title = (
                try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
                or playlist_id)
        # Append the name of the selected tab to the title
        title += format_field(selected_tab, 'title', ' - %s')
        title += format_field(selected_tab, 'expandedText', ' - %s')
        metadata = {
            'playlist_id': playlist_id,
            'playlist_title': title,
            'playlist_description': description,
            'uploader': channel_name,
            'uploader_id': channel_id,
            'uploader_url': channel_url,
            'thumbnails': thumbnails,
            'tags': tags,
        }
        availability = self._extract_availability(data)
        if availability:
            metadata['availability'] = availability
        if not channel_id:
            # No channel metadata: take the uploader from the playlist sidebar
            metadata.update(self._extract_uploader(data))
        # Must come after the uploader update above so that the channel
        # fields mirror the final uploader fields
        metadata.update({
            'channel': metadata['uploader'],
            'channel_id': metadata['uploader_id'],
            'channel_url': metadata['uploader_url']})
        return self.playlist_result(
            self._entries(
                selected_tab, playlist_id, ytcfg,
                self._extract_account_syncid(ytcfg, data),
                self._extract_visitor_data(data, ytcfg)),
            **metadata)
3661
3662     def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
3663         first_id = last_id = response = None
3664         for page_num in itertools.count(1):
3665             videos = list(self._playlist_entries(playlist))
3666             if not videos:
3667                 return
3668             start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
3669             if start >= len(videos):
3670                 return
3671             for video in videos[start:]:
3672                 if video['id'] == first_id:
3673                     self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
3674                     return
3675                 yield video
3676             first_id = first_id or videos[0]['id']
3677             last_id = videos[-1]['id']
3678             watch_endpoint = try_get(
3679                 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
3680             headers = self.generate_api_headers(
3681                 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
3682                 visitor_data=self._extract_visitor_data(response, data, ytcfg))
3683             query = {
3684                 'playlistId': playlist_id,
3685                 'videoId': watch_endpoint.get('videoId') or last_id,
3686                 'index': watch_endpoint.get('index') or len(videos),
3687                 'params': watch_endpoint.get('params') or 'OAE%3D'
3688             }
3689             response = self._extract_response(
3690                 item_id='%s page %d' % (playlist_id, page_num),
3691                 query=query, ep='next', headers=headers, ytcfg=ytcfg,
3692                 check_get_keys='contents'
3693             )
3694             playlist = try_get(
3695                 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3696
3697     def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
3698         title = playlist.get('title') or try_get(
3699             data, lambda x: x['titleText']['simpleText'], compat_str)
3700         playlist_id = playlist.get('playlistId') or item_id
3701
3702         # Delegating everything except mix playlists to regular tab-based playlist URL
3703         playlist_url = urljoin(url, try_get(
3704             playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3705             compat_str))
3706         if playlist_url and playlist_url != url:
3707             return self.url_result(
3708                 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3709                 video_title=title)
3710
3711         return self.playlist_result(
3712             self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
3713             playlist_id=playlist_id, playlist_title=title)
3714
3715     def _extract_availability(self, data):
3716         """
3717         Gets the availability of a given playlist/tab.
3718         Note: Unless YouTube tells us explicitly, we do not assume it is public
3719         @param data: response
3720         """
3721         is_private = is_unlisted = None
3722         renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
3723         badge_labels = self._extract_badges(renderer)
3724
3725         # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
3726         privacy_dropdown_entries = try_get(
3727             renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
3728         for renderer_dict in privacy_dropdown_entries:
3729             is_selected = try_get(
3730                 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
3731             if not is_selected:
3732                 continue
3733             label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
3734             if label:
3735                 badge_labels.add(label.lower())
3736                 break
3737
3738         for badge_label in badge_labels:
3739             if badge_label == 'unlisted':
3740                 is_unlisted = True
3741             elif badge_label == 'private':
3742                 is_private = True
3743             elif badge_label == 'public':
3744                 is_unlisted = is_private = False
3745         return self._availability(is_private, False, False, False, is_unlisted)
3746
3747     @staticmethod
3748     def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
3749         sidebar_renderer = try_get(
3750             data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
3751         for item in sidebar_renderer:
3752             renderer = try_get(item, lambda x: x[info_renderer], expected_type)
3753             if renderer:
3754                 return renderer
3755
3756     def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
3757         """
3758         Get playlist with unavailable videos if the 'show unavailable videos' button exists.
3759         """
3760         browse_id = params = None
3761         renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
3762         if not renderer:
3763             return
3764         menu_renderer = try_get(
3765             renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
3766         for menu_item in menu_renderer:
3767             if not isinstance(menu_item, dict):
3768                 continue
3769             nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
3770             text = try_get(
3771                 nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
3772             if not text or text.lower() != 'show unavailable videos':
3773                 continue
3774             browse_endpoint = try_get(
3775                 nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
3776             browse_id = browse_endpoint.get('browseId')
3777             params = browse_endpoint.get('params')
3778             break
3779
3780         headers = self.generate_api_headers(
3781             ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
3782             visitor_data=self._extract_visitor_data(data, ytcfg))
3783         query = {
3784             'params': params or 'wgYCCAA=',
3785             'browseId': browse_id or 'VL%s' % item_id
3786         }
3787         return self._extract_response(
3788             item_id=item_id, headers=headers, query=query,
3789             check_get_keys='contents', fatal=False, ytcfg=ytcfg,
3790             note='Downloading API JSON with unavailable videos')
3791
    def _extract_webpage(self, url, item_id, fatal=True):
        """
        Download the webpage and extract its initial data, retrying when that fails.

        Another attempt is made (up to the 'extractor_retries' param, 3 by default) when
        a network error other than HTTP 403/429 occurs, or when dict_get finds neither
        'contents' nor 'currentVideoEndpoint' in the extracted data.
        @param url: URL of the page to download
        @param item_id: id handed to the download and extraction helpers
        @param fatal: raise on failure if True; otherwise report a warning and stop
        @returns (webpage, data) as left by the last attempt; both start out as None
        """
        retries = self.get_param('extractor_retries', 3)
        count = -1
        webpage = data = last_error = None
        while count < retries:
            count += 1
            # Sometimes youtube returns a webpage with incomplete ytInitialData
            # See: https://github.com/yt-dlp/yt-dlp/issues/116
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                webpage = self._download_webpage(
                    url, item_id,
                    note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
                data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
            except ExtractorError as e:
                if isinstance(e.cause, network_exceptions):
                    # HTTP 403 and 429 are not retried; any other network error is
                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                        last_error = error_to_compat_str(e.cause or e.msg)
                        if count < retries:
                            continue
                # Not retryable, or out of retries
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                break
            else:
                try:
                    self._extract_and_report_alerts(data)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    break

                # The data is considered usable once one of these keys is found
                if dict_get(data, ('contents', 'currentVideoEndpoint')):
                    break

                last_error = 'Incomplete yt initial data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    self.report_warning(last_error)
                    break

        return webpage, data
3837
3838     def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
3839         data = None
3840         if 'webpage' not in self._configuration_arg('skip'):
3841             webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
3842             ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
3843         if not data:
3844             if not ytcfg and self.is_authenticated:
3845                 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
3846                 if 'authcheck' not in self._configuration_arg('skip') and fatal:
3847                     raise ExtractorError(
3848                         msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
3849                               ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
3850                         expected=True)
3851                 self.report_warning(msg, only_once=True)
3852             data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
3853         return data, ytcfg
3854
3855     def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
3856         headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
3857         resolve_response = self._extract_response(
3858             item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
3859             ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)
3860         endpoints = {'browseEndpoint': 'browse', 'watchEndpoint': 'next'}
3861         for ep_key, ep in endpoints.items():
3862             params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
3863             if params:
3864                 return self._extract_response(
3865                     item_id=item_id, query=params, ep=ep, headers=headers,
3866                     ytcfg=ytcfg, fatal=fatal, default_client=default_client,
3867                     check_get_keys=('contents', 'currentVideoEndpoint'))
3868         err_note = 'Failed to resolve url (does the playlist exist?)'
3869         if fatal:
3870             raise ExtractorError(err_note, expected=True)
3871         self.report_warning(err_note, item_id)
3872
3873     @staticmethod
3874     def _smuggle_data(entries, data):
3875         for entry in entries:
3876             if data:
3877                 entry['url'] = smuggle_url(entry['url'], data)
3878             yield entry
3879
    # Used as the request's 'params' when _search_results is called without an explicit `params`
    _SEARCH_PARAMS = None
3881
3882     def _search_results(self, query, params=NO_DEFAULT):
3883         data = {'query': query}
3884         if params is NO_DEFAULT:
3885             params = self._SEARCH_PARAMS
3886         if params:
3887             data['params'] = params
3888         continuation_list = [None]
3889         for page_num in itertools.count(1):
3890             data.update(continuation_list[0] or {})
3891             search = self._extract_response(
3892                 item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
3893                 check_get_keys=('contents', 'onResponseReceivedCommands'))
3894             slr_contents = try_get(
3895                 search,
3896                 (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
3897                  lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
3898                 list)
3899             yield from self._extract_entries({'contents': slr_contents}, continuation_list)
3900             if not continuation_list[0]:
3901                 break
3902
3903
3904 class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
    IE_DESC = 'YouTube Tabs'
    # Verbose-mode pattern. After an optional subdomain and a youtube/youtubekids or Invidious host,
    # the path is one of:
    #   - a channel-style prefix, captured as `channel_type` (channel, c, user, browse)
    #   - a non-channel prefix, captured as `not_channel` (feed/, hashtag/, or a playlist/watch
    #     URL up to its list= parameter)
    #   - nothing, for direct URLs whose name is not one of the reserved names
    # The token that follows (up to the next /, ?, # or &) is captured as `id`.
    _VALID_URL = r'''(?x:
        https?://
            (?:\w+\.)?
            (?:
                youtube(?:kids)?\.com|
                %(invidious)s
            )/
            (?:
                (?P<channel_type>channel|c|user|browse)/|
                (?P<not_channel>
                    feed/|hashtag/|
                    (?:playlist|watch)\?.*?\blist=
                )|
                (?!(?:%(reserved_names)s)\b)  # Direct URLs
            )
            (?P<id>[^/?\#&]+)
    )''' % {
        'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:tab'
3927
    # Test cases for this extractor: each dict gives a URL together with the expectations for it
    # (info_dict fields, playlist_count / playlist_mincount, expected_warnings, params, ...).
    # How these keys are interpreted is up to the test harness, which is not visible here.
    _TESTS = [{
        'note': 'playlists, multipage',
        'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
        'playlist_mincount': 94,
        'info_dict': {
            'id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'title': 'Игорь Клейнер - Playlists',
            'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
            'uploader': 'Игорь Клейнер',
            'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
        },
    }, {
        'note': 'playlists, multipage, different order',
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 94,
        'info_dict': {
            'id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'title': 'Игорь Клейнер - Playlists',
            'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
            'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'uploader': 'Игорь Клейнер',
        },
    }, {
        'note': 'playlists, series',
        'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'UCYO_jab_esuFRV4b17AJtAw',
            'title': '3Blue1Brown - Playlists',
            'description': 'md5:e1384e8a133307dd10edee76e875d62f',
            'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
            'uploader': '3Blue1Brown',
        },
    }, {
        'note': 'playlists, singlepage',
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
            'title': 'ThirstForScience - Playlists',
            'description': 'md5:609399d937ea957b0f53cbffb747a14c',
            'uploader': 'ThirstForScience',
            'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
        }
    }, {
        'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
        'only_matching': True,
    }, {
        'note': 'basic, single video playlist',
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
            'title': 'youtube-dl public playlist',
        },
        'playlist_count': 1,
    }, {
        'note': 'empty playlist',
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
            'title': 'youtube-dl empty playlist',
        },
        'playlist_count': 0,
    }, {
        'note': 'Home tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Home',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        },
        'playlist_mincount': 2,
    }, {
        'note': 'Videos tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Videos',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        },
        'playlist_mincount': 975,
    }, {
        'note': 'Videos tab, sorted by popular',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Videos',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        },
        'playlist_mincount': 199,
    }, {
        'note': 'Playlists tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Playlists',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        },
        'playlist_mincount': 17,
    }, {
        'note': 'Community tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Community',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        },
        'playlist_mincount': 18,
    }, {
        'note': 'Channels tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Channels',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        },
        'playlist_mincount': 12,
    }, {
        'note': 'Search tab',
        'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
        'playlist_mincount': 40,
        'info_dict': {
            'id': 'UCYO_jab_esuFRV4b17AJtAw',
            'title': '3Blue1Brown - Search - linear algebra',
            'description': 'md5:e1384e8a133307dd10edee76e875d62f',
            'uploader': '3Blue1Brown',
            'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
        },
    }, {
        'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
        'only_matching': True,
    }, {
        'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
        'only_matching': True,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
            'uploader': 'Christiaan008',
            'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
            'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
        },
        'playlist_count': 96,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
            'uploader': 'Cauchemar',
            'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 1123,
    }, {
        'note': 'even larger playlist, 8832 videos',
        'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
        'only_matching': True,
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
            'uploader': 'Interstellar Movie',
            'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
        },
        'playlist_mincount': 21,
    }, {
        'note': 'Playlist with "show unavailable videos" button',
        'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
        'info_dict': {
            'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
            'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
            'uploader': 'Phim Siêu Nhân Nhật Bản',
            'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
        },
        'playlist_mincount': 200,
    }, {
        'note': 'Playlist with unavailable videos in page 7',
        'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
        'info_dict': {
            'title': 'Uploads from BlankTV',
            'id': 'UU8l9frL61Yl5KFOl87nIm2w',
            'uploader': 'BlankTV',
            'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
        },
        'playlist_mincount': 1000,
    }, {
        'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
        'info_dict': {
            'title': 'Data Analysis with Dr Mike Pound',
            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
            'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
            'uploader': 'Computerphile',
            'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
        'only_matching': True,
    }, {
        'note': 'Playlist URL that does not actually serve a playlist',
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is not available.',
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
        'info_dict': {
            'id': '3yImotZU3tw',  # This will keep changing
            'ext': 'mp4',
            'title': compat_str,
            'uploader': 'Sky News',
            'uploader_id': 'skynews',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
            'upload_date': r're:\d{8}',
            'description': compat_str,
            'categories': ['News & Politics'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
    }, {
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'note': 'A channel that is not live. Should raise error',
        'url': 'https://www.youtube.com/user/numberphile/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/trending',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/library',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/history',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/subscriptions',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/watch_later',
        'only_matching': True,
    }, {
        'note': 'Recommended - redirects to home page.',
        'url': 'https://www.youtube.com/feed/recommended',
        'only_matching': True,
    }, {
        'note': 'inline playlist with not always working continuations',
        'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/course',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/zsecurity',
        'only_matching': True,
    }, {
        'url': 'http://www.youtube.com/NASAgovVideo/videos',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/hashtag/cctv9',
        'info_dict': {
            'id': 'cctv9',
            'title': '#cctv9',
        },
        'playlist_mincount': 350,
    }, {
        'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
        'only_matching': True,
    }, {
        'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
        'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
        'only_matching': True
    }, {
        'note': '/browse/ should redirect to /channel/',
        'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
        'only_matching': True
    }, {
        'note': 'VLPL, should redirect to playlist?list=PL...',
        'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
        'info_dict': {
            'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
            'uploader': 'NoCopyrightSounds',
            'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
            'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
            'title': 'NCS Releases',
        },
        'playlist_mincount': 166,
    }, {
        'note': 'Topic, should redirect to playlist?list=UU...',
        'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
        'info_dict': {
            'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
            'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
            'title': 'Uploads from Royalty Free Music - Topic',
            'uploader': 'Royalty Free Music - Topic',
        },
        'expected_warnings': [
            'A channel/user page was given',
            'The URL does not have a videos tab',
        ],
        'playlist_mincount': 101,
    }, {
        'note': 'Topic without a UU playlist',
        'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
        'info_dict': {
            'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
            'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
        },
        'expected_warnings': [
            'A channel/user page was given',
            'The URL does not have a videos tab',
            'Falling back to channel URL',
        ],
        'playlist_mincount': 9,
    }, {
        'note': 'Youtube music Album',
        'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
        'info_dict': {
            'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
            'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
        },
        'playlist_count': 50,
    }, {
        'note': 'unlisted single video playlist',
        'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
        'info_dict': {
            'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
            'uploader': 'colethedj',
            'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
            'title': 'yt-dlp unlisted playlist test',
            'availability': 'unlisted'
        },
        'playlist_count': 1,
    }, {
        'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
        'url': 'https://www.youtube.com/feed/recommended',
        'info_dict': {
            'id': 'recommended',
            'title': 'recommended',
        },
        'playlist_mincount': 50,
        'params': {
            'skip_download': True,
            'extractor_args': {'youtubetab': {'skip': ['webpage']}}
        },
    }, {
        'note': 'API Fallback: /videos tab, sorted by oldest first',
        'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
        'info_dict': {
            'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
            'title': 'Cody\'sLab - Videos',
            'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
            'uploader': 'Cody\'sLab',
            'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
        },
        'playlist_mincount': 650,
        'params': {
            'skip_download': True,
            'extractor_args': {'youtubetab': {'skip': ['webpage']}}
        },
    }, {
        'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
        'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
        'info_dict': {
            'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
            'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
            'title': 'Uploads from Royalty Free Music - Topic',
            'uploader': 'Royalty Free Music - Topic',
        },
        'expected_warnings': [
            'A channel/user page was given',
            'The URL does not have a videos tab',
        ],
        'playlist_mincount': 101,
        'params': {
            'skip_download': True,
            'extractor_args': {'youtubetab': {'skip': ['webpage']}}
        },
    }]
4387
4388     @classmethod
4389     def suitable(cls, url):
4390         return False if YoutubeIE.suitable(url) else super(
4391             YoutubeTabIE, cls).suitable(url)
4392
4393     def _real_extract(self, url):
4394         url, smuggled_data = unsmuggle_url(url, {})
4395         if self.is_music_url(url):
4396             smuggled_data['is_music_url'] = True
4397         info_dict = self.__real_extract(url, smuggled_data)
4398         if info_dict.get('entries'):
4399             info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4400         return info_dict
4401
    # Splits a URL into `pre` (the portion matched by _VALID_URL), an optional `tab` (a "/word" path
    # segment, only tried when the channel_type group matched) and `post` (everything that remains)
    _URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$')
4403
    def __real_extract(self, url, smuggled_data):
        """Normalise a tab/playlist/watch URL and dispatch it to the matching extraction path.

        The URL is rewritten to the www.youtube.com host, split with _URL_RE and
        adjusted (music URLs, channel home pages, watch URLs without a video ID)
        before the page data is fetched with `_extract_data`.

        @param url            URL to extract
        @param smuggled_data  dict of smuggled options; only the 'is_music_url'
                              key is read here
        @returns  the result of `_extract_from_tabs`, `_extract_from_playlist`
                  or `url_result` (pointing to a single video or, for music
                  albums, to the resolved playlist URL)
        @raises ExtractorError  when a music album cannot be resolved, when a
                  watch URL has neither video nor playlist ID, when the "/live"
                  tab was requested but a tab page was returned, or when the
                  fetched data has no recognisable tabs, playlist or video
        """
        item_id = self._match_id(url)
        # Force the www.youtube.com host while keeping every other URL component
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # Groups that did not participate in the match are None; replace them
            # with '' so they can be concatenated and used with str methods below
            mobj = self._URL_RE.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj, redirect_warning = get_mobj(url), None
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
                elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                    mdata = self._extract_tab_endpoint(
                        f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
                    murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                        get_all=False, expected_type=compat_str)
                    if not murl:
                        raise ExtractorError('Failed to resolve album to playlist')
                    # Hand the canonical URL back to YoutubeTabIE instead of continuing here
                    return self.url_result(murl, ie=YoutubeTabIE.ie_key())
                elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                    pre = f'https://www.youtube.com/channel/{item_id}'

        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        # Rebuild the URL from the (possibly modified) parts and re-parse it so that
        # mobj reflects the URL that will actually be requested
        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id, playlist_id = [qs.get(key, [None])[0] for key in ('v', 'list')]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
            url = f'https://www.youtube.com/playlist?list={playlist_id}'
            mobj = get_mobj(url)

        if video_id and playlist_id:
            # The URL names both a video and a playlist; the 'noplaylist' param decides
            if self.get_param('noplaylist'):
                self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
                return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                       ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

        data, ytcfg = self._extract_data(url, item_id)

        tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                # The selected tab's title differs from the requested "/videos" tab
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    redirect_warning = f'The URL does not have a {mobj["tab"][1:]} tab'
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        pl_id = f'UU{item_id[2:]}'
                        pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                        try:
                            data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True)
                        except ExtractorError:
                            # Keep the originally fetched data/ytcfg and only extend the warning
                            redirect_warning += ' and the playlist redirect gave error'
                        else:
                            item_id, url, tab_name = pl_id, pl_url, mobj['tab'][1:]
                            redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                    if tab_name.lower() != mobj['tab'][1:]:
                        redirect_warning += f'. {tab_name} tab is being downloaded instead'

        if redirect_warning:
            self.report_warning(redirect_warning)
        self.write_debug(f'Final URL: {url}')

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
        self._extract_and_report_alerts(data, only_once=True)
        # Looked up again because `data` may have been replaced since the first lookup
        tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
        if tabs:
            return self._extract_from_tabs(item_id, ytcfg, data, tabs)

        playlist = traverse_obj(
            data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

        # Neither tabs nor a playlist: fall back to a single video, preferring the ID
        # reported in the response over the one taken from the URL query
        video_id = traverse_obj(
            data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                   ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4515
4516
class YoutubePlaylistIE(InfoExtractor):
    """Matches playlist URLs and bare playlist IDs and hands them over to YoutubeTabIE."""
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE accepts or that carry a `v` query value."""
        # The tab check runs first; the query string is only parsed when it fails
        if YoutubeTabIE.suitable(url) or parse_qs(url).get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Rewrite the input to a /playlist URL and delegate it to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Keep the original query if there is one; a bare ID has none, so build it
        query = parse_qs(url) or {'list': playlist_id}
        target = update_url_query('https://www.youtube.com/playlist', query)
        # The music check is made on the incoming URL, not on the rewritten one
        if YoutubeBaseInfoExtractor.is_music_url(url):
            target = smuggle_url(target, {'is_music_url': True})
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4602
4603
class YoutubeYtBeIE(InfoExtractor):
    """Turns youtu.be short links that carry a `list=` parameter into watch URLs for YoutubeTabIE."""
    IE_DESC = 'youtu.be'
    _VALID_URL = (
        r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)'
        % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE})
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent /watch URL (video + playlist) and delegate it to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        # The result is keyed by the playlist ID, not the video ID
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4642
4643
class YoutubeYtUserIE(InfoExtractor):
    """Expands the "ytuser:<name>" shorthand to the user's /videos page for YoutubeTabIE."""
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4657
4658
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Maps the ":ytfav" family of keywords to the "LL" playlist, handled by YoutubeTabIE."""
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The keyword carries no information of its own; the target is fixed
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4676
4677
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor configuration for the 'ytsearch' key.

    Only declares settings; the behaviour comes from the two base classes.
    """
    IE_DESC = 'YouTube search'
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = []
4684
4685
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor configuration for the 'ytsearchdate' key (date-sorted results)."""
    IE_DESC = 'YouTube search, newest videos first'
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
4692
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extracts results from a youtube.com/results URL, honouring its `sp` parameter."""
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of the search results, using the query as both ID and title."""
        params = parse_qs(url)
        # `search_query` takes precedence; `q` is used only when it is absent or empty
        terms = params.get('search_query') or params.get('q')
        query = terms[0]
        # First `sp` value, or None when the URL has none
        search_params = params.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)
4721
4722
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed extractors.

    Subclasses must provide a _FEED_NAME attribute; it is used to build both
    the extractor name and the feed URL that is delegated to YoutubeTabIE.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        return f'youtube:{self._FEED_NAME}'

    def _real_extract(self, url):
        # The incoming URL/keyword is not inspected; only the feed name matters
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4739
4740
class YoutubeWatchLaterIE(InfoExtractor):
    """Maps the ":ytwatchlater" keyword to the "WL" playlist, handled by YoutubeTabIE."""
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
4753
4754
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'recommended' feed.

    Matches the bare youtube.com home URL (optionally followed by a query or
    fragment) as well as the ":ytrec" / ":ytrecommended" keywords. Unlike its
    base class it sets _LOGIN_REQUIRED to False.
    """
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4770
4771
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'subscriptions' feed.

    Matches the ":ytsub" keyword and its longer spellings (":ytsubs",
    ":ytsubscription", ":ytsubscriptions").
    """
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4783
4784
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'history' feed; matches the ":ythis" / ":ythistory" keywords."""
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4793
4794
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catches watch/attribution URLs that have no video ID and explains the likely cause.

    Such URLs typically result from an unquoted "&" being interpreted by the
    shell, which cuts the URL off before the `v=` parameter. Extraction always
    fails with an expected ExtractorError carrying a hint for the user.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise: the matched URL cannot identify a video.

        @raises ExtractorError  (expected=True) with quoting advice
        """
        # The hint names this program's own command (yt-dlp) so that the
        # suggested invocation can be copied and run as-is
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  yt-dlp BaW_jenozKc  .',
            expected=True)
4842
4843
class YoutubeClipIE(InfoExtractor):
    """Placeholder for /clip/ URLs: warns and defers the URL to the 'Generic' extractor."""
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        self.report_warning(
            'YouTube clips are not currently supported. The entire video will be downloaded instead')
        # The URL is passed on unchanged
        return self.url_result(url, ie='Generic')
4852
4853
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catches watch URLs whose `v` value is 1-10 characters long and reports them as truncated."""
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Extraction never succeeds here; the error is flagged as expected
        partial_id = self._match_id(url)
        message = f'Incomplete YouTube ID {partial_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)