yt_dlp/extractor/youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import calendar
6 import copy
7 import datetime
8 import functools
9 import hashlib
10 import itertools
11 import json
12 import math
13 import os.path
14 import random
15 import re
16 import sys
17 import time
18 import traceback
19 import threading
20
21 from .common import InfoExtractor, SearchInfoExtractor
22 from ..compat import (
23 compat_chr,
24 compat_HTTPError,
25 compat_parse_qs,
26 compat_str,
27 compat_urllib_parse_unquote_plus,
28 compat_urllib_parse_urlencode,
29 compat_urllib_parse_urlparse,
30 compat_urlparse,
31 )
32 from ..jsinterp import JSInterpreter
33 from ..utils import (
34 bug_reports_message,
35 clean_html,
36 datetime_from_str,
37 dict_get,
38 error_to_compat_str,
39 ExtractorError,
40 float_or_none,
41 format_field,
42 int_or_none,
43 is_html,
44 join_nonempty,
45 mimetype2ext,
46 network_exceptions,
47 NO_DEFAULT,
48 orderedSet,
49 parse_codecs,
50 parse_count,
51 parse_duration,
52 parse_iso8601,
53 parse_qs,
54 qualities,
55 remove_end,
56 remove_start,
57 smuggle_url,
58 str_or_none,
59 str_to_int,
60 strftime_or_none,
61 traverse_obj,
62 try_get,
63 unescapeHTML,
64 unified_strdate,
65 unsmuggle_url,
66 update_url_query,
67 url_or_none,
68 urljoin,
69 variadic,
70 )
71
72
73 def get_first(obj, keys, **kwargs):
74 return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
75
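# Illustrative: get_first([{'a': 1}, {'a': 2}], 'a') == 1; returns the first match for the
# given key path across a sequence of objects (a thin wrapper around traverse_obj).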
76
77 # any clients starting with _ cannot be explicitly requested by the user
78 INNERTUBE_CLIENTS = {
79 'web': {
80 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
81 'INNERTUBE_CONTEXT': {
82 'client': {
83 'clientName': 'WEB',
84 'clientVersion': '2.20210622.10.00',
85 }
86 },
87 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
88 },
89 'web_embedded': {
90 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
91 'INNERTUBE_CONTEXT': {
92 'client': {
93 'clientName': 'WEB_EMBEDDED_PLAYER',
94 'clientVersion': '1.20210620.0.1',
95 },
96 },
97 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
98 },
99 'web_music': {
100 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
101 'INNERTUBE_HOST': 'music.youtube.com',
102 'INNERTUBE_CONTEXT': {
103 'client': {
104 'clientName': 'WEB_REMIX',
105 'clientVersion': '1.20210621.00.00',
106 }
107 },
108 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
109 },
110 'web_creator': {
111 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
112 'INNERTUBE_CONTEXT': {
113 'client': {
114 'clientName': 'WEB_CREATOR',
115 'clientVersion': '1.20210621.00.00',
116 }
117 },
118 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
119 },
120 'android': {
121 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
122 'INNERTUBE_CONTEXT': {
123 'client': {
124 'clientName': 'ANDROID',
125 'clientVersion': '16.20',
126 }
127 },
128 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
129 'REQUIRE_JS_PLAYER': False
130 },
131 'android_embedded': {
132 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
133 'INNERTUBE_CONTEXT': {
134 'client': {
135 'clientName': 'ANDROID_EMBEDDED_PLAYER',
136 'clientVersion': '16.20',
137 },
138 },
139 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
140 'REQUIRE_JS_PLAYER': False
141 },
142 'android_music': {
143 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
144 'INNERTUBE_HOST': 'music.youtube.com',
145 'INNERTUBE_CONTEXT': {
146 'client': {
147 'clientName': 'ANDROID_MUSIC',
148 'clientVersion': '4.32',
149 }
150 },
151 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
152 'REQUIRE_JS_PLAYER': False
153 },
154 'android_creator': {
155 'INNERTUBE_CONTEXT': {
156 'client': {
157 'clientName': 'ANDROID_CREATOR',
158 'clientVersion': '21.24.100',
159 },
160 },
161 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
162 'REQUIRE_JS_PLAYER': False
163 },
164 # ios has HLS live streams
165 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
166 'ios': {
167 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
168 'INNERTUBE_CONTEXT': {
169 'client': {
170 'clientName': 'IOS',
171 'clientVersion': '16.20',
172 }
173 },
174 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
175 'REQUIRE_JS_PLAYER': False
176 },
177 'ios_embedded': {
178 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
179 'INNERTUBE_CONTEXT': {
180 'client': {
181 'clientName': 'IOS_MESSAGES_EXTENSION',
182 'clientVersion': '16.20',
183 },
184 },
185 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
186 'REQUIRE_JS_PLAYER': False
187 },
188 'ios_music': {
189 'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
190 'INNERTUBE_HOST': 'music.youtube.com',
191 'INNERTUBE_CONTEXT': {
192 'client': {
193 'clientName': 'IOS_MUSIC',
194 'clientVersion': '4.32',
195 },
196 },
197 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
198 'REQUIRE_JS_PLAYER': False
199 },
200 'ios_creator': {
201 'INNERTUBE_CONTEXT': {
202 'client': {
203 'clientName': 'IOS_CREATOR',
204 'clientVersion': '21.24.100',
205 },
206 },
207 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
208 'REQUIRE_JS_PLAYER': False
209 },
210 # mweb has 'ultralow' formats
211 # See: https://github.com/yt-dlp/yt-dlp/pull/557
212 'mweb': {
213 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
214 'INNERTUBE_CONTEXT': {
215 'client': {
216 'clientName': 'MWEB',
217 'clientVersion': '2.20210721.07.00',
218 }
219 },
220 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
221 },
222 }
223
224
225 def build_innertube_clients():
226 third_party = {
227 'embedUrl': 'https://google.com', # Can be any valid URL
228 }
229 base_clients = ('android', 'web', 'ios', 'mweb')
230 priority = qualities(base_clients[::-1])
231
232 for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
233 ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
234 ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
235 ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
236 ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
237 ytcfg['priority'] = 10 * priority(client.split('_', 1)[0])
238
239 if client in base_clients:
240 INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
241 agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
242 agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
243 agegate_ytcfg['priority'] -= 1
244 elif client.endswith('_embedded'):
245 ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
246 ytcfg['priority'] -= 2
247 else:
248 ytcfg['priority'] -= 3
249
250
251 build_innertube_clients()
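# Illustrative effect of the call above: every client entry now has 'INNERTUBE_API_KEY',
# 'INNERTUBE_HOST', 'REQUIRE_JS_PLAYER' and a 'priority' set, and each base client gains an
# '<client>_agegate' variant (e.g. 'web_agegate') using clientScreen 'EMBED' plus the thirdParty embedUrl.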
252
253
254 class YoutubeBaseInfoExtractor(InfoExtractor):
255 """Provide base functions for Youtube extractors"""
256
257 _RESERVED_NAMES = (
258 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
259 r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
260 r'browse|oembed|get_video_info|iframe_api|s/player|'
261 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
262
263 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
264
265 _NETRC_MACHINE = 'youtube'
266
267 # If True it will raise an error if no login info is provided
268 _LOGIN_REQUIRED = False
269
270 _INVIDIOUS_SITES = (
271 # invidious-redirect websites
272 r'(?:www\.)?redirect\.invidious\.io',
273 r'(?:(?:www|dev)\.)?invidio\.us',
274 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
275 r'(?:www\.)?invidious\.pussthecat\.org',
276 r'(?:www\.)?invidious\.zee\.li',
277 r'(?:www\.)?invidious\.ethibox\.fr',
278 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
279 # youtube-dl invidious instances list
280 r'(?:(?:www|no)\.)?invidiou\.sh',
281 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
282 r'(?:www\.)?invidious\.kabi\.tk',
283 r'(?:www\.)?invidious\.mastodon\.host',
284 r'(?:www\.)?invidious\.zapashcanon\.fr',
285 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
286 r'(?:www\.)?invidious\.tinfoil-hat\.net',
287 r'(?:www\.)?invidious\.himiko\.cloud',
288 r'(?:www\.)?invidious\.reallyancient\.tech',
289 r'(?:www\.)?invidious\.tube',
290 r'(?:www\.)?invidiou\.site',
291 r'(?:www\.)?invidious\.site',
292 r'(?:www\.)?invidious\.xyz',
293 r'(?:www\.)?invidious\.nixnet\.xyz',
294 r'(?:www\.)?invidious\.048596\.xyz',
295 r'(?:www\.)?invidious\.drycat\.fr',
296 r'(?:www\.)?inv\.skyn3t\.in',
297 r'(?:www\.)?tube\.poal\.co',
298 r'(?:www\.)?tube\.connect\.cafe',
299 r'(?:www\.)?vid\.wxzm\.sx',
300 r'(?:www\.)?vid\.mint\.lgbt',
301 r'(?:www\.)?vid\.puffyan\.us',
302 r'(?:www\.)?yewtu\.be',
303 r'(?:www\.)?yt\.elukerio\.org',
304 r'(?:www\.)?yt\.lelux\.fi',
305 r'(?:www\.)?invidious\.ggc-project\.de',
306 r'(?:www\.)?yt\.maisputain\.ovh',
307 r'(?:www\.)?ytprivate\.com',
308 r'(?:www\.)?invidious\.13ad\.de',
309 r'(?:www\.)?invidious\.toot\.koeln',
310 r'(?:www\.)?invidious\.fdn\.fr',
311 r'(?:www\.)?watch\.nettohikari\.com',
312 r'(?:www\.)?invidious\.namazso\.eu',
313 r'(?:www\.)?invidious\.silkky\.cloud',
314 r'(?:www\.)?invidious\.exonip\.de',
315 r'(?:www\.)?invidious\.riverside\.rocks',
316 r'(?:www\.)?invidious\.blamefran\.net',
317 r'(?:www\.)?invidious\.moomoo\.de',
318 r'(?:www\.)?ytb\.trom\.tf',
319 r'(?:www\.)?yt\.cyberhost\.uk',
320 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
321 r'(?:www\.)?qklhadlycap4cnod\.onion',
322 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
323 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
324 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
325 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
326 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
327 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
328 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
329 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
330 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
331 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
332 )
333
334 def _login(self):
335 """
336 Attempt to log in to YouTube.
337 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
338 """
339
340 if (self._LOGIN_REQUIRED
341 and self.get_param('cookiefile') is None
342 and self.get_param('cookiesfrombrowser') is None):
343 self.raise_login_required(
344 'Login details are needed to download this content', method='cookies')
345 username, password = self._get_login_info()
346 if username:
347 self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
348
349 def _initialize_consent(self):
350 cookies = self._get_cookies('https://www.youtube.com/')
351 if cookies.get('__Secure-3PSID'):
352 return
353 consent_id = None
354 consent = cookies.get('CONSENT')
355 if consent:
356 if 'YES' in consent.value:
357 return
358 consent_id = self._search_regex(
359 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
360 if not consent_id:
361 consent_id = random.randint(100, 999)
362 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
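# Illustrative resulting cookie, assuming a generated id of 123: CONSENT='YES+cb.20210328-17-p0.en+FX+123'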
363
364 def _initialize_pref(self):
365 cookies = self._get_cookies('https://www.youtube.com/')
366 pref_cookie = cookies.get('PREF')
367 pref = {}
368 if pref_cookie:
369 try:
370 pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
371 except ValueError:
372 self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
373 pref.update({'hl': 'en'})
374 self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))
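# e.g. an existing PREF cookie of 'hl=de&f6=8' is re-written as 'hl=en&f6=8' (interface language forced to English)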
375
376 def _real_initialize(self):
377 self._initialize_pref()
378 self._initialize_consent()
379 self._login()
380
381 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
382 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
383 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
384
385 def _get_default_ytcfg(self, client='web'):
386 return copy.deepcopy(INNERTUBE_CLIENTS[client])
387
388 def _get_innertube_host(self, client='web'):
389 return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
390
391 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
392 # try_get but with fallback to default ytcfg client values when present
393 _func = lambda y: try_get(y, getter, expected_type)
394 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
395
396 def _extract_client_name(self, ytcfg, default_client='web'):
397 return self._ytcfg_get_safe(
398 ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
399 lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
400
401 def _extract_client_version(self, ytcfg, default_client='web'):
402 return self._ytcfg_get_safe(
403 ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
404 lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
405
406 def _extract_api_key(self, ytcfg=None, default_client='web'):
407 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
408
409 def _extract_context(self, ytcfg=None, default_client='web'):
410 context = get_first(
411 (ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
412 # Enforce language for extraction
413 traverse_obj(context, 'client', expected_type=dict, default={})['hl'] = 'en'
414 return context
415
416 _SAPISID = None
417
418 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
419 time_now = round(time.time())
420 if self._SAPISID is None:
421 yt_cookies = self._get_cookies('https://www.youtube.com')
422 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
423 # See: https://github.com/yt-dlp/yt-dlp/issues/393
424 sapisid_cookie = dict_get(
425 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
426 if sapisid_cookie and sapisid_cookie.value:
427 self._SAPISID = sapisid_cookie.value
428 self.write_debug('Extracted SAPISID cookie')
429 # SAPISID cookie is required if not already present
430 if not yt_cookies.get('SAPISID'):
431 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
432 self._set_cookie(
433 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
434 else:
435 self._SAPISID = False
436 if not self._SAPISID:
437 return None
438 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
439 sapisidhash = hashlib.sha1(
440 f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
441 return f'SAPISIDHASH {time_now}_{sapisidhash}'
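# Illustrative header value (example timestamp): 'SAPISIDHASH 1626900000_<40-char-hex sha1>';
# generate_api_headers() below sends this as the 'Authorization' header.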
442
443 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
444 note='Downloading API JSON', errnote='Unable to download API page',
445 context=None, api_key=None, api_hostname=None, default_client='web'):
446
447 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
448 data.update(query)
449 real_headers = self.generate_api_headers(default_client=default_client)
450 real_headers.update({'content-type': 'application/json'})
451 if headers:
452 real_headers.update(headers)
453 return self._download_json(
454 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
455 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
456 data=json.dumps(data).encode('utf8'), headers=real_headers,
457 query={'key': api_key or self._extract_api_key()})
458
459 def extract_yt_initial_data(self, item_id, webpage, fatal=True):
460 data = self._search_regex(
461 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
462 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
463 if data:
464 return self._parse_json(data, item_id, fatal=fatal)
465
466 @staticmethod
467 def _extract_session_index(*data):
468 """
469 Index of current account in account list.
470 See: https://github.com/yt-dlp/yt-dlp/pull/519
471 """
472 for ytcfg in data:
473 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
474 if session_index is not None:
475 return session_index
476
477 # Deprecated?
478 def _extract_identity_token(self, ytcfg=None, webpage=None):
479 if ytcfg:
480 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
481 if token:
482 return token
483 if webpage:
484 return self._search_regex(
485 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
486 'identity token', default=None, fatal=False)
487
488 @staticmethod
489 def _extract_account_syncid(*args):
490 """
491 Extract syncId required to download private playlists of secondary channels
492 @params response and/or ytcfg
493 """
494 for data in args:
495 # ytcfg includes channel_syncid if on secondary channel
496 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
497 if delegated_sid:
498 return delegated_sid
499 sync_ids = (try_get(
500 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
501 lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
502 if len(sync_ids) >= 2 and sync_ids[1]:
503 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
504 # and just "user_syncid||" for primary channel. We only want the channel_syncid
505 return sync_ids[0]
506
507 @staticmethod
508 def _extract_visitor_data(*args):
509 """
510 Extracts visitorData from an API response or ytcfg
511 Appears to be used to track session state
512 """
513 return get_first(
514 args, (('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))),
515 expected_type=str)
516
517 @property
518 def is_authenticated(self):
519 return bool(self._generate_sapisidhash_header())
520
521 def extract_ytcfg(self, video_id, webpage):
522 if not webpage:
523 return {}
524 return self._parse_json(
525 self._search_regex(
526 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
527 default='{}'), video_id, fatal=False) or {}
528
529 def generate_api_headers(
530 self, *, ytcfg=None, account_syncid=None, session_index=None,
531 visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
532
533 origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
534 headers = {
535 'X-YouTube-Client-Name': compat_str(
536 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
537 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
538 'Origin': origin,
539 'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
540 'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
541 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
542 }
543 if session_index is None:
544 session_index = self._extract_session_index(ytcfg)
545 if account_syncid or session_index is not None:
546 headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
547
548 auth = self._generate_sapisidhash_header(origin)
549 if auth is not None:
550 headers['Authorization'] = auth
551 headers['X-Origin'] = origin
552 return {h: v for h, v in headers.items() if v is not None}
553
554 @staticmethod
555 def _build_api_continuation_query(continuation, ctp=None):
556 query = {
557 'continuation': continuation
558 }
559 # TODO: Inconsistency with clickTrackingParams.
560 # Currently we have a fixed ctp contained within context (from ytcfg)
561 # and a ctp in root query for continuation.
562 if ctp:
563 query['clickTracking'] = {'clickTrackingParams': ctp}
564 return query
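# Illustrative shape of the returned query:
# {'continuation': '<token>', 'clickTracking': {'clickTrackingParams': '<ctp>'}}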
565
566 @classmethod
567 def _extract_next_continuation_data(cls, renderer):
568 next_continuation = try_get(
569 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
570 lambda x: x['continuation']['reloadContinuationData']), dict)
571 if not next_continuation:
572 return
573 continuation = next_continuation.get('continuation')
574 if not continuation:
575 return
576 ctp = next_continuation.get('clickTrackingParams')
577 return cls._build_api_continuation_query(continuation, ctp)
578
579 @classmethod
580 def _extract_continuation_ep_data(cls, continuation_ep: dict):
581 if isinstance(continuation_ep, dict):
582 continuation = try_get(
583 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
584 if not continuation:
585 return
586 ctp = continuation_ep.get('clickTrackingParams')
587 return cls._build_api_continuation_query(continuation, ctp)
588
589 @classmethod
590 def _extract_continuation(cls, renderer):
591 next_continuation = cls._extract_next_continuation_data(renderer)
592 if next_continuation:
593 return next_continuation
594
595 contents = []
596 for key in ('contents', 'items'):
597 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
598
599 for content in contents:
600 if not isinstance(content, dict):
601 continue
602 continuation_ep = try_get(
603 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
604 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
605 dict)
606 continuation = cls._extract_continuation_ep_data(continuation_ep)
607 if continuation:
608 return continuation
609
610 @classmethod
611 def _extract_alerts(cls, data):
612 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
613 if not isinstance(alert_dict, dict):
614 continue
615 for alert in alert_dict.values():
616 alert_type = alert.get('type')
617 if not alert_type:
618 continue
619 message = cls._get_text(alert, 'text')
620 if message:
621 yield alert_type, message
622
623 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
624 errors = []
625 warnings = []
626 for alert_type, alert_message in alerts:
627 if alert_type.lower() == 'error' and fatal:
628 errors.append([alert_type, alert_message])
629 else:
630 warnings.append([alert_type, alert_message])
631
632 for alert_type, alert_message in (warnings + errors[:-1]):
633 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
634 if errors:
635 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
636
637 def _extract_and_report_alerts(self, data, *args, **kwargs):
638 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
639
640 def _extract_badges(self, renderer: dict):
641 badges = set()
642 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
643 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
644 if label:
645 badges.add(label.lower())
646 return badges
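# Illustrative return value: {'live now', 'members only'}; these lower-cased labels are what
# _extract_video() checks for 'premium', 'members only' and 'live now'.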
647
648 @staticmethod
649 def _get_text(data, *path_list, max_runs=None):
650 for path in path_list or [None]:
651 if path is None:
652 obj = [data]
653 else:
654 obj = traverse_obj(data, path, default=[])
655 if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
656 obj = [obj]
657 for item in obj:
658 text = try_get(item, lambda x: x['simpleText'], compat_str)
659 if text:
660 return text
661 runs = try_get(item, lambda x: x['runs'], list) or []
662 if not runs and isinstance(item, list):
663 runs = item
664
665 runs = runs[:min(len(runs), max_runs or len(runs))]
666 text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
667 if text:
668 return text
669
670 @staticmethod
671 def _extract_thumbnails(data, *path_list):
672 """
673 Extract thumbnails from thumbnails dict
674 @param path_list: path list to level that contains 'thumbnails' key
675 """
676 thumbnails = []
677 for path in path_list or [()]:
678 for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[]):
679 thumbnail_url = url_or_none(thumbnail.get('url'))
680 if not thumbnail_url:
681 continue
682 # Sometimes youtube gives a wrong thumbnail URL. See:
683 # https://github.com/yt-dlp/yt-dlp/issues/233
684 # https://github.com/ytdl-org/youtube-dl/issues/28023
685 if 'maxresdefault' in thumbnail_url:
686 thumbnail_url = thumbnail_url.split('?')[0]
687 thumbnails.append({
688 'url': thumbnail_url,
689 'height': int_or_none(thumbnail.get('height')),
690 'width': int_or_none(thumbnail.get('width')),
691 })
692 return thumbnails
693
694 @staticmethod
695 def extract_relative_time(relative_time_text):
696 """
697 Extracts a relative time from a string and converts it to a datetime object
698 e.g. 'streamed 6 days ago', '5 seconds ago (edited)'
699 """
700 mobj = re.search(r'(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
701 if mobj:
702 try:
703 return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')), precision='auto')
704 except ValueError:
705 return None
706
707 def _extract_time_text(self, renderer, *path_list):
708 text = self._get_text(renderer, *path_list) or ''
709 dt = self.extract_relative_time(text)
710 timestamp = None
711 if isinstance(dt, datetime.datetime):
712 timestamp = calendar.timegm(dt.timetuple())
713 if text and timestamp is None:
714 self.report_warning('Cannot parse localized time text' + bug_reports_message(), only_once=True)
715 return timestamp, text
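# Illustrative: 'Streamed 6 days ago' yields (a unix timestamp roughly 6 days in the past, the original text);
# text that cannot be parsed yields (None, text) after the warning above.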
716
717 def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
718 ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
719 default_client='web'):
720 response = None
721 last_error = None
722 count = -1
723 retries = self.get_param('extractor_retries', 3)
724 if check_get_keys is None:
725 check_get_keys = []
726 while count < retries:
727 count += 1
728 if last_error:
729 self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
730 try:
731 response = self._call_api(
732 ep=ep, fatal=True, headers=headers,
733 video_id=item_id, query=query,
734 context=self._extract_context(ytcfg, default_client),
735 api_key=self._extract_api_key(ytcfg, default_client),
736 api_hostname=api_hostname, default_client=default_client,
737 note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
738 except ExtractorError as e:
739 if isinstance(e.cause, network_exceptions):
740 if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
741 e.cause.seek(0)
742 yt_error = try_get(
743 self._parse_json(e.cause.read().decode(), item_id, fatal=False),
744 lambda x: x['error']['message'], compat_str)
745 if yt_error:
746 self._report_alerts([('ERROR', yt_error)], fatal=False)
747 # Downloading page may result in intermittent 5xx HTTP error
748 # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
749 # We also want to catch all other network exceptions since errors in later pages can be troublesome
750 # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
751 if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
752 last_error = error_to_compat_str(e.cause or e.msg)
753 if count < retries:
754 continue
755 if fatal:
756 raise
757 else:
758 self.report_warning(error_to_compat_str(e))
759 return
760
761 else:
762 try:
763 self._extract_and_report_alerts(response, only_once=True)
764 except ExtractorError as e:
765 # YouTube servers may return errors we want to retry on in a 200 OK response
766 # See: https://github.com/yt-dlp/yt-dlp/issues/839
767 if 'unknown error' in e.msg.lower():
768 last_error = e.msg
769 continue
770 if fatal:
771 raise
772 self.report_warning(error_to_compat_str(e))
773 return
774 if not check_get_keys or dict_get(response, check_get_keys):
775 break
776 # Youtube sometimes sends incomplete data
777 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
778 last_error = 'Incomplete data received'
779 if count >= retries:
780 if fatal:
781 raise ExtractorError(last_error)
782 else:
783 self.report_warning(last_error)
784 return
785 return response
786
787 @staticmethod
788 def is_music_url(url):
789 return re.match(r'https?://music\.youtube\.com/', url) is not None
790
791 def _extract_video(self, renderer):
792 video_id = renderer.get('videoId')
793 title = self._get_text(renderer, 'title')
794 description = self._get_text(renderer, 'descriptionSnippet')
795 duration = parse_duration(self._get_text(
796 renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
797 view_count_text = self._get_text(renderer, 'viewCountText') or ''
798 view_count = str_to_int(self._search_regex(
799 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
800 'view count', default=None))
801
802 uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
803 channel_id = traverse_obj(
804 renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'), expected_type=str, get_all=False)
805 timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
806 scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
807 overlay_style = traverse_obj(
808 renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), get_all=False, expected_type=str)
809 badges = self._extract_badges(renderer)
810 thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
811
812 return {
813 '_type': 'url',
814 'ie_key': YoutubeIE.ie_key(),
815 'id': video_id,
816 'url': f'https://www.youtube.com/watch?v={video_id}',
817 'title': title,
818 'description': description,
819 'duration': duration,
820 'view_count': view_count,
821 'uploader': uploader,
822 'channel_id': channel_id,
823 'thumbnails': thumbnails,
824 'upload_date': strftime_or_none(timestamp, '%Y%m%d'),
825 'live_status': ('is_upcoming' if scheduled_timestamp is not None
826 else 'was_live' if 'streamed' in time_text.lower()
827 else 'is_live' if overlay_style is not None and overlay_style == 'LIVE' or 'live now' in badges
828 else None),
829 'release_timestamp': scheduled_timestamp,
830 'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
831 }
832
833
834 class YoutubeIE(YoutubeBaseInfoExtractor):
835 IE_DESC = 'YouTube'
836 _VALID_URL = r"""(?x)^
837 (
838 (?:https?://|//) # http(s):// or protocol-independent URL
839 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
840 (?:www\.)?deturl\.com/www\.youtube\.com|
841 (?:www\.)?pwnyoutube\.com|
842 (?:www\.)?hooktube\.com|
843 (?:www\.)?yourepeat\.com|
844 tube\.majestyc\.net|
845 %(invidious)s|
846 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
847 (?:.*?\#/)? # handle anchor (#/) redirect urls
848 (?: # the various things that can precede the ID:
849 (?:(?:v|embed|e|shorts)/(?!videoseries)) # v/ or embed/ or e/ or shorts/
850 |(?: # or the v= param in all its forms
851 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
852 (?:\?|\#!?) # the params delimiter ? or # or #!
853 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
854 v=
855 )
856 ))
857 |(?:
858 youtu\.be| # just youtu.be/xxxx
859 vid\.plus| # or vid.plus/xxxx
860 zwearz\.com/watch| # or zwearz.com/watch/xxxx
861 %(invidious)s
862 )/
863 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
864 )
865 )? # all until now is optional -> you can pass the naked ID
866 (?P<id>[0-9A-Za-z_-]{11}) # here it is! the YouTube video ID
867 (?(1).+)? # if we found the ID, everything can follow
868 (?:\#|$)""" % {
869 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
870 }
871 _PLAYER_INFO_RE = (
872 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
873 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
874 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
875 )
876 _formats = {
877 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
878 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
879 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
880 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
881 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
882 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
883 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
884 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
885 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
886 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
887 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
888 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
889 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
890 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
891 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
892 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
893 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
894 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
895
896
897 # 3D videos
898 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
899 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
900 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
901 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
902 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
903 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
904 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
905
906 # Apple HTTP Live Streaming
907 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
908 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
909 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
910 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
911 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
912 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
913 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
914 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
915
916 # DASH mp4 video
917 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
918 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
919 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
920 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
921 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
922 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
923 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
924 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
925 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
926 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
927 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
928 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
929
930 # Dash mp4 audio
931 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
932 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
933 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
934 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
935 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
936 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
937 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
938
939 # Dash webm
940 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
941 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
942 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
943 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
944 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
945 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
946 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
947 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
948 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
949 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
950 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
951 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
952 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
953 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
954 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
955 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
956 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
957 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
958 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
959 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
960 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
961 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
962
963 # Dash webm audio
964 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
965 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
966
967 # Dash webm audio with opus inside
968 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
969 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
970 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
971
972 # RTMP (unnamed)
973 '_rtmp': {'protocol': 'rtmp'},
974
975 # av01 video only formats sometimes served with "unknown" codecs
976 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
977 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
978 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
979 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
980 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
981 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
982 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
983 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
984 }
985 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
986
987 _GEO_BYPASS = False
988
989 IE_NAME = 'youtube'
990 _TESTS = [
991 {
992 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
993 'info_dict': {
994 'id': 'BaW_jenozKc',
995 'ext': 'mp4',
996 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
997 'uploader': 'Philipp Hagemeister',
998 'uploader_id': 'phihag',
999 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1000 'channel': 'Philipp Hagemeister',
1001 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1002 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1003 'upload_date': '20121002',
1004 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1005 'categories': ['Science & Technology'],
1006 'tags': ['youtube-dl'],
1007 'duration': 10,
1008 'view_count': int,
1009 'like_count': int,
1010 # 'dislike_count': int,
1011 'availability': 'public',
1012 'playable_in_embed': True,
1013 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1014 'live_status': 'not_live',
1015 'age_limit': 0,
1016 'start_time': 1,
1017 'end_time': 9,
1018 }
1019 },
1020 {
1021 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1022 'note': 'Embed-only video (#1746)',
1023 'info_dict': {
1024 'id': 'yZIXLfi8CZQ',
1025 'ext': 'mp4',
1026 'upload_date': '20120608',
1027 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1028 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1029 'uploader': 'SET India',
1030 'uploader_id': 'setindia',
1031 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1032 'age_limit': 18,
1033 },
1034 'skip': 'Private video',
1035 },
1036 {
1037 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1038 'note': 'Use the first video ID in the URL',
1039 'info_dict': {
1040 'id': 'BaW_jenozKc',
1041 'ext': 'mp4',
1042 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1043 'uploader': 'Philipp Hagemeister',
1044 'uploader_id': 'phihag',
1045 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1046 'upload_date': '20121002',
1047 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1048 'categories': ['Science & Technology'],
1049 'tags': ['youtube-dl'],
1050 'duration': 10,
1051 'view_count': int,
1052 'like_count': int,
1053 'dislike_count': int,
1054 },
1055 'params': {
1056 'skip_download': True,
1057 },
1058 },
1059 {
1060 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1061 'note': '256k DASH audio (format 141) via DASH manifest',
1062 'info_dict': {
1063 'id': 'a9LDPn-MO4I',
1064 'ext': 'm4a',
1065 'upload_date': '20121002',
1066 'uploader_id': '8KVIDEO',
1067 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1068 'description': '',
1069 'uploader': '8KVIDEO',
1070 'title': 'UHDTV TEST 8K VIDEO.mp4'
1071 },
1072 'params': {
1073 'youtube_include_dash_manifest': True,
1074 'format': '141',
1075 },
1076 'skip': 'format 141 not served anymore',
1077 },
1078 # DASH manifest with encrypted signature
1079 {
1080 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1081 'info_dict': {
1082 'id': 'IB3lcPjvWLA',
1083 'ext': 'm4a',
1084 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1085 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1086 'duration': 244,
1087 'uploader': 'AfrojackVEVO',
1088 'uploader_id': 'AfrojackVEVO',
1089 'upload_date': '20131011',
1090 'abr': 129.495,
1091 },
1092 'params': {
1093 'youtube_include_dash_manifest': True,
1094 'format': '141/bestaudio[ext=m4a]',
1095 },
1096 },
1097 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1098 {
1099 'note': 'Embed allowed age-gate video',
1100 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1101 'info_dict': {
1102 'id': 'HtVdAasjOgU',
1103 'ext': 'mp4',
1104 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1105 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1106 'duration': 142,
1107 'uploader': 'The Witcher',
1108 'uploader_id': 'WitcherGame',
1109 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1110 'upload_date': '20140605',
1111 'age_limit': 18,
1112 },
1113 },
1114 {
1115 'note': 'Age-gate video with embed allowed in public site',
1116 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1117 'info_dict': {
1118 'id': 'HsUATh_Nc2U',
1119 'ext': 'mp4',
1120 'title': 'Godzilla 2 (Official Video)',
1121 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1122 'upload_date': '20200408',
1123 'uploader_id': 'FlyingKitty900',
1124 'uploader': 'FlyingKitty',
1125 'age_limit': 18,
1126 },
1127 },
1128 {
1129 'note': 'Age-gate video embeddable only with clientScreen=EMBED',
1130 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1131 'info_dict': {
1132 'id': 'Tq92D6wQ1mg',
1133 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1134 'ext': 'mp4',
1135 'upload_date': '20191227',
1136 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1137 'uploader': 'Projekt Melody',
1138 'description': 'md5:17eccca93a786d51bc67646756894066',
1139 'age_limit': 18,
1140 },
1141 },
1142 {
1143 'note': 'Non age-gated non-embeddable video',
1144 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1145 'info_dict': {
1146 'id': 'MeJVWBSsPAY',
1147 'ext': 'mp4',
1148 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1149 'uploader': 'Herr Lurik',
1150 'uploader_id': 'st3in234',
1151 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1152 'upload_date': '20130730',
1153 },
1154 },
1155 {
1156 'note': 'Non-bypassable age-gated video',
1157 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1158 'only_matching': True,
1159 },
1160 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1161 # YouTube Red ad is not captured for creator
1162 {
1163 'url': '__2ABJjxzNo',
1164 'info_dict': {
1165 'id': '__2ABJjxzNo',
1166 'ext': 'mp4',
1167 'duration': 266,
1168 'upload_date': '20100430',
1169 'uploader_id': 'deadmau5',
1170 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1171 'creator': 'deadmau5',
1172 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1173 'uploader': 'deadmau5',
1174 'title': 'Deadmau5 - Some Chords (HD)',
1175 'alt_title': 'Some Chords',
1176 },
1177 'expected_warnings': [
1178 'DASH manifest missing',
1179 ]
1180 },
1181 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1182 {
1183 'url': 'lqQg6PlCWgI',
1184 'info_dict': {
1185 'id': 'lqQg6PlCWgI',
1186 'ext': 'mp4',
1187 'duration': 6085,
1188 'upload_date': '20150827',
1189 'uploader_id': 'olympic',
1190 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1191 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1192 'uploader': 'Olympics',
1193 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1194 },
1195 'params': {
1196 'skip_download': 'requires avconv',
1197 }
1198 },
1199 # Non-square pixels
1200 {
1201 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1202 'info_dict': {
1203 'id': '_b-2C3KPAM0',
1204 'ext': 'mp4',
1205 'stretched_ratio': 16 / 9.,
1206 'duration': 85,
1207 'upload_date': '20110310',
1208 'uploader_id': 'AllenMeow',
1209 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1210 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1211 'uploader': '孫ᄋᄅ',
1212 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1213 },
1214 },
1215 # url_encoded_fmt_stream_map is empty string
1216 {
1217 'url': 'qEJwOuvDf7I',
1218 'info_dict': {
1219 'id': 'qEJwOuvDf7I',
1220 'ext': 'webm',
1221 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1222 'description': '',
1223 'upload_date': '20150404',
1224 'uploader_id': 'spbelect',
1225 'uploader': 'Наблюдатели Петербурга',
1226 },
1227 'params': {
1228 'skip_download': 'requires avconv',
1229 },
1230 'skip': 'This live event has ended.',
1231 },
1232 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1233 {
1234 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1235 'info_dict': {
1236 'id': 'FIl7x6_3R5Y',
1237 'ext': 'webm',
1238 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1239 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1240 'duration': 220,
1241 'upload_date': '20150625',
1242 'uploader_id': 'dorappi2000',
1243 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1244 'uploader': 'dorappi2000',
1245 'formats': 'mincount:31',
1246 },
1247 'skip': 'no longer applicable',
1248 },
1249 # DASH manifest with segment_list
1250 {
1251 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1252 'md5': '8ce563a1d667b599d21064e982ab9e31',
1253 'info_dict': {
1254 'id': 'CsmdDsKjzN8',
1255 'ext': 'mp4',
1256 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1257 'uploader': 'Airtek',
1258 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1259 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1260 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1261 },
1262 'params': {
1263 'youtube_include_dash_manifest': True,
1264 'format': '135', # bestvideo
1265 },
1266 'skip': 'This live event has ended.',
1267 },
1268 {
1269 # Multifeed videos (multiple cameras), URL is for Main Camera
1270 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1271 'info_dict': {
1272 'id': 'jvGDaLqkpTg',
1273 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1274 'description': 'md5:e03b909557865076822aa169218d6a5d',
1275 },
1276 'playlist': [{
1277 'info_dict': {
1278 'id': 'jvGDaLqkpTg',
1279 'ext': 'mp4',
1280 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1281 'description': 'md5:e03b909557865076822aa169218d6a5d',
1282 'duration': 10643,
1283 'upload_date': '20161111',
1284 'uploader': 'Team PGP',
1285 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1286 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1287 },
1288 }, {
1289 'info_dict': {
1290 'id': '3AKt1R1aDnw',
1291 'ext': 'mp4',
1292 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1293 'description': 'md5:e03b909557865076822aa169218d6a5d',
1294 'duration': 10991,
1295 'upload_date': '20161111',
1296 'uploader': 'Team PGP',
1297 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1298 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1299 },
1300 }, {
1301 'info_dict': {
1302 'id': 'RtAMM00gpVc',
1303 'ext': 'mp4',
1304 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1305 'description': 'md5:e03b909557865076822aa169218d6a5d',
1306 'duration': 10995,
1307 'upload_date': '20161111',
1308 'uploader': 'Team PGP',
1309 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1310 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1311 },
1312 }, {
1313 'info_dict': {
1314 'id': '6N2fdlP3C5U',
1315 'ext': 'mp4',
1316 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1317 'description': 'md5:e03b909557865076822aa169218d6a5d',
1318 'duration': 10990,
1319 'upload_date': '20161111',
1320 'uploader': 'Team PGP',
1321 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1322 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1323 },
1324 }],
1325 'params': {
1326 'skip_download': True,
1327 },
1328 'skip': 'Not multifeed anymore',
1329 },
1330 {
1331 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1332 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1333 'info_dict': {
1334 'id': 'gVfLd0zydlo',
1335 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1336 },
1337 'playlist_count': 2,
1338 'skip': 'Not multifeed anymore',
1339 },
1340 {
1341 'url': 'https://vid.plus/FlRa-iH7PGw',
1342 'only_matching': True,
1343 },
1344 {
1345 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1346 'only_matching': True,
1347 },
1348 {
1349 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1350 # Also tests cut-off URL expansion in video description (see
1351 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1352 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1353 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1354 'info_dict': {
1355 'id': 'lsguqyKfVQg',
1356 'ext': 'mp4',
1357 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1358 'alt_title': 'Dark Walk',
1359 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1360 'duration': 133,
1361 'upload_date': '20151119',
1362 'uploader_id': 'IronSoulElf',
1363 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1364 'uploader': 'IronSoulElf',
1365 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1366 'track': 'Dark Walk',
1367 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1368 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1369 },
1370 'params': {
1371 'skip_download': True,
1372 },
1373 },
1374 {
1375 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1376 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1377 'only_matching': True,
1378 },
1379 {
1380 # Video with yt:stretch=17:0
1381 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1382 'info_dict': {
1383 'id': 'Q39EVAstoRM',
1384 'ext': 'mp4',
1385 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1386 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1387 'upload_date': '20151107',
1388 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1389 'uploader': 'CH GAMER DROID',
1390 },
1391 'params': {
1392 'skip_download': True,
1393 },
1394 'skip': 'This video does not exist.',
1395 },
1396 {
1397 # Video with incomplete 'yt:stretch=16:'
1398 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1399 'only_matching': True,
1400 },
1401 {
1402 # Video licensed under Creative Commons
1403 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1404 'info_dict': {
1405 'id': 'M4gD1WSo5mA',
1406 'ext': 'mp4',
1407 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1408 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1409 'duration': 721,
1410 'upload_date': '20150127',
1411 'uploader_id': 'BerkmanCenter',
1412 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1413 'uploader': 'The Berkman Klein Center for Internet & Society',
1414 'license': 'Creative Commons Attribution license (reuse allowed)',
1415 },
1416 'params': {
1417 'skip_download': True,
1418 },
1419 },
1420 {
1421 # Channel-like uploader_url
1422 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1423 'info_dict': {
1424 'id': 'eQcmzGIKrzg',
1425 'ext': 'mp4',
1426 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1427 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1428 'duration': 4060,
1429 'upload_date': '20151119',
1430 'uploader': 'Bernie Sanders',
1431 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1432 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1433 'license': 'Creative Commons Attribution license (reuse allowed)',
1434 },
1435 'params': {
1436 'skip_download': True,
1437 },
1438 },
1439 {
1440 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',
1441 'only_matching': True,
1442 },
1443 {
1444 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1445 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1446 'only_matching': True,
1447 },
1448 {
1449 # Rental video preview
1450 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1451 'info_dict': {
1452 'id': 'uGpuVWrhIzE',
1453 'ext': 'mp4',
1454 'title': 'Piku - Trailer',
1455 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1456 'upload_date': '20150811',
1457 'uploader': 'FlixMatrix',
1458 'uploader_id': 'FlixMatrixKaravan',
1459 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1460 'license': 'Standard YouTube License',
1461 },
1462 'params': {
1463 'skip_download': True,
1464 },
1465 'skip': 'This video is not available.',
1466 },
1467 {
1468 # YouTube Red video with episode data
1469 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1470 'info_dict': {
1471 'id': 'iqKdEhx-dD4',
1472 'ext': 'mp4',
1473 'title': 'Isolation - Mind Field (Ep 1)',
1474 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1475 'duration': 2085,
1476 'upload_date': '20170118',
1477 'uploader': 'Vsauce',
1478 'uploader_id': 'Vsauce',
1479 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1480 'series': 'Mind Field',
1481 'season_number': 1,
1482 'episode_number': 1,
1483 },
1484 'params': {
1485 'skip_download': True,
1486 },
1487 'expected_warnings': [
1488 'Skipping DASH manifest',
1489 ],
1490 },
1491 {
1492 # The following content has been identified by the YouTube community
1493 # as inappropriate or offensive to some audiences.
1494 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1495 'info_dict': {
1496 'id': '6SJNVb0GnPI',
1497 'ext': 'mp4',
1498 'title': 'Race Differences in Intelligence',
1499 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1500 'duration': 965,
1501 'upload_date': '20140124',
1502 'uploader': 'New Century Foundation',
1503 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1504 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1505 },
1506 'params': {
1507 'skip_download': True,
1508 },
1509 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1510 },
1511 {
1512 # itag 212
1513 'url': '1t24XAntNCY',
1514 'only_matching': True,
1515 },
1516 {
1517 # geo restricted to JP
1518 'url': 'sJL6WA-aGkQ',
1519 'only_matching': True,
1520 },
1521 {
1522 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1523 'only_matching': True,
1524 },
1525 {
1526 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1527 'only_matching': True,
1528 },
1529 {
1530 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1531 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1532 'only_matching': True,
1533 },
1534 {
1535 # DRM protected
1536 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1537 'only_matching': True,
1538 },
1539 {
1540 # Video with unsupported adaptive stream type formats
1541 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1542 'info_dict': {
1543 'id': 'Z4Vy8R84T1U',
1544 'ext': 'mp4',
1545 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1546 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1547 'duration': 433,
1548 'upload_date': '20130923',
1549 'uploader': 'Amelia Putri Harwita',
1550 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1551 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1552 'formats': 'maxcount:10',
1553 },
1554 'params': {
1555 'skip_download': True,
1556 'youtube_include_dash_manifest': False,
1557 },
1558 'skip': 'Not relevant anymore',
1559 },
1560 {
1561 # Youtube Music Auto-generated description
1562 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1563 'info_dict': {
1564 'id': 'MgNrAu2pzNs',
1565 'ext': 'mp4',
1566 'title': 'Voyeur Girl',
1567 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1568 'upload_date': '20190312',
1569 'uploader': 'Stephen - Topic',
1570 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1571 'artist': 'Stephen',
1572 'track': 'Voyeur Girl',
1573 'album': 'it\'s too much love to know my dear',
1574 'release_date': '20190313',
1575 'release_year': 2019,
1576 },
1577 'params': {
1578 'skip_download': True,
1579 },
1580 },
1581 {
1582 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1583 'only_matching': True,
1584 },
1585 {
1586 # invalid -> valid video id redirection
1587 'url': 'DJztXj2GPfl',
1588 'info_dict': {
1589 'id': 'DJztXj2GPfk',
1590 'ext': 'mp4',
1591 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1592 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1593 'upload_date': '20090125',
1594 'uploader': 'Prochorowka',
1595 'uploader_id': 'Prochorowka',
1596 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1597 'artist': 'Panjabi MC',
1598 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1599 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1600 },
1601 'params': {
1602 'skip_download': True,
1603 },
1604 'skip': 'Video unavailable',
1605 },
1606 {
1607 # empty description results in an empty string
1608 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1609 'info_dict': {
1610 'id': 'x41yOUIvK2k',
1611 'ext': 'mp4',
1612 'title': 'IMG 3456',
1613 'description': '',
1614 'upload_date': '20170613',
1615 'uploader_id': 'ElevageOrVert',
1616 'uploader': 'ElevageOrVert',
1617 },
1618 'params': {
1619 'skip_download': True,
1620 },
1621 },
1622 {
1623 # with '};' inside yt initial data (see [1])
1624 # see [2] for an example with '};' inside ytInitialPlayerResponse
1625 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1626 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1627 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1628 'info_dict': {
1629 'id': 'CHqg6qOn4no',
1630 'ext': 'mp4',
1631 'title': 'Part 77 Sort a list of simple types in c#',
1632 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1633 'upload_date': '20130831',
1634 'uploader_id': 'kudvenkat',
1635 'uploader': 'kudvenkat',
1636 },
1637 'params': {
1638 'skip_download': True,
1639 },
1640 },
1641 {
1642 # another example of '};' in ytInitialData
1643 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1644 'only_matching': True,
1645 },
1646 {
1647 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1648 'only_matching': True,
1649 },
1650 {
1651 # https://github.com/ytdl-org/youtube-dl/pull/28094
1652 'url': 'OtqTfy26tG0',
1653 'info_dict': {
1654 'id': 'OtqTfy26tG0',
1655 'ext': 'mp4',
1656 'title': 'Burn Out',
1657 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1658 'upload_date': '20141120',
1659 'uploader': 'The Cinematic Orchestra - Topic',
1660 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1661 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1662 'artist': 'The Cinematic Orchestra',
1663 'track': 'Burn Out',
1664 'album': 'Every Day',
1665 'release_date': None,
1666 'release_year': None,
1667 },
1668 'params': {
1669 'skip_download': True,
1670 },
1671 },
1672 {
1673 # controversial video, only works with bpctr when authenticated with cookies
1674 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1675 'only_matching': True,
1676 },
1677 {
1678 # controversial video, requires bpctr/contentCheckOk
1679 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1680 'info_dict': {
1681 'id': 'SZJvDhaSDnc',
1682 'ext': 'mp4',
1683 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1684 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1685 'uploader': 'CBS This Morning',
1686 'uploader_id': 'CBSThisMorning',
1687 'upload_date': '20140716',
1688 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1689 }
1690 },
1691 {
1692 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1693 'url': 'cBvYw8_A0vQ',
1694 'info_dict': {
1695 'id': 'cBvYw8_A0vQ',
1696 'ext': 'mp4',
1697 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1698 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1699 'upload_date': '20201120',
1700 'uploader': 'Walk around Japan',
1701 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1702 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1703 },
1704 'params': {
1705 'skip_download': True,
1706 },
1707 }, {
1708 # Has multiple audio streams
1709 'url': 'WaOKSUlf4TM',
1710 'only_matching': True
1711 }, {
1712 # Requires Premium: has format 141 when requested using YTM url
1713 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1714 'only_matching': True
1715 }, {
1716 # multiple subtitles with same lang_code
1717 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1718 'only_matching': True,
1719 }, {
1720 # Force use android client fallback
1721 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1722 'info_dict': {
1723 'id': 'YOelRv7fMxY',
1724 'title': 'DIGGING A SECRET TUNNEL Part 1',
1725 'ext': '3gp',
1726 'upload_date': '20210624',
1727 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1728 'uploader': 'colinfurze',
1729 'uploader_id': 'colinfurze',
1730 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1731 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1732 },
1733 'params': {
1734 'format': '17', # 3gp format available on android
1735 'extractor_args': {'youtube': {'player_client': ['android']}},
1736 },
1737 },
1738 {
1739 # Skip download of additional client configs (remix client config in this case)
1740 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1741 'only_matching': True,
1742 'params': {
1743 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1744 },
1745 }, {
1746 # shorts
1747 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1748 'only_matching': True,
1749 }, {
1750 'note': 'Storyboards',
1751 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
1752 'info_dict': {
1753 'id': '5KLPxDtMqe8',
1754 'ext': 'mhtml',
1755 'format_id': 'sb0',
1756 'title': 'Your Brain is Plastic',
1757 'uploader_id': 'scishow',
1758 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
1759 'upload_date': '20140324',
1760 'uploader': 'SciShow',
1761 }, 'params': {'format': 'mhtml', 'skip_download': True}
1762 }
1763 ]
1764
1765 @classmethod
1766 def suitable(cls, url):
1767 from ..utils import parse_qs
1768
1769 qs = parse_qs(url)
1770 if qs.get('list', [None])[0]:
1771 return False
1772 return super(YoutubeIE, cls).suitable(url)
1773
1774 def __init__(self, *args, **kwargs):
1775 super(YoutubeIE, self).__init__(*args, **kwargs)
1776 self._code_cache = {}
1777 self._player_cache = {}
1778
1779 def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
1780 EXPIRATION_DURATION = 18_000
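# 18 000 seconds = 5 hours: how long the previously extracted manifests are trusted before
# refetch_manifest() pulls fresh player responses (YouTube stream URLs expire, so a
# conservative refresh window is assumed here).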
1781 lock = threading.Lock()
1782
1783 is_live = True
1784 expiration_time = time.time() + EXPIRATION_DURATION
1785 formats = [f for f in formats if f.get('is_from_start')]
1786
1787 def refetch_manifest(format_id):
1788 nonlocal formats, expiration_time, is_live
1789 if time.time() <= expiration_time:
1790 return
1791
1792 _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
1793 video_details = traverse_obj(
1794 prs, (..., 'videoDetails'), expected_type=dict, default=[])
1795 microformats = traverse_obj(
1796 prs, (..., 'microformat', 'playerMicroformatRenderer'),
1797 expected_type=dict, default=[])
1798 _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
1799 expiration_time = time.time() + EXPIRATION_DURATION
1800
1801 def mpd_feed(format_id):
1802 """
1803 @returns (manifest_url, manifest_stream_number, is_live) or None
1804 """
1805 with lock:
1806 refetch_manifest(format_id)
1807
1808 f = next((f for f in formats if f['format_id'] == format_id), None)
1809 if not f:
1810 self.report_warning(
1811 f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
1812 return None
1813 return f['manifest_url'], f['manifest_stream_number'], is_live
1814
1815 for f in formats:
1816 f['protocol'] = 'http_dash_segments_generator'
1817 f['fragments'] = functools.partial(
1818 self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)
1819
1820 def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
1821 FETCH_SPAN, MAX_DURATION = 5, 432000
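# FETCH_SPAN: poll for new fragments roughly every 5 seconds.
# MAX_DURATION: 432 000 seconds = 120 hours, the furthest back the extractor assumes
# YouTube still serves live segments (see the warning below).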
1822
1823 mpd_url, stream_number, is_live = None, None, True
1824
1825 begin_index = 0
1826 download_start_time = ctx.get('start') or time.time()
1827
1828 lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
1829 if lack_early_segments:
1830 self.report_warning(bug_reports_message(
1831 'Starting download from the last 120 hours of the live stream since '
1832 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
1833 lack_early_segments = True
1834
1835 known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
1836 fragments, fragment_base_url = None, None
1837
1838 def _extract_sequence_from_mpd(refresh_sequence):
1839 nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
1840 # Obtain the latest sequence number from the MPD's maximum seq value
1841 old_mpd_url = mpd_url
1842 mpd_url, stream_number, is_live = mpd_feed(format_id) or (mpd_url, stream_number, False)
1843 if old_mpd_url == mpd_url and not refresh_sequence:
1844 return True, last_seq
1845 try:
1846 fmts, _ = self._extract_mpd_formats_and_subtitles(
1847 mpd_url, None, note=False, errnote=False, fatal=False)
1848 except ExtractorError:
1849 fmts = None
1850 if not fmts:
1851 no_fragment_score += 1
1852 return False, last_seq
1853 fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
1854 fragments = fmt_info['fragments']
1855 fragment_base_url = fmt_info['fragment_base_url']
1856 assert fragment_base_url
1857
1858 _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
1859 return True, _last_seq
1860
1861 while is_live:
1862 fetch_time = time.time()
1863 if no_fragment_score > 30:
1864 return
1865 if last_segment_url:
1866 # Obtain from "X-Head-Seqnum" header value from each segment
1867 try:
1868 urlh = self._request_webpage(
1869 last_segment_url, None, note=False, errnote=False, fatal=False)
1870 except ExtractorError:
1871 urlh = None
1872 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
1873 if last_seq is None:
1874 no_fragment_score += 1
1875 last_segment_url = None
1876 continue
1877 else:
1878 should_retry, last_seq = _extract_sequence_from_mpd(True)
1879 if not should_retry:
1880 continue
1881
1882 if known_idx > last_seq:
1883 last_segment_url = None
1884 continue
1885
1886 last_seq += 1
1887
1888 if begin_index < 0 and known_idx < 0:
1889 # skip from the start when it's a negative value
1890 known_idx = last_seq + begin_index
1891 if lack_early_segments:
1892 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
1893 try:
1894 for idx in range(known_idx, last_seq):
1895 # do not update the sequence here or some parts of it will be skipped
1896 should_retry, _ = _extract_sequence_from_mpd(False)
1897 if not should_retry:
1898 # retry when it gets into a weird state
1899 known_idx = idx - 1
1900 raise ExtractorError('breaking out of outer loop')
1901 last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
1902 yield {
1903 'url': last_segment_url,
1904 }
1905 if known_idx == last_seq:
1906 no_fragment_score += 5
1907 else:
1908 no_fragment_score = 0
1909 known_idx = last_seq
1910 except ExtractorError:
1911 continue
1912
1913 time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
1914
1915 def _extract_player_url(self, *ytcfgs, webpage=None):
1916 player_url = traverse_obj(
1917 ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
1918 get_all=False, expected_type=compat_str)
1919 if not player_url:
1920 return
1921 if player_url.startswith('//'):
1922 player_url = 'https:' + player_url
1923 elif not re.match(r'https?://', player_url):
1924 player_url = compat_urlparse.urljoin(
1925 'https://www.youtube.com', player_url)
1926 return player_url
1927
1928 def _download_player_url(self, video_id, fatal=False):
1929 res = self._download_webpage(
1930 'https://www.youtube.com/iframe_api',
1931 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
1932 if res:
1933 player_version = self._search_regex(
1934 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
1935 if player_version:
1936 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
1937
1938 def _signature_cache_id(self, example_sig):
1939 """ Return a string representation of a signature """
1940 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1941
1942 @classmethod
1943 def _extract_player_info(cls, player_url):
1944 for player_re in cls._PLAYER_INFO_RE:
1945 id_m = re.search(player_re, player_url)
1946 if id_m:
1947 break
1948 else:
1949 raise ExtractorError('Cannot identify player %r' % player_url)
1950 return id_m.group('id')
1951
1952 def _load_player(self, video_id, player_url, fatal=True):
1953 player_id = self._extract_player_info(player_url)
1954 if player_id not in self._code_cache:
1955 code = self._download_webpage(
1956 player_url, video_id, fatal=fatal,
1957 note='Downloading player ' + player_id,
1958 errnote='Download of %s failed' % player_url)
1959 if code:
1960 self._code_cache[player_id] = code
1961 return self._code_cache.get(player_id)
1962
1963 def _extract_signature_function(self, video_id, player_url, example_sig):
1964 player_id = self._extract_player_info(player_url)
1965
1966 # Read from filesystem cache
1967 func_id = 'js_%s_%s' % (
1968 player_id, self._signature_cache_id(example_sig))
1969 assert os.path.basename(func_id) == func_id
1970
1971 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1972 if cache_spec is not None:
1973 return lambda s: ''.join(s[i] for i in cache_spec)
1974
1975 code = self._load_player(video_id, player_url)
1976 if code:
1977 res = self._parse_sig_js(code)
1978
1979 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1980 cache_res = res(test_string)
1981 cache_spec = [ord(c) for c in cache_res]
1982
1983 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1984 return res
1985
1986 def _print_sig_code(self, func, example_sig):
1987 if not self.get_param('youtube_print_sig_code'):
1988 return
1989
1990 def gen_sig_code(idxs):
1991 def _genslice(start, end, step):
1992 starts = '' if start == 0 else str(start)
1993 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1994 steps = '' if step == 1 else (':%d' % step)
1995 return 's[%s%s%s]' % (starts, ends, steps)
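# A worked instance of the slice notation built above:
# _genslice(2, 8, 3) -> 's[2:11:3]' (the end is extended by one step so index 8 is included).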
1996
1997 step = None
1998 # Squelch pyflakes warnings - start will be set when step is set
1999 start = '(Never used)'
2000 for i, prev in zip(idxs[1:], idxs[:-1]):
2001 if step is not None:
2002 if i - prev == step:
2003 continue
2004 yield _genslice(start, prev, step)
2005 step = None
2006 continue
2007 if i - prev in [-1, 1]:
2008 step = i - prev
2009 start = prev
2010 continue
2011 else:
2012 yield 's[%d]' % prev
2013 if step is None:
2014 yield 's[%d]' % i
2015 else:
2016 yield _genslice(start, i, step)
2017
2018 test_string = ''.join(map(compat_chr, range(len(example_sig))))
2019 cache_res = func(test_string)
2020 cache_spec = [ord(c) for c in cache_res]
2021 expr_code = ' + '.join(gen_sig_code(cache_spec))
2022 signature_id_tuple = '(%s)' % (
2023 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
2024 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
2025 ' return %s\n') % (signature_id_tuple, expr_code)
2026 self.to_screen('Extracted signature function:\n' + code)
2027
2028 def _parse_sig_js(self, jscode):
2029 funcname = self._search_regex(
2030 (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2031 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2032 r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
2033 r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
2034 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
2035 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
2036 r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
2037 # Obsolete patterns
2038 r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2039 r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
2040 r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2041 r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2042 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2043 r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2044 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2045 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
2046 jscode, 'Initial JS player signature function name', group='sig')
2047
2048 jsi = JSInterpreter(jscode)
2049 initial_function = jsi.extract_function(funcname)
2050 return lambda s: initial_function([s])
2051
2052 def _decrypt_signature(self, s, video_id, player_url):
2053 """Turn the encrypted s field into a working signature"""
2054
2055 if player_url is None:
2056 raise ExtractorError('Cannot decrypt signature without player_url')
2057
2058 try:
2059 player_id = (player_url, self._signature_cache_id(s))
2060 if player_id not in self._player_cache:
2061 func = self._extract_signature_function(
2062 video_id, player_url, s
2063 )
2064 self._player_cache[player_id] = func
2065 func = self._player_cache[player_id]
2066 self._print_sig_code(func, s)
2067 return func(s)
2068 except Exception as e:
2069 raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
2070
2071 def _decrypt_nsig(self, s, video_id, player_url):
2072 """Turn the encrypted n field into a working signature"""
2073 if player_url is None:
2074 raise ExtractorError('Cannot decrypt nsig without player_url')
2075 if player_url.startswith('//'):
2076 player_url = 'https:' + player_url
2077 elif not re.match(r'https?://', player_url):
2078 player_url = compat_urlparse.urljoin(
2079 'https://www.youtube.com', player_url)
2080
2081 sig_id = ('nsig_value', s)
2082 if sig_id in self._player_cache:
2083 return self._player_cache[sig_id]
2084
2085 try:
2086 player_id = ('nsig', player_url)
2087 if player_id not in self._player_cache:
2088 self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
2089 func = self._player_cache[player_id]
2090 self._player_cache[sig_id] = func(s)
2091 self.write_debug(f'Decrypted nsig {s} => {self._player_cache[sig_id]}')
2092 return self._player_cache[sig_id]
2093 except Exception as e:
2094 raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
2095
2096 def _extract_n_function_name(self, jscode):
2097 return self._search_regex(
2098 (r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',),
2099 jscode, 'Initial JS player n function name', group='nfunc')
2100
2101 def _extract_n_function(self, video_id, player_url):
2102 player_id = self._extract_player_info(player_url)
2103 func_code = self._downloader.cache.load('youtube-nsig', player_id)
2104
2105 if func_code:
2106 jsi = JSInterpreter(func_code)
2107 else:
2108 jscode = self._load_player(video_id, player_url)
2109 funcname = self._extract_n_function_name(jscode)
2110 jsi = JSInterpreter(jscode)
2111 func_code = jsi.extract_function_code(funcname)
2112 self._downloader.cache.store('youtube-nsig', player_id, func_code)
2113
2114 if self.get_param('youtube_print_sig_code'):
2115 self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
2116
2117 return lambda s: jsi.extract_function_from_code(*func_code)([s])
2118
2119 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
2120 """
2121 Extract signatureTimestamp (sts)
2122 Required to tell API what sig/player version is in use.
2123 """
2124 sts = None
2125 if isinstance(ytcfg, dict):
2126 sts = int_or_none(ytcfg.get('STS'))
2127
2128 if not sts:
2129 # Attempt to extract from player
2130 if player_url is None:
2131 error_msg = 'Cannot extract signature timestamp without player_url.'
2132 if fatal:
2133 raise ExtractorError(error_msg)
2134 self.report_warning(error_msg)
2135 return
2136 code = self._load_player(video_id, player_url, fatal=fatal)
2137 if code:
2138 sts = int_or_none(self._search_regex(
2139 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
2140 'JS player signature timestamp', group='sts', fatal=fatal))
2141 return sts
2142
2143 def _mark_watched(self, video_id, player_responses):
2144 playback_url = get_first(
2145 player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
2146 expected_type=url_or_none)
2147 if not playback_url:
2148 self.report_warning('Unable to mark watched')
2149 return
2150 parsed_playback_url = compat_urlparse.urlparse(playback_url)
2151 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
2152
2153 # The cpn generation algorithm is reverse engineered from base.js.
2154 # In fact it works even with a dummy cpn.
2155 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
2156 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
2157
2158 qs.update({
2159 'ver': ['2'],
2160 'cpn': [cpn],
2161 })
2162 playback_url = compat_urlparse.urlunparse(
2163 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
2164
2165 self._download_webpage(
2166 playback_url, video_id, 'Marking watched',
2167 'Unable to mark watched', fatal=False)
2168
2169 @staticmethod
2170 def _extract_urls(webpage):
2171 # Embedded YouTube player
2172 entries = [
2173 unescapeHTML(mobj.group('url'))
2174 for mobj in re.finditer(r'''(?x)
2175 (?:
2176 <iframe[^>]+?src=|
2177 data-video-url=|
2178 <embed[^>]+?src=|
2179 embedSWF\(?:\s*|
2180 <object[^>]+data=|
2181 new\s+SWFObject\(
2182 )
2183 (["\'])
2184 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
2185 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
2186 \1''', webpage)]
2187
2188 # lazyYT YouTube embed
2189 entries.extend(list(map(
2190 unescapeHTML,
2191 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
2192
2193 # Wordpress "YouTube Video Importer" plugin
2194 matches = re.findall(r'''(?x)<div[^>]+
2195 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
2196 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
2197 entries.extend(m[-1] for m in matches)
2198
2199 return entries
2200
2201 @staticmethod
2202 def _extract_url(webpage):
2203 urls = YoutubeIE._extract_urls(webpage)
2204 return urls[0] if urls else None
2205
2206 @classmethod
2207 def extract_id(cls, url):
2208 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2209 if mobj is None:
2210 raise ExtractorError('Invalid URL: %s' % url)
2211 return mobj.group('id')
2212
2213 def _extract_chapters_from_json(self, data, duration):
2214 chapter_list = traverse_obj(
2215 data, (
2216 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2217 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2218 ), expected_type=list)
2219
2220 return self._extract_chapters(
2221 chapter_list,
2222 chapter_time=lambda chapter: float_or_none(
2223 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2224 chapter_title=lambda chapter: traverse_obj(
2225 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2226 duration=duration)
2227
2228 def _extract_chapters_from_engagement_panel(self, data, duration):
2229 content_list = traverse_obj(
2230 data,
2231 ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2232 expected_type=list, default=[])
2233 chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2234 chapter_title = lambda chapter: self._get_text(chapter, 'title')
2235
2236 return next((
2237 filter(None, (
2238 self._extract_chapters(
2239 traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2240 chapter_time, chapter_title, duration)
2241 for contents in content_list
2242 ))), [])
2243
2244 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2245 chapters = []
2246 last_chapter = {'start_time': 0}
2247 for idx, chapter in enumerate(chapter_list or []):
2248 title = chapter_title(chapter)
2249 start_time = chapter_time(chapter)
2250 if start_time is None:
2251 continue
2252 last_chapter['end_time'] = start_time
2253 if start_time < last_chapter['start_time']:
2254 if idx == 1:
2255 chapters.pop()
2256 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2257 else:
2258 self.report_warning(f'Invalid start time for chapter "{title}"')
2259 continue
2260 last_chapter = {'start_time': start_time, 'title': title}
2261 chapters.append(last_chapter)
2262 last_chapter['end_time'] = duration
2263 return chapters
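# The returned structure is a list of dicts of the form (illustrative values):
# [{'start_time': 0, 'end_time': 90, 'title': 'Intro'},
#  {'start_time': 90, 'end_time': duration, 'title': 'Main topic'}]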
2264
2265 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2266 return self._parse_json(self._search_regex(
2267 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2268 regex), webpage, name, default='{}'), video_id, fatal=False)
2269
2270 def _extract_comment(self, comment_renderer, parent=None):
2271 comment_id = comment_renderer.get('commentId')
2272 if not comment_id:
2273 return
2274
2275 text = self._get_text(comment_renderer, 'contentText')
2276
2277 # note: timestamp is an estimate calculated from the current time and time_text
2278 timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
2279 author = self._get_text(comment_renderer, 'authorText')
2280 author_id = try_get(comment_renderer,
2281 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2282
2283 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2284 lambda x: x['likeCount']), compat_str)) or 0
2285 author_thumbnail = try_get(comment_renderer,
2286 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2287
2288 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2289 is_favorited = 'creatorHeart' in (try_get(
2290 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2291 return {
2292 'id': comment_id,
2293 'text': text,
2294 'timestamp': timestamp,
2295 'time_text': time_text,
2296 'like_count': votes,
2297 'is_favorited': is_favorited,
2298 'author': author,
2299 'author_id': author_id,
2300 'author_thumbnail': author_thumbnail,
2301 'author_is_uploader': author_is_uploader,
2302 'parent': parent or 'root'
2303 }
2304
2305 def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
2306
2307 get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]
2308
2309 def extract_header(contents):
2310 _continuation = None
2311 for content in contents:
2312 comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
2313 expected_comment_count = parse_count(self._get_text(
2314 comments_header_renderer, 'countText', 'commentsCount', max_runs=1))
2315
2316 if expected_comment_count:
2317 tracker['est_total'] = expected_comment_count
2318 self.to_screen(f'Downloading ~{expected_comment_count} comments')
2319 comment_sort_index = int(get_single_config_arg('comment_sort') != 'top') # 1 = new, 0 = top
2320
2321 sort_menu_item = try_get(
2322 comments_header_renderer,
2323 lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
2324 sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}
2325
2326 _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
2327 if not _continuation:
2328 continue
2329
2330 sort_text = str_or_none(sort_menu_item.get('title'))
2331 if not sort_text:
2332 sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
2333 self.to_screen('Sorting comments by %s' % sort_text.lower())
2334 break
2335 return _continuation
2336
2337 def extract_thread(contents):
2338 if not parent:
2339 tracker['current_page_thread'] = 0
2340 for content in contents:
2341 if not parent and tracker['total_parent_comments'] >= max_parents:
2342 yield
2343 comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
2344 comment_renderer = get_first(
2345 (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
2346 expected_type=dict, default={})
2347
2348 comment = self._extract_comment(comment_renderer, parent)
2349 if not comment:
2350 continue
2351
2352 tracker['running_total'] += 1
2353 tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
2354 yield comment
2355
2356 # Attempt to get the replies
2357 comment_replies_renderer = try_get(
2358 comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
2359
2360 if comment_replies_renderer:
2361 tracker['current_page_thread'] += 1
2362 comment_entries_iter = self._comment_entries(
2363 comment_replies_renderer, ytcfg, video_id,
2364 parent=comment.get('id'), tracker=tracker)
2365 for reply_comment in itertools.islice(comment_entries_iter, min(max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))):
2366 yield reply_comment
2367
2368 # Keeps track of counts across recursive calls
2369 if not tracker:
2370 tracker = dict(
2371 running_total=0,
2372 est_total=0,
2373 current_page_thread=0,
2374 total_parent_comments=0,
2375 total_reply_comments=0)
2376
2377 # TODO: Deprecated
2378 # YouTube comments have a max depth of 2
2379 max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
2380 if max_depth:
2381 self._downloader.deprecation_warning(
2382 '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
2383 if max_depth == 1 and parent:
2384 return
2385
2386 max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
2387 lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)
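# The 'max_comments' extractor argument is a comma-separated list; the four values map to
# max total comments, max parent comments, max total replies and max replies per thread.
# Missing entries default to sys.maxsize (i.e. effectively unlimited).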
2388
2389 continuation = self._extract_continuation(root_continuation_data)
2390 message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
2391 if message and not parent:
2392 self.report_warning(message, video_id=video_id)
2393
2394 response = None
2395 is_first_continuation = parent is None
2396
2397 for page_num in itertools.count(0):
2398 if not continuation:
2399 break
2400 headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
2401 comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
2402 if page_num == 0:
2403 if is_first_continuation:
2404 note_prefix = 'Downloading comment section API JSON'
2405 else:
2406 note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
2407 tracker['current_page_thread'], comment_prog_str)
2408 else:
2409 note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
2410 ' ' if parent else '', ' replies' if parent else '',
2411 page_num, comment_prog_str)
2412
2413 response = self._extract_response(
2414 item_id=None, query=continuation,
2415 ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
2416 check_get_keys='onResponseReceivedEndpoints')
2417
2418 continuation_contents = traverse_obj(
2419 response, 'onResponseReceivedEndpoints', expected_type=list, default=[])
2420
2421 continuation = None
2422 for continuation_section in continuation_contents:
2423 continuation_items = traverse_obj(
2424 continuation_section,
2425 (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
2426 get_all=False, expected_type=list) or []
2427 if is_first_continuation:
2428 continuation = extract_header(continuation_items)
2429 is_first_continuation = False
2430 if continuation:
2431 break
2432 continue
2433
2434 for entry in extract_thread(continuation_items):
2435 if not entry:
2436 return
2437 yield entry
2438 continuation = self._extract_continuation({'contents': continuation_items})
2439 if continuation:
2440 break
2441
2442 def _get_comments(self, ytcfg, video_id, contents, webpage):
2443 """Entry for comment extraction"""
2444 def _real_comment_extract(contents):
2445 renderer = next((
2446 item for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
2447 if item.get('sectionIdentifier') == 'comment-item-section'), None)
2448 yield from self._comment_entries(renderer, ytcfg, video_id)
2449
2450 max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
2451 return itertools.islice(_real_comment_extract(contents), 0, max_comments)
2452
2453 @staticmethod
2454 def _get_checkok_params():
2455 return {'contentCheckOk': True, 'racyCheckOk': True}
2456
2457 @classmethod
2458 def _generate_player_context(cls, sts=None):
2459 context = {
2460 'html5Preference': 'HTML5_PREF_WANTS',
2461 }
2462 if sts is not None:
2463 context['signatureTimestamp'] = sts
2464 return {
2465 'playbackContext': {
2466 'contentPlaybackContext': context
2467 },
2468 **cls._get_checkok_params()
2469 }
2470
2471 @staticmethod
2472 def _is_agegated(player_response):
2473 if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
2474 return True
2475
2476 reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2477 AGE_GATE_REASONS = (
2478 'confirm your age', 'age-restricted', 'inappropriate', # reason
2479 'age_verification_required', 'age_check_required', # status
2480 )
2481 return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
2482
2483 @staticmethod
2484 def _is_unplayable(player_response):
2485 return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
2486
2487 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
2488
2489 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2490 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2491 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
2492 headers = self.generate_api_headers(
2493 ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
2494
2495 yt_query = {'videoId': video_id}
2496 yt_query.update(self._generate_player_context(sts))
2497 return self._extract_response(
2498 item_id=video_id, ep='player', query=yt_query,
2499 ytcfg=player_ytcfg, headers=headers, fatal=True,
2500 default_client=client,
2501 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2502 ) or None
2503
2504 def _get_requested_clients(self, url, smuggled_data):
2505 requested_clients = []
2506 default = ['android', 'web']
2507 allowed_clients = sorted(
2508 [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
2509 key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
2510 for client in self._configuration_arg('player_client'):
2511 if client in allowed_clients:
2512 requested_clients.append(client)
2513 elif client == 'default':
2514 requested_clients.extend(default)
2515 elif client == 'all':
2516 requested_clients.extend(allowed_clients)
2517 else:
2518 self.report_warning(f'Skipping unsupported client {client}')
2519 if not requested_clients:
2520 requested_clients = default
2521
2522 if smuggled_data.get('is_music_url') or self.is_music_url(url):
2523 requested_clients.extend(
2524 f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
2525
2526 return orderedSet(requested_clients)
2527
2528 def _extract_player_ytcfg(self, client, video_id):
2529 url = {
2530 'web_music': 'https://music.youtube.com',
2531 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2532 }.get(client)
2533 if not url:
2534 return {}
2535 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2536 return self.extract_ytcfg(video_id, webpage) or {}
2537
2538 def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
2539 initial_pr = None
2540 if webpage:
2541 initial_pr = self._extract_yt_initial_variable(
2542 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2543 video_id, 'initial player response')
2544
2545 original_clients = clients
2546 clients = clients[::-1]
2547 prs = []
2548
2549 def append_client(client_name):
2550 if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
2551 clients.append(client_name)
2552
2553 # Android player_response does not have microFormats which are needed for
2554 # extraction of some data. So we return the initial_pr with formats
2555 # stripped out even if not requested by the user
2556 # See: https://github.com/yt-dlp/yt-dlp/issues/501
2557 if initial_pr:
2558 pr = dict(initial_pr)
2559 pr['streamingData'] = None
2560 prs.append(pr)
2561
2562 last_error = None
2563 tried_iframe_fallback = False
2564 player_url = None
2565 while clients:
2566 client = clients.pop()
2567 player_ytcfg = master_ytcfg if client == 'web' else {}
2568 if 'configs' not in self._configuration_arg('player_skip'):
2569 player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg
2570
2571 player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
2572 require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
2573 if 'js' in self._configuration_arg('player_skip'):
2574 require_js_player = False
2575 player_url = None
2576
2577 if not player_url and not tried_iframe_fallback and require_js_player:
2578 player_url = self._download_player_url(video_id)
2579 tried_iframe_fallback = True
2580
2581 try:
2582 pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
2583 client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
2584 except ExtractorError as e:
2585 if last_error:
2586 self.report_warning(last_error)
2587 last_error = e
2588 continue
2589
2590 if pr:
2591 prs.append(pr)
2592
2593 # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
2594 if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
2595 append_client(client.replace('_agegate', '_creator'))
2596 elif self._is_agegated(pr):
2597 append_client(f'{client}_agegate')
2598
2599 if last_error:
2600 if not len(prs):
2601 raise last_error
2602 self.report_warning(last_error)
2603 return prs, player_url
2604
2605 def _extract_formats(self, streaming_data, video_id, player_url, is_live):
2606 itags, stream_ids = {}, []
2607 itag_qualities, res_qualities = {}, {}
2608 q = qualities([
2609 # Normally tiny is the smallest video-only format. But
2610 # audio-only formats with unknown quality may get tagged as tiny
2611 'tiny',
2612 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2613 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2614 ])
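# qualities() returns a function that ranks a label by its position in the list above,
# so entries later in the list compare as higher quality.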
2615 streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
2616
2617 for fmt in streaming_formats:
2618 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2619 continue
2620
2621 itag = str_or_none(fmt.get('itag'))
2622 audio_track = fmt.get('audioTrack') or {}
2623 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
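# stream_id disambiguates multi-audio videos, e.g. itag 140 with an audio track id of
# 'en.4' (hypothetical value) becomes '140.en.4', while a plain format becomes '140.'.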
2624 if stream_id in stream_ids:
2625 continue
2626
2627 quality = fmt.get('quality')
2628 height = int_or_none(fmt.get('height'))
2629 if quality == 'tiny' or not quality:
2630 quality = fmt.get('audioQuality', '').lower() or quality
2631 # The 3gp format (17) in android client has a quality of "small",
2632 # but is actually worse than other formats
2633 if itag == '17':
2634 quality = 'tiny'
2635 if quality:
2636 if itag:
2637 itag_qualities[itag] = quality
2638 if height:
2639 res_qualities[height] = quality
2640 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2641 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2642 # number of fragment that would subsequently requested with (`&sq=N`)
2643 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2644 continue
2645
2646 fmt_url = fmt.get('url')
2647 if not fmt_url:
2648 sc = compat_parse_qs(fmt.get('signatureCipher'))
2649 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2650 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2651 if not (sc and fmt_url and encrypted_sig):
2652 continue
2653 if not player_url:
2654 continue
2655 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2656 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2657 fmt_url += '&' + sp + '=' + signature
2658
2659 query = parse_qs(fmt_url)
2660 throttled = False
2661 if query.get('n'):
2662 try:
2663 fmt_url = update_url_query(fmt_url, {
2664 'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
2665 except ExtractorError as e:
2666 self.report_warning(
2667 f'nsig extraction failed: You may experience throttling for some formats\n'
2668 f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
2669 throttled = True
2670
2671 if itag:
2672 itags[itag] = 'https'
2673 stream_ids.append(stream_id)
2674
2675 tbr = float_or_none(
2676 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2677 dct = {
2678 'asr': int_or_none(fmt.get('audioSampleRate')),
2679 'filesize': int_or_none(fmt.get('contentLength')),
2680 'format_id': itag,
2681 'format_note': join_nonempty(
2682 '%s%s' % (audio_track.get('displayName') or '',
2683 ' (default)' if audio_track.get('audioIsDefault') else ''),
2684 fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
2685 throttled and 'THROTTLED', delim=', '),
2686 'source_preference': -10 if throttled else -1,
2687 'fps': int_or_none(fmt.get('fps')) or None,
2688 'height': height,
2689 'quality': q(quality),
2690 'tbr': tbr,
2691 'url': fmt_url,
2692 'width': int_or_none(fmt.get('width')),
2693 'language': audio_track.get('id', '').split('.')[0],
2694 'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
2695 }
2696 mime_mobj = re.match(
2697 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2698 if mime_mobj:
2699 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2700 dct.update(parse_codecs(mime_mobj.group(2)))
2701 no_audio = dct.get('acodec') == 'none'
2702 no_video = dct.get('vcodec') == 'none'
2703 if no_audio:
2704 dct['vbr'] = tbr
2705 if no_video:
2706 dct['abr'] = tbr
2707 if no_audio or no_video:
2708 dct['downloader_options'] = {
2709 # Youtube throttles chunks >~10M
2710 'http_chunk_size': 10485760,
2711 }
2712 if dct.get('ext'):
2713 dct['container'] = dct['ext'] + '_dash'
2714 yield dct
2715
2716 live_from_start = is_live and self.get_param('live_from_start')
2717 skip_manifests = self._configuration_arg('skip')
2718 if not self.get_param('youtube_include_hls_manifest', True):
2719 skip_manifests.append('hls')
2720 get_dash = 'dash' not in skip_manifests and (
2721 not is_live or live_from_start or self._configuration_arg('include_live_dash'))
2722 get_hls = not live_from_start and 'hls' not in skip_manifests
2723
2724 def process_manifest_format(f, proto, itag):
2725 if itag in itags:
2726 if itags[itag] == proto or f'{itag}-{proto}' in itags:
2727 return False
2728 itag = f'{itag}-{proto}'
2729 if itag:
2730 f['format_id'] = itag
2731 itags[itag] = proto
2732
2733 f['quality'] = next((
2734 q(qdict[val])
2735 for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
2736 if val in qdict), -1)
2737 return True
2738
2739 for sd in streaming_data:
2740 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2741 if hls_manifest_url:
2742 for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
2743 if process_manifest_format(f, 'hls', self._search_regex(
2744 r'/itag/(\d+)', f['url'], 'itag', default=None)):
2745 yield f
2746
2747 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2748 if dash_manifest_url:
2749 for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
2750 if process_manifest_format(f, 'dash', f['format_id']):
2751 f['filesize'] = int_or_none(self._search_regex(
2752 r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
2753 if live_from_start:
2754 f['is_from_start'] = True
2755
2756 yield f
2757
2758 def _extract_storyboard(self, player_responses, duration):
2759 spec = get_first(
2760 player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
2761 if not spec:
2762 return
2763 base_url = spec.pop()
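# After reversing, the last '|'-separated element (popped first) is the base URL template
# containing the $L/$N/$M placeholders substituted below; each remaining entry describes one
# storyboard level with 8 '#'-separated fields: width, height, frame_count, cols, rows,
# an unused field, N and sigh.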
2764 L = len(spec) - 1
2765 for i, args in enumerate(spec):
2766 args = args.split('#')
2767 counts = list(map(int_or_none, args[:5]))
2768 if len(args) != 8 or not all(counts):
2769 self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
2770 continue
2771 width, height, frame_count, cols, rows = counts
2772 N, sigh = args[6:]
2773
2774 url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
2775 fragment_count = frame_count / (cols * rows)
2776 fragment_duration = duration / fragment_count
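# e.g. a 600 s video whose level packs 100 frames into 5x5 grids gives
# fragment_count = 100 / 25 = 4 and fragment_duration = 150 s per mosaic image
# (illustrative numbers).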
2777 yield {
2778 'format_id': f'sb{i}',
2779 'format_note': 'storyboard',
2780 'ext': 'mhtml',
2781 'protocol': 'mhtml',
2782 'acodec': 'none',
2783 'vcodec': 'none',
2784 'url': url,
2785 'width': width,
2786 'height': height,
2787 'fragments': [{
2788 'path': url.replace('$M', str(j)),
2789 'duration': min(fragment_duration, duration - (j * fragment_duration)),
2790 } for j in range(math.ceil(fragment_count))],
2791 }
2792
2793 def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
2794 webpage = None
2795 if 'webpage' not in self._configuration_arg('player_skip'):
2796 webpage = self._download_webpage(
2797 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2798
2799 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2800
2801 player_responses, player_url = self._extract_player_responses(
2802 self._get_requested_clients(url, smuggled_data),
2803 video_id, webpage, master_ytcfg)
2804
2805 return webpage, master_ytcfg, player_responses, player_url
2806
2807 def _list_formats(self, video_id, microformats, video_details, player_responses, player_url):
2808 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
2809 is_live = get_first(video_details, 'isLive')
2810 if is_live is None:
2811 is_live = get_first(live_broadcast_details, 'isLiveNow')
2812
2813 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2814 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2815
2816 return live_broadcast_details, is_live, streaming_data, formats
2817
2818 def _real_extract(self, url):
2819 url, smuggled_data = unsmuggle_url(url, {})
2820 video_id = self._match_id(url)
2821
2822 base_url = self.http_scheme() + '//www.youtube.com/'
2823 webpage_url = base_url + 'watch?v=' + video_id
2824
2825 webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
2826
2827 playability_statuses = traverse_obj(
2828 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2829
2830 trailer_video_id = get_first(
2831 playability_statuses,
2832 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2833 expected_type=str)
2834 if trailer_video_id:
2835 return self.url_result(
2836 trailer_video_id, self.ie_key(), trailer_video_id)
2837
2838 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2839 if webpage else (lambda x: None))
2840
2841 video_details = traverse_obj(
2842 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2843 microformats = traverse_obj(
2844 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2845 expected_type=dict, default=[])
2846 video_title = (
2847 get_first(video_details, 'title')
2848 or self._get_text(microformats, (..., 'title'))
2849 or search_meta(['og:title', 'twitter:title', 'title']))
2850 video_description = get_first(video_details, 'shortDescription')
2851
2852 multifeed_metadata_list = get_first(
2853 player_responses,
2854 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2855 expected_type=str)
2856 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
2857 if self.get_param('noplaylist'):
2858 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2859 else:
2860 entries = []
2861 feed_ids = []
2862 for feed in multifeed_metadata_list.split(','):
2863 # Unquote should take place before the split on comma (,) since textual
2864 # fields may contain commas as well (see
2865 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2866 feed_data = compat_parse_qs(
2867 compat_urllib_parse_unquote_plus(feed))
2868
2869 def feed_entry(name):
2870 return try_get(
2871 feed_data, lambda x: x[name][0], compat_str)
2872
2873 feed_id = feed_entry('id')
2874 if not feed_id:
2875 continue
2876 feed_title = feed_entry('title')
2877 title = video_title
2878 if feed_title:
2879 title += ' (%s)' % feed_title
2880 entries.append({
2881 '_type': 'url_transparent',
2882 'ie_key': 'Youtube',
2883 'url': smuggle_url(
2884 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2885 {'force_singlefeed': True}),
2886 'title': title,
2887 })
2888 feed_ids.append(feed_id)
2889 self.to_screen(
2890 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2891 % (', '.join(feed_ids), video_id))
2892 return self.playlist_result(
2893 entries, video_id, video_title, video_description)
2894
2895 live_broadcast_details, is_live, streaming_data, formats = self._list_formats(video_id, microformats, video_details, player_responses, player_url)
2896
2897 if not formats:
2898 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2899 self.report_drm(video_id)
2900 pemr = get_first(
2901 playability_statuses,
2902 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2903 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2904 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2905 if subreason:
2906 if subreason == 'The uploader has not made this video available in your country.':
2907 countries = get_first(microformats, 'availableCountries')
2908 if not countries:
2909 regions_allowed = search_meta('regionsAllowed')
2910 countries = regions_allowed.split(',') if regions_allowed else None
2911 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2912 reason += f'. {subreason}'
2913 if reason:
2914 self.raise_no_formats(reason, expected=True)
2915
2916 keywords = get_first(video_details, 'keywords', expected_type=list) or []
2917 if not keywords and webpage:
2918 keywords = [
2919 unescapeHTML(m.group('content'))
2920 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2921 for keyword in keywords:
2922 if keyword.startswith('yt:stretch='):
2923 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2924 if mobj:
2925 # NB: float is intentional for forcing float division
2926 w, h = (float(v) for v in mobj.groups())
2927 if w > 0 and h > 0:
2928 ratio = w / h
2929 for f in formats:
2930 if f.get('vcodec') != 'none':
2931 f['stretched_ratio'] = ratio
2932 break
2933 thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
2934 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2935 if thumbnail_url:
2936 thumbnails.append({
2937 'url': thumbnail_url,
2938 })
2939 original_thumbnails = thumbnails.copy()
2940
2941 # The best resolution thumbnail sometimes does not appear in the webpage
2942 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2943 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2944 thumbnail_names = [
2945 'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
2946 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2947 'mqdefault', 'mq1', 'mq2', 'mq3',
2948 'default', '1', '2', '3'
2949 ]
2950 n_thumbnail_names = len(thumbnail_names)
2951 thumbnails.extend({
2952 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2953 video_id=video_id, name=name, ext=ext,
2954 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2955 } for name in thumbnail_names for ext in ('webp', 'jpg'))
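# Rank the guessed thumbnails by their position in thumbnail_names (earlier names are higher resolution);
# at the same rank, prefer webp over jpg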
2956 for thumb in thumbnails:
2957 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2958 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2959 self._remove_duplicate_formats(thumbnails)
2960 self._downloader._sort_thumbnails(original_thumbnails)
2961
2962 category = get_first(microformats, 'category') or search_meta('genre')
2963 channel_id = str_or_none(
2964 get_first(video_details, 'channelId')
2965 or get_first(microformats, 'externalChannelId')
2966 or search_meta('channelId'))
2967 duration = int_or_none(
2968 get_first(video_details, 'lengthSeconds')
2969 or get_first(microformats, 'lengthSeconds')
2970 or parse_duration(search_meta('duration'))) or None
2971 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2972
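# Infer is_live/is_upcoming from isLiveContent and from each other when the player response is not explicit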
2973 live_content = get_first(video_details, 'isLiveContent')
2974 is_upcoming = get_first(video_details, 'isUpcoming')
2975 if is_live is None:
2976 if is_upcoming or live_content is False:
2977 is_live = False
2978 if is_upcoming is None and (live_content or is_live):
2979 is_upcoming = False
2980 live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2981 live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2982 if not duration and live_end_time and live_start_time:
2983 duration = live_end_time - live_start_time
2984
2985 if is_live and self.get_param('live_from_start'):
2986 self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)
2987
2988 formats.extend(self._extract_storyboard(player_responses, duration))
2989
2990 # Source is given priority since formats that throttle are given lower source_preference
2991 # When the throttling issue is fully fixed, remove this
2992 self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
2993
2994 info = {
2995 'id': video_id,
2996 'title': video_title,
2997 'formats': formats,
2998 'thumbnails': thumbnails,
2999 # The best thumbnail that we are sure exists. Prevents unnecessary
3000 # URL checking if the user doesn't care about getting the best possible thumbnail
3001 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
3002 'description': video_description,
3003 'upload_date': unified_strdate(
3004 get_first(microformats, 'uploadDate')
3005 or search_meta('uploadDate')),
3006 'uploader': get_first(video_details, 'author'),
3007 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
3008 'uploader_url': owner_profile_url,
3009 'channel_id': channel_id,
3010 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
3011 'duration': duration,
3012 'view_count': int_or_none(
3013 get_first((video_details, microformats), (..., 'viewCount'))
3014 or search_meta('interactionCount')),
3015 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
3016 'age_limit': 18 if (
3017 get_first(microformats, 'isFamilySafe') is False
3018 or search_meta('isFamilyFriendly') == 'false'
3019 or search_meta('og:restrictions:age') == '18+') else 0,
3020 'webpage_url': webpage_url,
3021 'categories': [category] if category else None,
3022 'tags': keywords,
3023 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
3024 'is_live': is_live,
3025 'was_live': (False if is_live or is_upcoming or live_content is False
3026 else None if is_live is None or is_upcoming is None
3027 else live_content),
3028 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
3029 'release_timestamp': live_start_time,
3030 }
3031
3032 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
3033 if pctr:
3034 def get_lang_code(track):
3035 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
3036 or track.get('languageCode'))
3037
3038 # Converted into dicts to remove duplicates
3039 captions = {
3040 get_lang_code(sub): sub
3041 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
3042 translation_languages = {
3043 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
3044 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
3045
3046 def process_language(container, base_url, lang_code, sub_name, query):
3047 lang_subs = container.setdefault(lang_code, [])
3048 for fmt in self._SUBTITLE_FORMATS:
3049 query.update({
3050 'fmt': fmt,
3051 })
3052 lang_subs.append({
3053 'ext': fmt,
3054 'url': update_url_query(base_url, query),
3055 'name': sub_name,
3056 })
3057
3058 subtitles, automatic_captions = {}, {}
3059 for lang_code, caption_track in captions.items():
3060 base_url = caption_track.get('baseUrl')
3061 if not base_url:
3062 continue
3063 lang_name = self._get_text(caption_track, 'name', max_runs=1)
3064 if caption_track.get('kind') != 'asr':
3065 if not lang_code:
3066 continue
3067 process_language(
3068 subtitles, base_url, lang_code, lang_name, {})
3069 if not caption_track.get('isTranslatable'):
3070 continue
3071 for trans_code, trans_name in translation_languages.items():
3072 if not trans_code:
3073 continue
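# Translations of manually-created captions are keyed '<target>-<source>' to
# distinguish them from translations of the ASR track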
3074 if caption_track.get('kind') != 'asr':
3075 trans_code += f'-{lang_code}'
3076 trans_name += format_field(lang_name, template=' from %s')
3077 process_language(
3078 automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code})
3079 info['automatic_captions'] = automatic_captions
3080 info['subtitles'] = subtitles
3081
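# Derive start_time/end_time from the t/start/end parameters of the URL query or fragment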
3082 parsed_url = compat_urllib_parse_urlparse(url)
3083 for component in [parsed_url.fragment, parsed_url.query]:
3084 query = compat_parse_qs(component)
3085 for k, v in query.items():
3086 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
3087 d_k += '_time'
3088 if d_k not in info and k in s_ks:
3089 info[d_k] = parse_duration(query[k][0])
3090
3091 # Youtube Music Auto-generated description
3092 if video_description:
3093 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
3094 if mobj:
3095 release_year = mobj.group('release_year')
3096 release_date = mobj.group('release_date')
3097 if release_date:
3098 release_date = release_date.replace('-', '')
3099 if not release_year:
3100 release_year = release_date[:4]
3101 info.update({
3102 'album': mobj.group('album').strip(),
3103 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3104 'track': mobj.group('track').strip(),
3105 'release_date': release_date,
3106 'release_year': int_or_none(release_year),
3107 })
3108
3109 initial_data = None
3110 if webpage:
3111 initial_data = self._extract_yt_initial_variable(
3112 webpage, self._YT_INITIAL_DATA_RE, video_id,
3113 'yt initial data')
3114 if not initial_data:
3115 query = {'videoId': video_id}
3116 query.update(self._get_checkok_params())
3117 initial_data = self._extract_response(
3118 item_id=video_id, ep='next', fatal=False,
3119 ytcfg=master_ytcfg, query=query,
3120 headers=self.generate_api_headers(ytcfg=master_ytcfg),
3121 note='Downloading initial data API JSON')
3122
3123 try:
3124 # This will error if there is no livechat
3125 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
3126 info.setdefault('subtitles', {})['live_chat'] = [{
3127 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
3128 'video_id': video_id,
3129 'ext': 'json',
3130 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
3131 }]
3132 except (KeyError, IndexError, TypeError):
3133 pass
3134
3135 if initial_data:
3136 info['chapters'] = (
3137 self._extract_chapters_from_json(initial_data, duration)
3138 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3139 or None)
3140
3141 contents = try_get(
3142 initial_data,
3143 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3144 list) or []
3145 for content in contents:
3146 vpir = content.get('videoPrimaryInfoRenderer')
3147 if vpir:
3148 stl = vpir.get('superTitleLink')
3149 if stl:
3150 stl = self._get_text(stl)
3151 if try_get(
3152 vpir,
3153 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3154 info['location'] = stl
3155 else:
3156 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3157 if mobj:
3158 info.update({
3159 'series': mobj.group(1),
3160 'season_number': int(mobj.group(2)),
3161 'episode_number': int(mobj.group(3)),
3162 })
3163 for tlb in (try_get(
3164 vpir,
3165 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3166 list) or []):
3167 tbr = tlb.get('toggleButtonRenderer') or {}
3168 for getter, regex in [(
3169 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3170 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3171 lambda x: x['accessibility'],
3172 lambda x: x['accessibilityData']['accessibilityData'],
3173 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3174 label = (try_get(tbr, getter, dict) or {}).get('label')
3175 if label:
3176 mobj = re.match(regex, label)
3177 if mobj:
3178 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3179 break
3180 sbr_tooltip = try_get(
3181 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3182 if sbr_tooltip:
3183 like_count, dislike_count = sbr_tooltip.split(' / ')
3184 info.update({
3185 'like_count': str_to_int(like_count),
3186 'dislike_count': str_to_int(dislike_count),
3187 })
3188 vsir = content.get('videoSecondaryInfoRenderer')
3189 if vsir:
3190 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
3191 rows = try_get(
3192 vsir,
3193 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3194 list) or []
3195 multiple_songs = False
3196 for row in rows:
3197 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3198 multiple_songs = True
3199 break
3200 for row in rows:
3201 mrr = row.get('metadataRowRenderer') or {}
3202 mrr_title = mrr.get('title')
3203 if not mrr_title:
3204 continue
3205 mrr_title = self._get_text(mrr, 'title')
3206 mrr_contents_text = self._get_text(mrr, ('contents', 0))
3207 if mrr_title == 'License':
3208 info['license'] = mrr_contents_text
3209 elif not multiple_songs:
3210 if mrr_title == 'Album':
3211 info['album'] = mrr_contents_text
3212 elif mrr_title == 'Artist':
3213 info['artist'] = mrr_contents_text
3214 elif mrr_title == 'Song':
3215 info['track'] = mrr_contents_text
3216
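# Fall back to the uploader fields when the corresponding channel fields could not be extracted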
3217 fallbacks = {
3218 'channel': 'uploader',
3219 'channel_id': 'uploader_id',
3220 'channel_url': 'uploader_url',
3221 }
3222 for to, frm in fallbacks.items():
3223 if not info.get(to):
3224 info[to] = info.get(frm)
3225
3226 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3227 v = info.get(s_k)
3228 if v:
3229 info[d_k] = v
3230
3231 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3232 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3233 is_membersonly = None
3234 is_premium = None
3235 if initial_data and is_private is not None:
3236 is_membersonly = False
3237 is_premium = False
3238 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3239 badge_labels = set()
3240 for content in contents:
3241 if not isinstance(content, dict):
3242 continue
3243 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3244 for badge_label in badge_labels:
3245 if badge_label.lower() == 'members only':
3246 is_membersonly = True
3247 elif badge_label.lower() == 'premium':
3248 is_premium = True
3249 elif badge_label.lower() == 'unlisted':
3250 is_unlisted = True
3251
3252 info['availability'] = self._availability(
3253 is_private=is_private,
3254 needs_premium=is_premium,
3255 needs_subscription=is_membersonly,
3256 needs_auth=info['age_limit'] >= 18,
3257 is_unlisted=None if is_private is None else is_unlisted)
3258
3259 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
3260
3261 self.mark_watched(video_id, player_responses)
3262
3263 return info
3264
3265
3266 class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
3267
3268 def _extract_channel_id(self, webpage):
3269 channel_id = self._html_search_meta(
3270 'channelId', webpage, 'channel id', default=None)
3271 if channel_id:
3272 return channel_id
3273 channel_url = self._html_search_meta(
3274 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3275 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3276 'twitter:app:url:googleplay'), webpage, 'channel url')
3277 return self._search_regex(
3278 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
3279 channel_url, 'channel id')
3280
3281 @staticmethod
3282 def _extract_basic_item_renderer(item):
3283 # Modified from _extract_grid_item_renderer
3284 known_basic_renderers = (
3285 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3286 )
3287 for key, renderer in item.items():
3288 if not isinstance(renderer, dict):
3289 continue
3290 elif key in known_basic_renderers:
3291 return renderer
3292 elif key.startswith('grid') and key.endswith('Renderer'):
3293 return renderer
3294
3295 def _grid_entries(self, grid_renderer):
3296 for item in grid_renderer['items']:
3297 if not isinstance(item, dict):
3298 continue
3299 renderer = self._extract_basic_item_renderer(item)
3300 if not isinstance(renderer, dict):
3301 continue
3302 title = self._get_text(renderer, 'title')
3303
3304 # playlist
3305 playlist_id = renderer.get('playlistId')
3306 if playlist_id:
3307 yield self.url_result(
3308 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3309 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3310 video_title=title)
3311 continue
3312 # video
3313 video_id = renderer.get('videoId')
3314 if video_id:
3315 yield self._extract_video(renderer)
3316 continue
3317 # channel
3318 channel_id = renderer.get('channelId')
3319 if channel_id:
3320 yield self.url_result(
3321 'https://www.youtube.com/channel/%s' % channel_id,
3322 ie=YoutubeTabIE.ie_key(), video_title=title)
3323 continue
3324 # generic endpoint URL support
3325 ep_url = urljoin('https://www.youtube.com/', try_get(
3326 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3327 compat_str))
3328 if ep_url:
3329 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3330 if ie.suitable(ep_url):
3331 yield self.url_result(
3332 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3333 break
3334
3335 def _shelf_entries_from_content(self, shelf_renderer):
3336 content = shelf_renderer.get('content')
3337 if not isinstance(content, dict):
3338 return
3339 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3340 if renderer:
3341 # TODO: add support for nested playlists so that each shelf is processed
3342 # as a separate playlist
3343 # TODO: this includes only the first N items
3344 for entry in self._grid_entries(renderer):
3345 yield entry
3346 renderer = content.get('horizontalListRenderer')
3347 if renderer:
3348 # TODO
3349 pass
3350
3351 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3352 ep = try_get(
3353 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3354 compat_str)
3355 shelf_url = urljoin('https://www.youtube.com', ep)
3356 if shelf_url:
3357 # Skip links to other channels; note that checking for
3358 # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
3359 # will not work
3360 if skip_channels and '/channels?' in shelf_url:
3361 return
3362 title = self._get_text(shelf_renderer, 'title')
3363 yield self.url_result(shelf_url, video_title=title)
3364 # Shelf may not contain a shelf URL; fall back to extraction from content
3365 for entry in self._shelf_entries_from_content(shelf_renderer):
3366 yield entry
3367
3368 def _playlist_entries(self, video_list_renderer):
3369 for content in video_list_renderer['contents']:
3370 if not isinstance(content, dict):
3371 continue
3372 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3373 if not isinstance(renderer, dict):
3374 continue
3375 video_id = renderer.get('videoId')
3376 if not video_id:
3377 continue
3378 yield self._extract_video(renderer)
3379
3380 def _rich_entries(self, rich_grid_renderer):
3381 renderer = try_get(
3382 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3383 video_id = renderer.get('videoId')
3384 if not video_id:
3385 return
3386 yield self._extract_video(renderer)
3387
3388 def _video_entry(self, video_renderer):
3389 video_id = video_renderer.get('videoId')
3390 if video_id:
3391 return self._extract_video(video_renderer)
3392
3393 def _post_thread_entries(self, post_thread_renderer):
3394 post_renderer = try_get(
3395 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3396 if not post_renderer:
3397 return
3398 # video attachment
3399 video_renderer = try_get(
3400 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3401 video_id = video_renderer.get('videoId')
3402 if video_id:
3403 entry = self._extract_video(video_renderer)
3404 if entry:
3405 yield entry
3406 # playlist attachment
3407 playlist_id = try_get(
3408 post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3409 if playlist_id:
3410 yield self.url_result(
3411 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3412 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3413 # inline video links
3414 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3415 for run in runs:
3416 if not isinstance(run, dict):
3417 continue
3418 ep_url = try_get(
3419 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3420 if not ep_url:
3421 continue
3422 if not YoutubeIE.suitable(ep_url):
3423 continue
3424 ep_video_id = YoutubeIE._match_id(ep_url)
3425 if video_id == ep_video_id:
3426 continue
3427 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3428
3429 def _post_thread_continuation_entries(self, post_thread_continuation):
3430 contents = post_thread_continuation.get('contents')
3431 if not isinstance(contents, list):
3432 return
3433 for content in contents:
3434 renderer = content.get('backstagePostThreadRenderer')
3435 if not isinstance(renderer, dict):
3436 continue
3437 for entry in self._post_thread_entries(renderer):
3438 yield entry
3439
3440 r''' # unused
3441 def _rich_grid_entries(self, contents):
3442 for content in contents:
3443 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3444 if video_renderer:
3445 entry = self._video_entry(video_renderer)
3446 if entry:
3447 yield entry
3448 '''
3449 def _extract_entries(self, parent_renderer, continuation_list):
3450 # continuation_list is modified in-place with continuation_list = [continuation_token]
3451 continuation_list[:] = [None]
3452 contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
3453 for content in contents:
3454 if not isinstance(content, dict):
3455 continue
3456 is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
3457 if not is_renderer:
3458 renderer = content.get('richItemRenderer')
3459 if renderer:
3460 for entry in self._rich_entries(renderer):
3461 yield entry
3462 continuation_list[0] = self._extract_continuation(parent_renderer)
3463 continue
3464 isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
3465 for isr_content in isr_contents:
3466 if not isinstance(isr_content, dict):
3467 continue
3468
3469 known_renderers = {
3470 'playlistVideoListRenderer': self._playlist_entries,
3471 'gridRenderer': self._grid_entries,
3472 'shelfRenderer': lambda x: self._shelf_entries(x),
3473 'backstagePostThreadRenderer': self._post_thread_entries,
3474 'videoRenderer': lambda x: [self._video_entry(x)],
3475 'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
3476 'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
3477 }
3478 for key, renderer in isr_content.items():
3479 if key not in known_renderers:
3480 continue
3481 for entry in known_renderers[key](renderer):
3482 if entry:
3483 yield entry
3484 continuation_list[0] = self._extract_continuation(renderer)
3485 break
3486
3487 if not continuation_list[0]:
3488 continuation_list[0] = self._extract_continuation(is_renderer)
3489
3490 if not continuation_list[0]:
3491 continuation_list[0] = self._extract_continuation(parent_renderer)
3492
3493 def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
3494 continuation_list = [None]
3495 extract_entries = lambda x: self._extract_entries(x, continuation_list)
3496 tab_content = try_get(tab, lambda x: x['content'], dict)
3497 if not tab_content:
3498 return
3499 parent_renderer = (
3500 try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
3501 or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
3502 for entry in extract_entries(parent_renderer):
3503 yield entry
3504 continuation = continuation_list[0]
3505
3506 for page_num in itertools.count(1):
3507 if not continuation:
3508 break
3509 headers = self.generate_api_headers(
3510 ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
3511 response = self._extract_response(
3512 item_id='%s page %s' % (item_id, page_num),
3513 query=continuation, headers=headers, ytcfg=ytcfg,
3514 check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
3515
3516 if not response:
3517 break
3518 # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
3519 # See: https://github.com/ytdl-org/youtube-dl/issues/28702
3520 visitor_data = self._extract_visitor_data(response) or visitor_data
3521
3522 known_continuation_renderers = {
3523 'playlistVideoListContinuation': self._playlist_entries,
3524 'gridContinuation': self._grid_entries,
3525 'itemSectionContinuation': self._post_thread_continuation_entries,
3526 'sectionListContinuation': extract_entries, # for feeds
3527 }
3528 continuation_contents = try_get(
3529 response, lambda x: x['continuationContents'], dict) or {}
3530 continuation_renderer = None
3531 for key, value in continuation_contents.items():
3532 if key not in known_continuation_renderers:
3533 continue
3534 continuation_renderer = value
3535 continuation_list = [None]
3536 for entry in known_continuation_renderers[key](continuation_renderer):
3537 yield entry
3538 continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
3539 break
3540 if continuation_renderer:
3541 continue
3542
3543 known_renderers = {
3544 'gridPlaylistRenderer': (self._grid_entries, 'items'),
3545 'gridVideoRenderer': (self._grid_entries, 'items'),
3546 'gridChannelRenderer': (self._grid_entries, 'items'),
3547 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
3548 'itemSectionRenderer': (extract_entries, 'contents'), # for feeds
3549 'richItemRenderer': (extract_entries, 'contents'), # for hashtag
3550 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
3551 }
3552 on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
3553 continuation_items = try_get(
3554 on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
3555 continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
3556 video_items_renderer = None
3557 for key, value in continuation_item.items():
3558 if key not in known_renderers:
3559 continue
3560 video_items_renderer = {known_renderers[key][1]: continuation_items}
3561 continuation_list = [None]
3562 for entry in known_renderers[key][0](video_items_renderer):
3563 yield entry
3564 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
3565 break
3566 if video_items_renderer:
3567 continue
3568 break
3569
3570 @staticmethod
3571 def _extract_selected_tab(tabs):
3572 for tab in tabs:
3573 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3574 if renderer.get('selected') is True:
3575 return renderer
3576 else:
3577 raise ExtractorError('Unable to find selected tab')
3578
3579 @classmethod
3580 def _extract_uploader(cls, data):
3581 uploader = {}
3582 renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3583 owner = try_get(
3584 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3585 if owner:
3586 uploader['uploader'] = owner.get('text')
3587 uploader['uploader_id'] = try_get(
3588 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3589 uploader['uploader_url'] = urljoin(
3590 'https://www.youtube.com/',
3591 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3592 return {k: v for k, v in uploader.items() if v is not None}
3593
3594 def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
3595 playlist_id = title = description = channel_url = channel_name = channel_id = None
3596 tags = []
3597
3598 selected_tab = self._extract_selected_tab(tabs)
3599 renderer = try_get(
3600 data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
3601 if renderer:
3602 channel_name = renderer.get('title')
3603 channel_url = renderer.get('channelUrl')
3604 channel_id = renderer.get('externalId')
3605 else:
3606 renderer = try_get(
3607 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
3608
3609 if renderer:
3610 title = renderer.get('title')
3611 description = renderer.get('description', '')
3612 playlist_id = channel_id
3613 tags = renderer.get('keywords', '').split()
3614
3615 thumbnails = (
3616 self._extract_thumbnails(renderer, 'avatar')
3617 or self._extract_thumbnails(
3618 self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
3619 ('thumbnailRenderer', 'playlistVideoThumbnailRenderer', 'thumbnail')))
3620
3621 if playlist_id is None:
3622 playlist_id = item_id
3623 if title is None:
3624 title = (
3625 try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
3626 or playlist_id)
3627 title += format_field(selected_tab, 'title', ' - %s')
3628 title += format_field(selected_tab, 'expandedText', ' - %s')
3629 metadata = {
3630 'playlist_id': playlist_id,
3631 'playlist_title': title,
3632 'playlist_description': description,
3633 'uploader': channel_name,
3634 'uploader_id': channel_id,
3635 'uploader_url': channel_url,
3636 'thumbnails': thumbnails,
3637 'tags': tags,
3638 }
3639 availability = self._extract_availability(data)
3640 if availability:
3641 metadata['availability'] = availability
3642 if not channel_id:
3643 metadata.update(self._extract_uploader(data))
3644 metadata.update({
3645 'channel': metadata['uploader'],
3646 'channel_id': metadata['uploader_id'],
3647 'channel_url': metadata['uploader_url']})
3648 return self.playlist_result(
3649 self._entries(
3650 selected_tab, playlist_id, ytcfg,
3651 self._extract_account_syncid(ytcfg, data),
3652 self._extract_visitor_data(data, ytcfg)),
3653 **metadata)
3654
3655 def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
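# Mix playlists are potentially endless; keep paging through the 'next' endpoint
# until the first video is encountered again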
3656 first_id = last_id = response = None
3657 for page_num in itertools.count(1):
3658 videos = list(self._playlist_entries(playlist))
3659 if not videos:
3660 return
3661 start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
3662 if start >= len(videos):
3663 return
3664 for video in videos[start:]:
3665 if video['id'] == first_id:
3666 self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
3667 return
3668 yield video
3669 first_id = first_id or videos[0]['id']
3670 last_id = videos[-1]['id']
3671 watch_endpoint = try_get(
3672 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
3673 headers = self.generate_api_headers(
3674 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
3675 visitor_data=self._extract_visitor_data(response, data, ytcfg))
3676 query = {
3677 'playlistId': playlist_id,
3678 'videoId': watch_endpoint.get('videoId') or last_id,
3679 'index': watch_endpoint.get('index') or len(videos),
3680 'params': watch_endpoint.get('params') or 'OAE%3D'
3681 }
3682 response = self._extract_response(
3683 item_id='%s page %d' % (playlist_id, page_num),
3684 query=query, ep='next', headers=headers, ytcfg=ytcfg,
3685 check_get_keys='contents'
3686 )
3687 playlist = try_get(
3688 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3689
3690 def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
3691 title = playlist.get('title') or try_get(
3692 data, lambda x: x['titleText']['simpleText'], compat_str)
3693 playlist_id = playlist.get('playlistId') or item_id
3694
3695 # Delegating everything except mix playlists to regular tab-based playlist URL
3696 playlist_url = urljoin(url, try_get(
3697 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3698 compat_str))
3699 if playlist_url and playlist_url != url:
3700 return self.url_result(
3701 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3702 video_title=title)
3703
3704 return self.playlist_result(
3705 self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
3706 playlist_id=playlist_id, playlist_title=title)
3707
3708 def _extract_availability(self, data):
3709 """
3710 Gets the availability of a given playlist/tab.
3711 Note: Unless YouTube tells us explicitly, we do not assume it is public
3712 @param data: response
3713 """
3714 is_private = is_unlisted = None
3715 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
3716 badge_labels = self._extract_badges(renderer)
3717
3718 # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
3719 privacy_dropdown_entries = try_get(
3720 renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
3721 for renderer_dict in privacy_dropdown_entries:
3722 is_selected = try_get(
3723 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
3724 if not is_selected:
3725 continue
3726 label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
3727 if label:
3728 badge_labels.add(label.lower())
3729 break
3730
3731 for badge_label in badge_labels:
3732 if badge_label == 'unlisted':
3733 is_unlisted = True
3734 elif badge_label == 'private':
3735 is_private = True
3736 elif badge_label == 'public':
3737 is_unlisted = is_private = False
3738 return self._availability(is_private, False, False, False, is_unlisted)
3739
3740 @staticmethod
3741 def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
3742 sidebar_renderer = try_get(
3743 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
3744 for item in sidebar_renderer:
3745 renderer = try_get(item, lambda x: x[info_renderer], expected_type)
3746 if renderer:
3747 return renderer
3748
3749 def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
3750 """
3751 Get playlist with unavailable videos if the 'show unavailable videos' button exists.
3752 """
3753 browse_id = params = None
3754 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
3755 if not renderer:
3756 return
3757 menu_renderer = try_get(
3758 renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
3759 for menu_item in menu_renderer:
3760 if not isinstance(menu_item, dict):
3761 continue
3762 nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
3763 text = try_get(
3764 nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
3765 if not text or text.lower() != 'show unavailable videos':
3766 continue
3767 browse_endpoint = try_get(
3768 nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
3769 browse_id = browse_endpoint.get('browseId')
3770 params = browse_endpoint.get('params')
3771 break
3772
3773 headers = self.generate_api_headers(
3774 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
3775 visitor_data=self._extract_visitor_data(data, ytcfg))
3776 query = {
3777 'params': params or 'wgYCCAA=',
3778 'browseId': browse_id or 'VL%s' % item_id
3779 }
3780 return self._extract_response(
3781 item_id=item_id, headers=headers, query=query,
3782 check_get_keys='contents', fatal=False, ytcfg=ytcfg,
3783 note='Downloading API JSON with unavailable videos')
3784
3785 def _extract_webpage(self, url, item_id, fatal=True):
3786 retries = self.get_param('extractor_retries', 3)
3787 count = -1
3788 webpage = data = last_error = None
3789 while count < retries:
3790 count += 1
3791 # Sometimes youtube returns a webpage with incomplete ytInitialData
3792 # See: https://github.com/yt-dlp/yt-dlp/issues/116
3793 if last_error:
3794 self.report_warning('%s. Retrying ...' % last_error)
3795 try:
3796 webpage = self._download_webpage(
3797 url, item_id,
3798 note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
3799 data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
3800 except ExtractorError as e:
3801 if isinstance(e.cause, network_exceptions):
3802 if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
3803 last_error = error_to_compat_str(e.cause or e.msg)
3804 if count < retries:
3805 continue
3806 if fatal:
3807 raise
3808 self.report_warning(error_to_compat_str(e))
3809 break
3810 else:
3811 try:
3812 self._extract_and_report_alerts(data)
3813 except ExtractorError as e:
3814 if fatal:
3815 raise
3816 self.report_warning(error_to_compat_str(e))
3817 break
3818
3819 if dict_get(data, ('contents', 'currentVideoEndpoint')):
3820 break
3821
3822 last_error = 'Incomplete yt initial data received'
3823 if count >= retries:
3824 if fatal:
3825 raise ExtractorError(last_error)
3826 self.report_warning(last_error)
3827 break
3828
3829 return webpage, data
3830
3831 def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
3832 data = None
3833 if 'webpage' not in self._configuration_arg('skip'):
3834 webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
3835 ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
3836 if not data:
3837 if not ytcfg and self.is_authenticated:
3838 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
3839 if 'authcheck' not in self._configuration_arg('skip') and fatal:
3840 raise ExtractorError(
3841 msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
3842 ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
3843 expected=True)
3844 self.report_warning(msg, only_once=True)
3845 data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
3846 return data, ytcfg
3847
3848 def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
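# Resolve the URL via navigation/resolve_url, then query the corresponding browse/next endpoint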
3849 headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
3850 resolve_response = self._extract_response(
3851 item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
3852 ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)
3853 endpoints = {'browseEndpoint': 'browse', 'watchEndpoint': 'next'}
3854 for ep_key, ep in endpoints.items():
3855 params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
3856 if params:
3857 return self._extract_response(
3858 item_id=item_id, query=params, ep=ep, headers=headers,
3859 ytcfg=ytcfg, fatal=fatal, default_client=default_client,
3860 check_get_keys=('contents', 'currentVideoEndpoint'))
3861 err_note = 'Failed to resolve url (does the playlist exist?)'
3862 if fatal:
3863 raise ExtractorError(err_note, expected=True)
3864 self.report_warning(err_note, item_id)
3865
3866 @staticmethod
3867 def _smuggle_data(entries, data):
3868 for entry in entries:
3869 if data:
3870 entry['url'] = smuggle_url(entry['url'], data)
3871 yield entry
3872
3873 _SEARCH_PARAMS = None
3874
3875 def _search_results(self, query, params=NO_DEFAULT):
3876 data = {'query': query}
3877 if params is NO_DEFAULT:
3878 params = self._SEARCH_PARAMS
3879 if params:
3880 data['params'] = params
3881 continuation_list = [None]
3882 for page_num in itertools.count(1):
3883 data.update(continuation_list[0] or {})
3884 search = self._extract_response(
3885 item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
3886 check_get_keys=('contents', 'onResponseReceivedCommands'))
3887 slr_contents = try_get(
3888 search,
3889 (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
3890 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
3891 list)
3892 yield from self._extract_entries({'contents': slr_contents}, continuation_list)
3893 if not continuation_list[0]:
3894 break
3895
3896
3897 class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
3898 IE_DESC = 'YouTube Tabs'
3899 _VALID_URL = r'''(?x:
3900 https?://
3901 (?:\w+\.)?
3902 (?:
3903 youtube(?:kids)?\.com|
3904 %(invidious)s
3905 )/
3906 (?:
3907 (?P<channel_type>channel|c|user|browse)/|
3908 (?P<not_channel>
3909 feed/|hashtag/|
3910 (?:playlist|watch)\?.*?\blist=
3911 )|
3912 (?!(?:%(reserved_names)s)\b) # Direct URLs
3913 )
3914 (?P<id>[^/?\#&]+)
3915 )''' % {
3916 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
3917 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
3918 }
3919 IE_NAME = 'youtube:tab'
3920
3921 _TESTS = [{
3922 'note': 'playlists, multipage',
3923 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3924 'playlist_mincount': 94,
3925 'info_dict': {
3926 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3927 'title': 'Игорь Клейнер - Playlists',
3928 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3929 'uploader': 'Игорь Клейнер',
3930 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3931 },
3932 }, {
3933 'note': 'playlists, multipage, different order',
3934 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3935 'playlist_mincount': 94,
3936 'info_dict': {
3937 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3938 'title': 'Игорь Клейнер - Playlists',
3939 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3940 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3941 'uploader': 'Игорь Клейнер',
3942 },
3943 }, {
3944 'note': 'playlists, series',
3945 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3946 'playlist_mincount': 5,
3947 'info_dict': {
3948 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3949 'title': '3Blue1Brown - Playlists',
3950 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3951 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3952 'uploader': '3Blue1Brown',
3953 },
3954 }, {
3955 'note': 'playlists, singlepage',
3956 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3957 'playlist_mincount': 4,
3958 'info_dict': {
3959 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3960 'title': 'ThirstForScience - Playlists',
3961 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3962 'uploader': 'ThirstForScience',
3963 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3964 }
3965 }, {
3966 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3967 'only_matching': True,
3968 }, {
3969 'note': 'basic, single video playlist',
3970 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3971 'info_dict': {
3972 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3973 'uploader': 'Sergey M.',
3974 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3975 'title': 'youtube-dl public playlist',
3976 },
3977 'playlist_count': 1,
3978 }, {
3979 'note': 'empty playlist',
3980 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3981 'info_dict': {
3982 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3983 'uploader': 'Sergey M.',
3984 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3985 'title': 'youtube-dl empty playlist',
3986 },
3987 'playlist_count': 0,
3988 }, {
3989 'note': 'Home tab',
3990 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3991 'info_dict': {
3992 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3993 'title': 'lex will - Home',
3994 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3995 'uploader': 'lex will',
3996 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3997 },
3998 'playlist_mincount': 2,
3999 }, {
4000 'note': 'Videos tab',
4001 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
4002 'info_dict': {
4003 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4004 'title': 'lex will - Videos',
4005 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4006 'uploader': 'lex will',
4007 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4008 },
4009 'playlist_mincount': 975,
4010 }, {
4011 'note': 'Videos tab, sorted by popular',
4012 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
4013 'info_dict': {
4014 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4015 'title': 'lex will - Videos',
4016 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4017 'uploader': 'lex will',
4018 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4019 },
4020 'playlist_mincount': 199,
4021 }, {
4022 'note': 'Playlists tab',
4023 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
4024 'info_dict': {
4025 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4026 'title': 'lex will - Playlists',
4027 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4028 'uploader': 'lex will',
4029 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4030 },
4031 'playlist_mincount': 17,
4032 }, {
4033 'note': 'Community tab',
4034 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
4035 'info_dict': {
4036 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4037 'title': 'lex will - Community',
4038 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4039 'uploader': 'lex will',
4040 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4041 },
4042 'playlist_mincount': 18,
4043 }, {
4044 'note': 'Channels tab',
4045 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
4046 'info_dict': {
4047 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4048 'title': 'lex will - Channels',
4049 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4050 'uploader': 'lex will',
4051 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4052 },
4053 'playlist_mincount': 12,
4054 }, {
4055 'note': 'Search tab',
4056 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
4057 'playlist_mincount': 40,
4058 'info_dict': {
4059 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4060 'title': '3Blue1Brown - Search - linear algebra',
4061 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4062 'uploader': '3Blue1Brown',
4063 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4064 },
4065 }, {
4066 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4067 'only_matching': True,
4068 }, {
4069 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4070 'only_matching': True,
4071 }, {
4072 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4073 'only_matching': True,
4074 }, {
4075 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
4076 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4077 'info_dict': {
4078 'title': '29C3: Not my department',
4079 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4080 'uploader': 'Christiaan008',
4081 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4082 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
4083 },
4084 'playlist_count': 96,
4085 }, {
4086 'note': 'Large playlist',
4087 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
4088 'info_dict': {
4089 'title': 'Uploads from Cauchemar',
4090 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
4091 'uploader': 'Cauchemar',
4092 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
4093 },
4094 'playlist_mincount': 1123,
4095 }, {
4096 'note': 'even larger playlist, 8832 videos',
4097 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
4098 'only_matching': True,
4099 }, {
4100 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
4101 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
4102 'info_dict': {
4103 'title': 'Uploads from Interstellar Movie',
4104 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
4105 'uploader': 'Interstellar Movie',
4106 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4107 },
4108 'playlist_mincount': 21,
4109 }, {
4110 'note': 'Playlist with "show unavailable videos" button',
4111 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
4112 'info_dict': {
4113 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
4114 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
4115 'uploader': 'Phim Siêu Nhân Nhật Bản',
4116 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
4117 },
4118 'playlist_mincount': 200,
4119 }, {
4120 'note': 'Playlist with unavailable videos in page 7',
4121 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
4122 'info_dict': {
4123 'title': 'Uploads from BlankTV',
4124 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
4125 'uploader': 'BlankTV',
4126 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
4127 },
4128 'playlist_mincount': 1000,
4129 }, {
4130 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
4131 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
4132 'info_dict': {
4133 'title': 'Data Analysis with Dr Mike Pound',
4134 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
4135 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
4136 'uploader': 'Computerphile',
4137 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
4138 },
4139 'playlist_mincount': 11,
4140 }, {
4141 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4142 'only_matching': True,
4143 }, {
4144 'note': 'Playlist URL that does not actually serve a playlist',
4145 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
4146 'info_dict': {
4147 'id': 'FqZTN594JQw',
4148 'ext': 'webm',
4149 'title': "Smiley's People 01 detective, Adventure Series, Action",
4150 'uploader': 'STREEM',
4151 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
4152 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
4153 'upload_date': '20150526',
4154 'license': 'Standard YouTube License',
4155 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
4156 'categories': ['People & Blogs'],
4157 'tags': list,
4158 'view_count': int,
4159 'like_count': int,
4160 'dislike_count': int,
4161 },
4162 'params': {
4163 'skip_download': True,
4164 },
4165 'skip': 'This video is not available.',
4166 'add_ie': [YoutubeIE.ie_key()],
4167 }, {
4168 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
4169 'only_matching': True,
4170 }, {
4171 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
4172 'only_matching': True,
4173 }, {
4174 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
4175 'info_dict': {
4176 'id': '3yImotZU3tw', # This will keep changing
4177 'ext': 'mp4',
4178 'title': compat_str,
4179 'uploader': 'Sky News',
4180 'uploader_id': 'skynews',
4181 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
4182 'upload_date': r're:\d{8}',
4183 'description': compat_str,
4184 'categories': ['News & Politics'],
4185 'tags': list,
4186 'like_count': int,
4187 'dislike_count': int,
4188 },
4189 'params': {
4190 'skip_download': True,
4191 },
4192 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
4193 }, {
4194 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
4195 'info_dict': {
4196 'id': 'a48o2S1cPoo',
4197 'ext': 'mp4',
4198 'title': 'The Young Turks - Live Main Show',
4199 'uploader': 'The Young Turks',
4200 'uploader_id': 'TheYoungTurks',
4201 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
4202 'upload_date': '20150715',
4203 'license': 'Standard YouTube License',
4204 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
4205 'categories': ['News & Politics'],
4206 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
4207 'like_count': int,
4208 'dislike_count': int,
4209 },
4210 'params': {
4211 'skip_download': True,
4212 },
4213 'only_matching': True,
4214 }, {
4215 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
4216 'only_matching': True,
4217 }, {
4218 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
4219 'only_matching': True,
4220 }, {
4221 'note': 'A channel that is not live. Should raise error',
4222 'url': 'https://www.youtube.com/user/numberphile/live',
4223 'only_matching': True,
4224 }, {
4225 'url': 'https://www.youtube.com/feed/trending',
4226 'only_matching': True,
4227 }, {
4228 'url': 'https://www.youtube.com/feed/library',
4229 'only_matching': True,
4230 }, {
4231 'url': 'https://www.youtube.com/feed/history',
4232 'only_matching': True,
4233 }, {
4234 'url': 'https://www.youtube.com/feed/subscriptions',
4235 'only_matching': True,
4236 }, {
4237 'url': 'https://www.youtube.com/feed/watch_later',
4238 'only_matching': True,
4239 }, {
4240 'note': 'Recommended - redirects to home page.',
4241 'url': 'https://www.youtube.com/feed/recommended',
4242 'only_matching': True,
4243 }, {
4244 'note': 'inline playlist with not always working continuations',
4245 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
4246 'only_matching': True,
4247 }, {
4248 'url': 'https://www.youtube.com/course',
4249 'only_matching': True,
4250 }, {
4251 'url': 'https://www.youtube.com/zsecurity',
4252 'only_matching': True,
4253 }, {
4254 'url': 'http://www.youtube.com/NASAgovVideo/videos',
4255 'only_matching': True,
4256 }, {
4257 'url': 'https://www.youtube.com/TheYoungTurks/live',
4258 'only_matching': True,
4259 }, {
4260 'url': 'https://www.youtube.com/hashtag/cctv9',
4261 'info_dict': {
4262 'id': 'cctv9',
4263 'title': '#cctv9',
4264 },
4265 'playlist_mincount': 350,
4266 }, {
4267 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
4268 'only_matching': True,
4269 }, {
4270 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
4271 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4272 'only_matching': True
4273 }, {
4274 'note': '/browse/ should redirect to /channel/',
4275 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
4276 'only_matching': True
4277 }, {
4278 'note': 'VLPL, should redirect to playlist?list=PL...',
4279 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4280 'info_dict': {
4281 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4282 'uploader': 'NoCopyrightSounds',
4283 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
4284 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
4285 'title': 'NCS Releases',
4286 },
4287 'playlist_mincount': 166,
4288 }, {
4289 'note': 'Topic, should redirect to playlist?list=UU...',
4290 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
4291 'info_dict': {
4292 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
4293 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4294 'title': 'Uploads from Royalty Free Music - Topic',
4295 'uploader': 'Royalty Free Music - Topic',
4296 },
4297 'expected_warnings': [
4298 'A channel/user page was given',
4299 'The URL does not have a videos tab',
4300 ],
4301 'playlist_mincount': 101,
4302 }, {
4303 'note': 'Topic without a UU playlist',
4304 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
4305 'info_dict': {
4306 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
4307 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
4308 },
4309 'expected_warnings': [
4310 'A channel/user page was given',
4311 'The URL does not have a videos tab',
4312 'Falling back to channel URL',
4313 ],
4314 'playlist_mincount': 9,
4315 }, {
4316 'note': 'Youtube music Album',
4317 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
4318 'info_dict': {
4319 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
4320 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
4321 },
4322 'playlist_count': 50,
4323 }, {
4324 'note': 'unlisted single video playlist',
4325 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
4326 'info_dict': {
4327 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
4328 'uploader': 'colethedj',
4329 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
4330 'title': 'yt-dlp unlisted playlist test',
4331 'availability': 'unlisted'
4332 },
4333 'playlist_count': 1,
4334 }, {
4335 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
4336 'url': 'https://www.youtube.com/feed/recommended',
4337 'info_dict': {
4338 'id': 'recommended',
4339 'title': 'recommended',
4340 },
4341 'playlist_mincount': 50,
4342 'params': {
4343 'skip_download': True,
4344 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4345 },
4346 }, {
4347 'note': 'API Fallback: /videos tab, sorted by oldest first',
4348 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
4349 'info_dict': {
4350 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
4351 'title': 'Cody\'sLab - Videos',
4352 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
4353 'uploader': 'Cody\'sLab',
4354 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
4355 },
4356 'playlist_mincount': 650,
4357 'params': {
4358 'skip_download': True,
4359 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4360 },
4361 }, {
4362 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
4363 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
4364 'info_dict': {
4365 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
4366 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4367 'title': 'Uploads from Royalty Free Music - Topic',
4368 'uploader': 'Royalty Free Music - Topic',
4369 },
4370 'expected_warnings': [
4371 'A channel/user page was given',
4372 'The URL does not have a videos tab',
4373 ],
4374 'playlist_mincount': 101,
4375 'params': {
4376 'skip_download': True,
4377 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4378 },
4379 }]
4380
4381 @classmethod
4382 def suitable(cls, url):
4383 return False if YoutubeIE.suitable(url) else super(
4384 YoutubeTabIE, cls).suitable(url)
4385
4386 def _real_extract(self, url):
4387 url, smuggled_data = unsmuggle_url(url, {})
4388 if self.is_music_url(url):
4389 smuggled_data['is_music_url'] = True
4390 info_dict = self.__real_extract(url, smuggled_data)
4391 if info_dict.get('entries'):
4392 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4393 return info_dict
4394
4395 _URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$')
4396
4397 def __real_extract(self, url, smuggled_data):
4398 item_id = self._match_id(url)
4399 url = compat_urlparse.urlunparse(
4400 compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
4401 compat_opts = self.get_param('compat_opts', [])
4402
4403 def get_mobj(url):
4404 mobj = self._URL_RE.match(url).groupdict()
4405 mobj.update((k, '') for k, v in mobj.items() if v is None)
4406 return mobj
4407
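# Illustrative only (group names come from _VALID_URL/_URL_RE above): for a
# URL like 'https://www.youtube.com/channel/UCxxxx/videos?view=0' the regex
# roughly splits it into pre='https://www.youtube.com/channel/UCxxxx',
# tab='/videos', post='?view=0'; unmatched groups are normalized to '' so
# that ''.join((pre, tab, post)) below can rebuild the URL without None checks.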
4408 mobj, redirect_warning = get_mobj(url), None
4409 # YouTube returns incomplete data if the tab name is not lowercase
4410 pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
4411 if is_channel:
4412 if smuggled_data.get('is_music_url'):
4413 if item_id[:2] == 'VL': # Youtube music VL channels have an equivalent playlist
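# e.g. a music channel id such as 'VLPLxxxx' (illustrative) is handled as
# the playlist 'PLxxxx': the 'VL' prefix is simply dropped below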
4414 item_id = item_id[2:]
4415 pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
4416 elif item_id[:2] == 'MP': # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
4417 mdata = self._extract_tab_endpoint(
4418 f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
4419 murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
4420 get_all=False, expected_type=compat_str)
4421 if not murl:
4422 raise ExtractorError('Failed to resolve album to playlist')
4423 return self.url_result(murl, ie=YoutubeTabIE.ie_key())
4424 elif mobj['channel_type'] == 'browse': # Youtube music /browse/ should be changed to /channel/
4425 pre = f'https://www.youtube.com/channel/{item_id}'
4426
4427 if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
4428 # Home URLs should redirect to /videos/
4429 redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
4430 'To download only the videos in the home page, add a "/featured" to the URL')
4431 tab = '/videos'
4432
4433 url = ''.join((pre, tab, post))
4434 mobj = get_mobj(url)
4435
4436 # Handle both video/playlist URLs
4437 qs = parse_qs(url)
4438 video_id, playlist_id = [qs.get(key, [None])[0] for key in ('v', 'list')]
4439
4440 if not video_id and mobj['not_channel'].startswith('watch'):
4441 if not playlist_id:
4442 # If there are neither video nor playlist IDs, YouTube redirects to the home page, which is undesirable
4443 raise ExtractorError('Unable to recognize tab page')
4444 # Common mistake: https://www.youtube.com/watch?list=playlist_id
4445 self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
4446 url = f'https://www.youtube.com/playlist?list={playlist_id}'
4447 mobj = get_mobj(url)
4448
4449 if video_id and playlist_id:
4450 if self.get_param('noplaylist'):
4451 self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
4452 return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
4453 ie=YoutubeIE.ie_key(), video_id=video_id)
4454 self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')
4455
4456 data, ytcfg = self._extract_data(url, item_id)
4457
4458 tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
4459 if tabs:
4460 selected_tab = self._extract_selected_tab(tabs)
4461 tab_name = selected_tab.get('title', '')
4462 if 'no-youtube-channel-redirect' not in compat_opts:
4463 if mobj['tab'] == '/live':
4464 # Live tab should have redirected to the video
4465 raise ExtractorError('The channel is not currently live', expected=True)
4466 if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
4467 redirect_warning = f'The URL does not have a {mobj["tab"][1:]} tab'
4468 if not mobj['not_channel'] and item_id[:2] == 'UC':
4469 # Topic channels don't have /videos. Use the equivalent playlist instead
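# e.g. channel UC9ALqqC4aIeG5iDs7i90Bfw maps to its uploads playlist
# UU9ALqqC4aIeG5iDs7i90Bfw (see the "Topic" entries in _TESTS above)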
4470 pl_id = f'UU{item_id[2:]}'
4471 pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
4472 try:
4473 data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True)
4474 except ExtractorError:
4475 redirect_warning += ' and the playlist redirect gave an error'
4476 else:
4477 item_id, url, tab_name = pl_id, pl_url, mobj['tab'][1:]
4478 redirect_warning += f'. Redirecting to playlist {pl_id} instead'
4479 if tab_name.lower() != mobj['tab'][1:]:
4480 redirect_warning += f'. {tab_name} tab is being downloaded instead'
4481
4482 if redirect_warning:
4483 self.report_warning(redirect_warning)
4484 self.write_debug(f'Final URL: {url}')
4485
4486 # YouTube sometimes provides a button to reload playlist with unavailable videos.
4487 if 'no-youtube-unavailable-videos' not in compat_opts:
4488 data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
4489 self._extract_and_report_alerts(data, only_once=True)
4490 tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
4491 if tabs:
4492 return self._extract_from_tabs(item_id, ytcfg, data, tabs)
4493
4494 playlist = traverse_obj(
4495 data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
4496 if playlist:
4497 return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)
4498
4499 video_id = traverse_obj(
4500 data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
4501 if video_id:
4502 if mobj['tab'] != '/live': # live tab is expected to redirect to video
4503 self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
4504 return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
4505 ie=YoutubeIE.ie_key(), video_id=video_id)
4506
4507 raise ExtractorError('Unable to recognize tab page')
4508
4509
4510 class YoutubePlaylistIE(InfoExtractor):
4511 IE_DESC = 'YouTube playlists'
4512 _VALID_URL = r'''(?x)(?:
4513 (?:https?://)?
4514 (?:\w+\.)?
4515 (?:
4516 (?:
4517 youtube(?:kids)?\.com|
4518 %(invidious)s
4519 )
4520 /.*?\?.*?\blist=
4521 )?
4522 (?P<id>%(playlist_id)s)
4523 )''' % {
4524 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
4525 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
4526 }
4527 IE_NAME = 'youtube:playlist'
4528 _TESTS = [{
4529 'note': 'issue #673',
4530 'url': 'PLBB231211A4F62143',
4531 'info_dict': {
4532 'title': '[OLD]Team Fortress 2 (Class-based LP)',
4533 'id': 'PLBB231211A4F62143',
4534 'uploader': 'Wickydoo',
4535 'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
4536 'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
4537 },
4538 'playlist_mincount': 29,
4539 }, {
4540 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
4541 'info_dict': {
4542 'title': 'YDL_safe_search',
4543 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
4544 },
4545 'playlist_count': 2,
4546 'skip': 'This playlist is private',
4547 }, {
4548 'note': 'embedded',
4549 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
4550 'playlist_count': 4,
4551 'info_dict': {
4552 'title': 'JODA15',
4553 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
4554 'uploader': 'milan',
4555 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
4556 }
4557 }, {
4558 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
4559 'playlist_mincount': 654,
4560 'info_dict': {
4561 'title': '2018 Chinese New Singles (11/6 updated)',
4562 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
4563 'uploader': 'LBK',
4564 'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
4565 'description': 'md5:da521864744d60a198e3a88af4db0d9d',
4566 }
4567 }, {
4568 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
4569 'only_matching': True,
4570 }, {
4571 # music album playlist
4572 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
4573 'only_matching': True,
4574 }]
4575
4576 @classmethod
4577 def suitable(cls, url):
4578 if YoutubeTabIE.suitable(url):
4579 return False
4580 from ..utils import parse_qs
4581 qs = parse_qs(url)
4582 if qs.get('v', [None])[0]:
4583 return False
4584 return super(YoutubePlaylistIE, cls).suitable(url)
4585
4586 def _real_extract(self, url):
4587 playlist_id = self._match_id(url)
4588 is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
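# A bare playlist id (e.g. 'PLBB231211A4F62143' from _TESTS above) carries no
# query string, so parse_qs(url) is falsy and the matched id itself is used to
# build the canonical https://www.youtube.com/playlist?list=... URL below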
4589 url = update_url_query(
4590 'https://www.youtube.com/playlist',
4591 parse_qs(url) or {'list': playlist_id})
4592 if is_music_url:
4593 url = smuggle_url(url, {'is_music_url': True})
4594 return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4595
4596
4597 class YoutubeYtBeIE(InfoExtractor):
4598 IE_DESC = 'youtu.be'
4599 _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
4600 _TESTS = [{
4601 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
4602 'info_dict': {
4603 'id': 'yeWKywCrFtk',
4604 'ext': 'mp4',
4605 'title': 'Small Scale Baler and Braiding Rugs',
4606 'uploader': 'Backus-Page House Museum',
4607 'uploader_id': 'backuspagemuseum',
4608 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
4609 'upload_date': '20161008',
4610 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
4611 'categories': ['Nonprofits & Activism'],
4612 'tags': list,
4613 'like_count': int,
4614 'dislike_count': int,
4615 },
4616 'params': {
4617 'noplaylist': True,
4618 'skip_download': True,
4619 },
4620 }, {
4621 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
4622 'only_matching': True,
4623 }]
4624
4625 def _real_extract(self, url):
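# e.g. 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5'
# (from _TESTS above) is rewritten to roughly
# 'https://www.youtube.com/watch?v=yeWKywCrFtk&list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5&feature=youtu.be'
# and handed to YoutubeTabIE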
4626 mobj = self._match_valid_url(url)
4627 video_id = mobj.group('id')
4628 playlist_id = mobj.group('playlist_id')
4629 return self.url_result(
4630 update_url_query('https://www.youtube.com/watch', {
4631 'v': video_id,
4632 'list': playlist_id,
4633 'feature': 'youtu.be',
4634 }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4635
4636
4637 class YoutubeYtUserIE(InfoExtractor):
4638 IE_DESC = 'YouTube user videos; "ytuser:" prefix'
4639 _VALID_URL = r'ytuser:(?P<id>.+)'
4640 _TESTS = [{
4641 'url': 'ytuser:phihag',
4642 'only_matching': True,
4643 }]
4644
4645 def _real_extract(self, url):
4646 user_id = self._match_id(url)
4647 return self.url_result(
4648 'https://www.youtube.com/user/%s/videos' % user_id,
4649 ie=YoutubeTabIE.ie_key(), video_id=user_id)
4650
4651
4652 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
4653 IE_NAME = 'youtube:favorites'
4654 IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
4655 _VALID_URL = r':ytfav(?:ou?rite)?s?'
4656 _LOGIN_REQUIRED = True
4657 _TESTS = [{
4658 'url': ':ytfav',
4659 'only_matching': True,
4660 }, {
4661 'url': ':ytfavorites',
4662 'only_matching': True,
4663 }]
4664
4665 def _real_extract(self, url):
4666 return self.url_result(
4667 'https://www.youtube.com/playlist?list=LL',
4668 ie=YoutubeTabIE.ie_key())
4669
4670
4671 class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
4672 IE_DESC = 'YouTube search'
4673 IE_NAME = 'youtube:search'
4674 _SEARCH_KEY = 'ytsearch'
4675 _SEARCH_PARAMS = 'EgIQAQ%3D%3D' # Videos only
4676 _TESTS = []
4677
4678
4679 class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
4680 IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
4681 _SEARCH_KEY = 'ytsearchdate'
4682 IE_DESC = 'YouTube search, newest videos first'
4683 _SEARCH_PARAMS = 'CAISAhAB' # Videos only, sorted by date
4684
4685
4686 class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
4687 IE_DESC = 'YouTube search URLs with sorting and filter support'
4688 IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
4689 _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
4690 _TESTS = [{
4691 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
4692 'playlist_mincount': 5,
4693 'info_dict': {
4694 'id': 'youtube-dl test video',
4695 'title': 'youtube-dl test video',
4696 }
4697 }, {
4698 'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
4699 'playlist_mincount': 5,
4700 'info_dict': {
4701 'id': 'python',
4702 'title': 'python',
4703 }
4704
4705 }, {
4706 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
4707 'only_matching': True,
4708 }]
4709
4710 def _real_extract(self, url):
4711 qs = parse_qs(url)
4712 query = (qs.get('search_query') or qs.get('q'))[0]
4713 return self.playlist_result(self._search_results(query, qs.get('sp', (None,))[0]), query, query)
4714
4715
4716 class YoutubeFeedsInfoExtractor(YoutubeTabIE):
4717 """
4718 Base class for feed extractors
4719 Subclasses must define the _FEED_NAME property.
4720 """
4721 _LOGIN_REQUIRED = True
4722 _TESTS = []
4723
4724 @property
4725 def IE_NAME(self):
4726 return 'youtube:%s' % self._FEED_NAME
4727
4728 def _real_extract(self, url):
4729 return self.url_result(
4730 'https://www.youtube.com/feed/%s' % self._FEED_NAME,
4731 ie=YoutubeTabIE.ie_key())
4732
4733
4734 class YoutubeWatchLaterIE(InfoExtractor):
4735 IE_NAME = 'youtube:watchlater'
4736 IE_DESC = 'YouTube watch later list; ":ytwatchlater" keyword (requires cookies)'
4737 _VALID_URL = r':ytwatchlater'
4738 _TESTS = [{
4739 'url': ':ytwatchlater',
4740 'only_matching': True,
4741 }]
4742
4743 def _real_extract(self, url):
4744 return self.url_result(
4745 'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())
4746
4747
4748 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
4749 IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
4750 _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
4751 _FEED_NAME = 'recommended'
4752 _LOGIN_REQUIRED = False
4753 _TESTS = [{
4754 'url': ':ytrec',
4755 'only_matching': True,
4756 }, {
4757 'url': ':ytrecommended',
4758 'only_matching': True,
4759 }, {
4760 'url': 'https://youtube.com',
4761 'only_matching': True,
4762 }]
4763
4764
4765 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
4766 IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
4767 _VALID_URL = r':ytsub(?:scription)?s?'
4768 _FEED_NAME = 'subscriptions'
4769 _TESTS = [{
4770 'url': ':ytsubs',
4771 'only_matching': True,
4772 }, {
4773 'url': ':ytsubscriptions',
4774 'only_matching': True,
4775 }]
4776
4777
4778 class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
4779 IE_DESC = 'YouTube watch history; ":ythis" keyword (requires cookies)'
4780 _VALID_URL = r':ythis(?:tory)?'
4781 _FEED_NAME = 'history'
4782 _TESTS = [{
4783 'url': ':ythistory',
4784 'only_matching': True,
4785 }]
4786
4787
4788 class YoutubeTruncatedURLIE(InfoExtractor):
4789 IE_NAME = 'youtube:truncated_url'
4790 IE_DESC = False # Do not list
4791 _VALID_URL = r'''(?x)
4792 (?:https?://)?
4793 (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
4794 (?:watch\?(?:
4795 feature=[a-z_]+|
4796 annotation_id=annotation_[^&]+|
4797 x-yt-cl=[0-9]+|
4798 hl=[^&]*|
4799 t=[0-9]+
4800 )?
4801 |
4802 attribution_link\?a=[^&]+
4803 )
4804 $
4805 '''
4806
4807 _TESTS = [{
4808 'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
4809 'only_matching': True,
4810 }, {
4811 'url': 'https://www.youtube.com/watch?',
4812 'only_matching': True,
4813 }, {
4814 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
4815 'only_matching': True,
4816 }, {
4817 'url': 'https://www.youtube.com/watch?feature=foo',
4818 'only_matching': True,
4819 }, {
4820 'url': 'https://www.youtube.com/watch?hl=en-GB',
4821 'only_matching': True,
4822 }, {
4823 'url': 'https://www.youtube.com/watch?t=2372',
4824 'only_matching': True,
4825 }]
4826
4827 def _real_extract(self, url):
4828 raise ExtractorError(
4829 'Did you forget to quote the URL? Remember that & is a meta '
4830 'character in most shells, so you want to put the URL in quotes, '
4831 'like youtube-dl '
4832 '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
4833 'or simply youtube-dl BaW_jenozKc .',
4834 expected=True)
4835
4836
4837 class YoutubeClipIE(InfoExtractor):
4838 IE_NAME = 'youtube:clip'
4839 IE_DESC = False # Do not list
4840 _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'
4841
4842 def _real_extract(self, url):
4843 self.report_warning('YouTube clips are not currently supported. The entire video will be downloaded instead')
4844 return self.url_result(url, 'Generic')
4845
4846
4847 class YoutubeTruncatedIDIE(InfoExtractor):
4848 IE_NAME = 'youtube:truncated_id'
4849 IE_DESC = False # Do not list
4850 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
4851
4852 _TESTS = [{
4853 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
4854 'only_matching': True,
4855 }]
4856
4857 def _real_extract(self, url):
4858 video_id = self._match_id(url)
4859 raise ExtractorError(
4860 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
4861 expected=True)