yt_dlp/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import calendar
   6 import copy
   7 import datetime
   8 import functools
   9 import hashlib
  10 import itertools
  11 import json
  12 import math
  13 import os.path
  14 import random
  15 import re
  16 import sys
  17 import time
  18 import traceback
  19 import threading
  20
  21 from .common import InfoExtractor, SearchInfoExtractor
  22 from ..compat import (
  23     compat_chr,
  24     compat_HTTPError,
  25     compat_parse_qs,
  26     compat_str,
  27     compat_urllib_parse_unquote_plus,
  28     compat_urllib_parse_urlencode,
  29     compat_urllib_parse_urlparse,
  30     compat_urlparse,
  31 )
  32 from ..jsinterp import JSInterpreter
  33 from ..utils import (
  34     bug_reports_message,
  35     clean_html,
  36     datetime_from_str,
  37     dict_get,
  38     error_to_compat_str,
  39     ExtractorError,
  40     float_or_none,
  41     format_field,
  42     int_or_none,
  43     is_html,
  44     join_nonempty,
  45     mimetype2ext,
  46     network_exceptions,
  47     NO_DEFAULT,
  48     orderedSet,
  49     parse_codecs,
  50     parse_count,
  51     parse_duration,
  52     parse_iso8601,
  53     parse_qs,
  54     qualities,
  55     remove_end,
  56     remove_start,
  57     smuggle_url,
  58     str_or_none,
  59     str_to_int,
  60     strftime_or_none,
  61     traverse_obj,
  62     try_get,
  63     unescapeHTML,
  64     unified_strdate,
  65     unsmuggle_url,
  66     update_url_query,
  67     url_or_none,
  68     urljoin,
  69     variadic,
  70 )
  71
  72
  73 def get_first(obj, keys, **kwargs):
  74     return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
  75
  76
# Any clients starting with _ cannot be explicitly requested by the user
  78 INNERTUBE_CLIENTS = {
  79     'web': {
  80         'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
  81         'INNERTUBE_CONTEXT': {
  82             'client': {
  83                 'clientName': 'WEB',
  84                 'clientVersion': '2.20210622.10.00',
  85             }
  86         },
  87         'INNERTUBE_CONTEXT_CLIENT_NAME': 1
  88     },
  89     'web_embedded': {
  90         'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
  91         'INNERTUBE_CONTEXT': {
  92             'client': {
  93                 'clientName': 'WEB_EMBEDDED_PLAYER',
  94                 'clientVersion': '1.20210620.0.1',
  95             },
  96         },
  97         'INNERTUBE_CONTEXT_CLIENT_NAME': 56
  98     },
  99     'web_music': {
 100         'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
 101         'INNERTUBE_HOST': 'music.youtube.com',
 102         'INNERTUBE_CONTEXT': {
 103             'client': {
 104                 'clientName': 'WEB_REMIX',
 105                 'clientVersion': '1.20210621.00.00',
 106             }
 107         },
 108         'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
 109     },
 110     'web_creator': {
 111         'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
 112         'INNERTUBE_CONTEXT': {
 113             'client': {
 114                 'clientName': 'WEB_CREATOR',
 115                 'clientVersion': '1.20210621.00.00',
 116             }
 117         },
 118         'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
 119     },
 120     'android': {
 121         'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
 122         'INNERTUBE_CONTEXT': {
 123             'client': {
 124                 'clientName': 'ANDROID',
 125                 'clientVersion': '16.20',
 126             }
 127         },
 128         'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
 129         'REQUIRE_JS_PLAYER': False
 130     },
 131     'android_embedded': {
 132         'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
 133         'INNERTUBE_CONTEXT': {
 134             'client': {
 135                 'clientName': 'ANDROID_EMBEDDED_PLAYER',
 136                 'clientVersion': '16.20',
 137             },
 138         },
 139         'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
 140         'REQUIRE_JS_PLAYER': False
 141     },
 142     'android_music': {
 143         'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
 144         'INNERTUBE_HOST': 'music.youtube.com',
 145         'INNERTUBE_CONTEXT': {
 146             'client': {
 147                 'clientName': 'ANDROID_MUSIC',
 148                 'clientVersion': '4.32',
 149             }
 150         },
 151         'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
 152         'REQUIRE_JS_PLAYER': False
 153     },
 154     'android_creator': {
 155         'INNERTUBE_CONTEXT': {
 156             'client': {
 157                 'clientName': 'ANDROID_CREATOR',
 158                 'clientVersion': '21.24.100',
 159             },
 160         },
 161         'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
 162         'REQUIRE_JS_PLAYER': False
 163     },
 164     # ios has HLS live streams
 165     # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
 166     'ios': {
 167         'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
 168         'INNERTUBE_CONTEXT': {
 169             'client': {
 170                 'clientName': 'IOS',
 171                 'clientVersion': '16.20',
 172             }
 173         },
 174         'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
 175         'REQUIRE_JS_PLAYER': False
 176     },
 177     'ios_embedded': {
 178         'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
 179         'INNERTUBE_CONTEXT': {
 180             'client': {
 181                 'clientName': 'IOS_MESSAGES_EXTENSION',
 182                 'clientVersion': '16.20',
 183             },
 184         },
 185         'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
 186         'REQUIRE_JS_PLAYER': False
 187     },
 188     'ios_music': {
 189         'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
 190         'INNERTUBE_HOST': 'music.youtube.com',
 191         'INNERTUBE_CONTEXT': {
 192             'client': {
 193                 'clientName': 'IOS_MUSIC',
 194                 'clientVersion': '4.32',
 195             },
 196         },
 197         'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
 198         'REQUIRE_JS_PLAYER': False
 199     },
 200     'ios_creator': {
 201         'INNERTUBE_CONTEXT': {
 202             'client': {
 203                 'clientName': 'IOS_CREATOR',
 204                 'clientVersion': '21.24.100',
 205             },
 206         },
 207         'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
 208         'REQUIRE_JS_PLAYER': False
 209     },
 210     # mweb has 'ultralow' formats
 211     # See: https://github.com/yt-dlp/yt-dlp/pull/557
 212     'mweb': {
 213         'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
 214         'INNERTUBE_CONTEXT': {
 215             'client': {
 216                 'clientName': 'MWEB',
 217                 'clientVersion': '2.20210721.07.00',
 218             }
 219         },
 220         'INNERTUBE_CONTEXT_CLIENT_NAME': 2
 221     },
 222 }
 223
 224
 225 def build_innertube_clients():
 226     third_party = {
 227         'embedUrl': 'https://google.com',  # Can be any valid URL
 228     }
 229     base_clients = ('android', 'web', 'ios', 'mweb')
 230     priority = qualities(base_clients[::-1])
 231
 232     for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
 233         ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
 234         ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
 235         ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
 236         ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
 237         ytcfg['priority'] = 10 * priority(client.split('_', 1)[0])
 238
 239         if client in base_clients:
 240             INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
 241             agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
 242             agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
 243             agegate_ytcfg['priority'] -= 1
 244         elif client.endswith('_embedded'):
 245             ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
 246             ytcfg['priority'] -= 2
 247         else:
 248             ytcfg['priority'] -= 3
 249
 250
 251 build_innertube_clients()
 252
 253
 254 class YoutubeBaseInfoExtractor(InfoExtractor):
 255     """Provide base functions for Youtube extractors"""
 256
 257     _RESERVED_NAMES = (
 258         r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
 259         r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
 260         r'browse|oembed|get_video_info|iframe_api|s/player|'
 261         r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
 262
 263     _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
 264
 265     _NETRC_MACHINE = 'youtube'
 266
 267     # If True it will raise an error if no login info is provided
 268     _LOGIN_REQUIRED = False
 269
 270     _INVIDIOUS_SITES = (
 271         # invidious-redirect websites
 272         r'(?:www\.)?redirect\.invidious\.io',
 273         r'(?:(?:www|dev)\.)?invidio\.us',
 274         # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
 275         r'(?:www\.)?invidious\.pussthecat\.org',
 276         r'(?:www\.)?invidious\.zee\.li',
 277         r'(?:www\.)?invidious\.ethibox\.fr',
 278         r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
 279         # youtube-dl invidious instances list
 280         r'(?:(?:www|no)\.)?invidiou\.sh',
 281         r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
 282         r'(?:www\.)?invidious\.kabi\.tk',
 283         r'(?:www\.)?invidious\.mastodon\.host',
 284         r'(?:www\.)?invidious\.zapashcanon\.fr',
 285         r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
 286         r'(?:www\.)?invidious\.tinfoil-hat\.net',
 287         r'(?:www\.)?invidious\.himiko\.cloud',
 288         r'(?:www\.)?invidious\.reallyancient\.tech',
 289         r'(?:www\.)?invidious\.tube',
 290         r'(?:www\.)?invidiou\.site',
 291         r'(?:www\.)?invidious\.site',
 292         r'(?:www\.)?invidious\.xyz',
 293         r'(?:www\.)?invidious\.nixnet\.xyz',
 294         r'(?:www\.)?invidious\.048596\.xyz',
 295         r'(?:www\.)?invidious\.drycat\.fr',
 296         r'(?:www\.)?inv\.skyn3t\.in',
 297         r'(?:www\.)?tube\.poal\.co',
 298         r'(?:www\.)?tube\.connect\.cafe',
 299         r'(?:www\.)?vid\.wxzm\.sx',
 300         r'(?:www\.)?vid\.mint\.lgbt',
 301         r'(?:www\.)?vid\.puffyan\.us',
 302         r'(?:www\.)?yewtu\.be',
 303         r'(?:www\.)?yt\.elukerio\.org',
 304         r'(?:www\.)?yt\.lelux\.fi',
 305         r'(?:www\.)?invidious\.ggc-project\.de',
 306         r'(?:www\.)?yt\.maisputain\.ovh',
 307         r'(?:www\.)?ytprivate\.com',
 308         r'(?:www\.)?invidious\.13ad\.de',
 309         r'(?:www\.)?invidious\.toot\.koeln',
 310         r'(?:www\.)?invidious\.fdn\.fr',
 311         r'(?:www\.)?watch\.nettohikari\.com',
 312         r'(?:www\.)?invidious\.namazso\.eu',
 313         r'(?:www\.)?invidious\.silkky\.cloud',
 314         r'(?:www\.)?invidious\.exonip\.de',
 315         r'(?:www\.)?invidious\.riverside\.rocks',
 316         r'(?:www\.)?invidious\.blamefran\.net',
 317         r'(?:www\.)?invidious\.moomoo\.de',
 318         r'(?:www\.)?ytb\.trom\.tf',
 319         r'(?:www\.)?yt\.cyberhost\.uk',
 320         r'(?:www\.)?kgg2m7yk5aybusll\.onion',
 321         r'(?:www\.)?qklhadlycap4cnod\.onion',
 322         r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
 323         r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
 324         r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
 325         r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
 326         r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
 327         r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
 328         r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
 329         r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
 330         r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
 331         r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
 332     )
 333
 334     def _login(self):
 335         """
 336         Attempt to log in to YouTube.
 337         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
 338         """
 339
 340         if (self._LOGIN_REQUIRED
 341                 and self.get_param('cookiefile') is None
 342                 and self.get_param('cookiesfrombrowser') is None):
 343             self.raise_login_required(
 344                 'Login details are needed to download this content', method='cookies')
 345         username, password = self._get_login_info()
 346         if username:
 347             self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
 348
 349     def _initialize_consent(self):
 350         cookies = self._get_cookies('https://www.youtube.com/')
 351         if cookies.get('__Secure-3PSID'):
 352             return
 353         consent_id = None
 354         consent = cookies.get('CONSENT')
 355         if consent:
 356             if 'YES' in consent.value:
 357                 return
 358             consent_id = self._search_regex(
 359                 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
 360         if not consent_id:
 361             consent_id = random.randint(100, 999)
 362         self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
 363
 364     def _initialize_pref(self):
 365         cookies = self._get_cookies('https://www.youtube.com/')
 366         pref_cookie = cookies.get('PREF')
 367         pref = {}
 368         if pref_cookie:
 369             try:
 370                 pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
 371             except ValueError:
 372                 self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
 373         pref.update({'hl': 'en'})
 374         self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))
 375
 376     def _real_initialize(self):
 377         self._initialize_pref()
 378         self._initialize_consent()
 379         self._login()
 380
 381     _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
 382     _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
 383     _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
 384
 385     def _get_default_ytcfg(self, client='web'):
 386         return copy.deepcopy(INNERTUBE_CLIENTS[client])
 387
 388     def _get_innertube_host(self, client='web'):
 389         return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
 390
 391     def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
 392         # try_get but with fallback to default ytcfg client values when present
 393         _func = lambda y: try_get(y, getter, expected_type)
 394         return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
 395
 396     def _extract_client_name(self, ytcfg, default_client='web'):
 397         return self._ytcfg_get_safe(
 398             ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
 399                     lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
 400
 401     def _extract_client_version(self, ytcfg, default_client='web'):
 402         return self._ytcfg_get_safe(
 403             ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
 404                     lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
 405
 406     def _extract_api_key(self, ytcfg=None, default_client='web'):
 407         return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
 408
 409     def _extract_context(self, ytcfg=None, default_client='web'):
 410         context = get_first(
 411             (ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
 412         # Enforce language for extraction
 413         traverse_obj(context, 'client', expected_type=dict, default={})['hl'] = 'en'
 414         return context
 415
 416     _SAPISID = None
 417
 418     def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
 419         time_now = round(time.time())
 420         if self._SAPISID is None:
 421             yt_cookies = self._get_cookies('https://www.youtube.com')
 422             # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
 423             # See: https://github.com/yt-dlp/yt-dlp/issues/393
 424             sapisid_cookie = dict_get(
 425                 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
 426             if sapisid_cookie and sapisid_cookie.value:
 427                 self._SAPISID = sapisid_cookie.value
 428                 self.write_debug('Extracted SAPISID cookie')
 429                 # SAPISID cookie is required if not already present
 430                 if not yt_cookies.get('SAPISID'):
 431                     self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
 432                     self._set_cookie(
 433                         '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
 434             else:
 435                 self._SAPISID = False
 436         if not self._SAPISID:
 437             return None
 438         # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
 439         sapisidhash = hashlib.sha1(
 440             f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
 441         return f'SAPISIDHASH {time_now}_{sapisidhash}'
 442
 443     def _call_api(self, ep, query, video_id, fatal=True, headers=None,
 444                   note='Downloading API JSON', errnote='Unable to download API page',
 445                   context=None, api_key=None, api_hostname=None, default_client='web'):
 446
 447         data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
 448         data.update(query)
 449         real_headers = self.generate_api_headers(default_client=default_client)
 450         real_headers.update({'content-type': 'application/json'})
 451         if headers:
 452             real_headers.update(headers)
 453         return self._download_json(
 454             'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
 455             video_id=video_id, fatal=fatal, note=note, errnote=errnote,
 456             data=json.dumps(data).encode('utf8'), headers=real_headers,
 457             query={'key': api_key or self._extract_api_key()})
 458
 459     def extract_yt_initial_data(self, item_id, webpage, fatal=True):
 460         data = self._search_regex(
 461             (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
 462              self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
 463         if data:
 464             return self._parse_json(data, item_id, fatal=fatal)
 465
 466     @staticmethod
 467     def _extract_session_index(*data):
 468         """
 469         Index of current account in account list.
 470         See: https://github.com/yt-dlp/yt-dlp/pull/519
 471         """
 472         for ytcfg in data:
 473             session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
 474             if session_index is not None:
 475                 return session_index
 476
 477     # Deprecated?
 478     def _extract_identity_token(self, ytcfg=None, webpage=None):
 479         if ytcfg:
 480             token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
 481             if token:
 482                 return token
 483         if webpage:
 484             return self._search_regex(
 485                 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
 486                 'identity token', default=None, fatal=False)
 487
 488     @staticmethod
 489     def _extract_account_syncid(*args):
 490         """
 491         Extract syncId required to download private playlists of secondary channels
 492         @params response and/or ytcfg
 493         """
 494         for data in args:
 495             # ytcfg includes channel_syncid if on secondary channel
 496             delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
 497             if delegated_sid:
 498                 return delegated_sid
 499             sync_ids = (try_get(
 500                 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
 501                        lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
 502             if len(sync_ids) >= 2 and sync_ids[1]:
 503                 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
 504                 # and just "user_syncid||" for primary channel. We only want the channel_syncid
 505                 return sync_ids[0]
 506
 507     @staticmethod
 508     def _extract_visitor_data(*args):
 509         """
 510         Extracts visitorData from an API response or ytcfg
 511         Appears to be used to track session state
 512         """
 513         return get_first(
 514             args, (('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))),
 515             expected_type=str)
 516
 517     @property
 518     def is_authenticated(self):
 519         return bool(self._generate_sapisidhash_header())
 520
 521     def extract_ytcfg(self, video_id, webpage):
 522         if not webpage:
 523             return {}
 524         return self._parse_json(
 525             self._search_regex(
 526                 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
 527                 default='{}'), video_id, fatal=False) or {}
 528
 529     def generate_api_headers(
 530             self, *, ytcfg=None, account_syncid=None, session_index=None,
 531             visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
 532
 533         origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
 534         headers = {
 535             'X-YouTube-Client-Name': compat_str(
 536                 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
 537             'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
 538             'Origin': origin,
 539             'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
 540             'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
 541             'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
 542         }
 543         if session_index is None:
 544             session_index = self._extract_session_index(ytcfg)
 545         if account_syncid or session_index is not None:
 546             headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
 547
 548         auth = self._generate_sapisidhash_header(origin)
 549         if auth is not None:
 550             headers['Authorization'] = auth
 551             headers['X-Origin'] = origin
 552         return {h: v for h, v in headers.items() if v is not None}
 553
 554     @staticmethod
 555     def _build_api_continuation_query(continuation, ctp=None):
 556         query = {
 557             'continuation': continuation
 558         }
 559         # TODO: Inconsistency with clickTrackingParams.
 560         # Currently we have a fixed ctp contained within context (from ytcfg)
 561         # and a ctp in root query for continuation.
 562         if ctp:
 563             query['clickTracking'] = {'clickTrackingParams': ctp}
 564         return query
 565
 566     @classmethod
 567     def _extract_next_continuation_data(cls, renderer):
 568         next_continuation = try_get(
 569             renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
 570                        lambda x: x['continuation']['reloadContinuationData']), dict)
 571         if not next_continuation:
 572             return
 573         continuation = next_continuation.get('continuation')
 574         if not continuation:
 575             return
 576         ctp = next_continuation.get('clickTrackingParams')
 577         return cls._build_api_continuation_query(continuation, ctp)
 578
 579     @classmethod
 580     def _extract_continuation_ep_data(cls, continuation_ep: dict):
 581         if isinstance(continuation_ep, dict):
 582             continuation = try_get(
 583                 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
 584             if not continuation:
 585                 return
 586             ctp = continuation_ep.get('clickTrackingParams')
 587             return cls._build_api_continuation_query(continuation, ctp)
 588
 589     @classmethod
 590     def _extract_continuation(cls, renderer):
 591         next_continuation = cls._extract_next_continuation_data(renderer)
 592         if next_continuation:
 593             return next_continuation
 594
 595         contents = []
 596         for key in ('contents', 'items'):
 597             contents.extend(try_get(renderer, lambda x: x[key], list) or [])
 598
 599         for content in contents:
 600             if not isinstance(content, dict):
 601                 continue
 602             continuation_ep = try_get(
 603                 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
 604                           lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
 605                 dict)
 606             continuation = cls._extract_continuation_ep_data(continuation_ep)
 607             if continuation:
 608                 return continuation
 609
 610     @classmethod
 611     def _extract_alerts(cls, data):
 612         for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
 613             if not isinstance(alert_dict, dict):
 614                 continue
 615             for alert in alert_dict.values():
 616                 alert_type = alert.get('type')
 617                 if not alert_type:
 618                     continue
 619                 message = cls._get_text(alert, 'text')
 620                 if message:
 621                     yield alert_type, message
 622
 623     def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
 624         errors = []
 625         warnings = []
 626         for alert_type, alert_message in alerts:
 627             if alert_type.lower() == 'error' and fatal:
 628                 errors.append([alert_type, alert_message])
 629             else:
 630                 warnings.append([alert_type, alert_message])
 631
 632         for alert_type, alert_message in (warnings + errors[:-1]):
 633             self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
 634         if errors:
 635             raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
 636
 637     def _extract_and_report_alerts(self, data, *args, **kwargs):
 638         return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
 639
 640     def _extract_badges(self, renderer: dict):
 641         badges = set()
 642         for badge in try_get(renderer, lambda x: x['badges'], list) or []:
 643             label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
 644             if label:
 645                 badges.add(label.lower())
 646         return badges
 647
    @staticmethod
    def _get_text(data, *path_list, max_runs=None):
        """
        Return the first non-empty text found in `data`, or None if there is none.

        @param data       object holding text either as a 'simpleText' string or
                          as a list of 'runs' whose 'text' values are joined
        @param path_list  paths (as understood by traverse_obj) to look under, tried
                          in order; `data` itself is examined when no path is given
        @param max_runs   if truthy, only this many leading runs are joined
        """
        for path in path_list or [None]:
            if path is None:
                obj = [data]
            else:
                obj = traverse_obj(data, path, default=[])
                # A path without `...` or list/tuple keys gives a single object rather
                # than a list of matches, so wrap it for the loop below
                if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                    obj = [obj]
            for item in obj:
                text = try_get(item, lambda x: x['simpleText'], compat_str)
                if text:
                    return text
                runs = try_get(item, lambda x: x['runs'], list) or []
                # A bare list is treated as the list of runs itself
                if not runs and isinstance(item, list):
                    runs = item

                # max_runs of None/0 means no limit
                runs = runs[:min(len(runs), max_runs or len(runs))]
                text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
                if text:
                    return text
 669
 670     @staticmethod
 671     def extract_relative_time(relative_time_text):
 672         """
 673         Extracts a relative time from string and converts to dt object
 674         e.g. 'streamed 6 days ago', '5 seconds ago (edited)'
 675         """
 676         mobj = re.search(r'(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
 677         if mobj:
 678             try:
 679                 return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')), precision='auto')
 680             except ValueError:
 681                 return None
 682
 683     def _extract_time_text(self, renderer, *path_list):
 684         text = self._get_text(renderer, *path_list) or ''
 685         dt = self.extract_relative_time(text)
 686         timestamp = None
 687         if isinstance(dt, datetime.datetime):
 688             timestamp = calendar.timegm(dt.timetuple())
 689         if text and timestamp is None:
 690             self.report_warning('Cannot parse localized time text' + bug_reports_message(), only_once=True)
 691         return timestamp, text
 692
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='web'):
        """Call the API endpoint ``ep`` via ``self._call_api``, retrying on transient failures.

        Up to ``extractor_retries`` (default 3) retries are made after the
        initial attempt. A retry is triggered when:
          * the request fails with a network exception, except for HTTP
            errors with status 403 or 429;
          * the alerts extracted from the response raise an error whose
            message contains 'unknown error';
          * ``check_get_keys`` is non-empty and ``dict_get(response,
            check_get_keys)`` is falsy ("incomplete data").

        @param item_id         Passed to ``_call_api`` as ``video_id`` and used when parsing error bodies
        @param query           Query passed through to ``_call_api``
        @param note            Progress note; ' (retry #N)' is appended on retries
        @param check_get_keys  Keys checked with ``dict_get`` to decide whether the response is complete
        @param fatal           If true, unrecoverable errors are raised; otherwise
                               a warning is reported and None is returned
        @returns               The API response, or None on a non-fatal failure
        """
        response = None
        last_error = None
        # Starts at -1 so that the first pass through the loop is attempt #0
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            # A previous attempt failed in a retryable way; tell the user why we are retrying
            if last_error:
                self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
            try:
                # Always fatal here: failures are handled (and possibly downgraded) below
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, network_exceptions):
                    # For HTTP errors with a non-HTML body, try to surface the error
                    # message from the JSON payload as a (non-fatal) alert
                    if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
                        # Rewind after peeking at the first 512 bytes
                        e.cause.seek(0)
                        yt_error = try_get(
                            self._parse_json(e.cause.read().decode(), item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    # We also want to catch all other network exceptions since errors in later pages can be troublesome
                    # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                        last_error = error_to_compat_str(e.cause or e.msg)
                        if count < retries:
                            continue
                # Not retryable (or retries exhausted): raise or give up with a warning
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                try:
                    self._extract_and_report_alerts(response, only_once=True)
                except ExtractorError as e:
                    # YouTube servers may return errors we want to retry on in a 200 OK response
                    # See: https://github.com/yt-dlp/yt-dlp/issues/839
                    # NOTE(review): this `continue` is unconditional, so if it happens on the
                    # last attempt the loop ends and the response is returned as-is
                    if 'unknown error' in e.msg.lower():
                        last_error = e.msg
                        continue
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                # Done if no keys were requested or the requested data is present
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
 762
 763     @staticmethod
 764     def is_music_url(url):
 765         return re.match(r'https?://music\.youtube\.com/', url) is not None
 766
 767     def _extract_video(self, renderer):
 768         video_id = renderer.get('videoId')
 769         title = self._get_text(renderer, 'title')
 770         description = self._get_text(renderer, 'descriptionSnippet')
 771         duration = parse_duration(self._get_text(
 772             renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
 773         view_count_text = self._get_text(renderer, 'viewCountText') or ''
 774         view_count = str_to_int(self._search_regex(
 775             r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
 776             'view count', default=None))
 777
 778         uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
 779         channel_id = traverse_obj(
 780             renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'), expected_type=str, get_all=False)
 781         timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
 782         scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
 783         overlay_style = traverse_obj(
 784             renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), get_all=False, expected_type=str)
 785         badges = self._extract_badges(renderer)
 786         return {
 787             '_type': 'url',
 788             'ie_key': YoutubeIE.ie_key(),
 789             'id': video_id,
 790             'url': f'https://www.youtube.com/watch?v={video_id}',
 791             'title': title,
 792             'description': description,
 793             'duration': duration,
 794             'view_count': view_count,
 795             'uploader': uploader,
 796             'channel_id': channel_id,
 797             'upload_date': strftime_or_none(timestamp, '%Y%m%d'),
 798             'live_status': ('is_upcoming' if scheduled_timestamp is not None
 799                             else 'was_live' if 'streamed' in time_text.lower()
 800                             else 'is_live' if overlay_style is not None and overlay_style == 'LIVE' or 'live now' in badges
 801                             else None),
 802             'release_timestamp': scheduled_timestamp,
 803             'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
 804         }
 805
 806
 807 class YoutubeIE(YoutubeBaseInfoExtractor):
 808     IE_DESC = 'YouTube'
 809     _VALID_URL = r"""(?x)^
 810                      (
 811                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 812                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
 813                             (?:www\.)?deturl\.com/www\.youtube\.com|
 814                             (?:www\.)?pwnyoutube\.com|
 815                             (?:www\.)?hooktube\.com|
 816                             (?:www\.)?yourepeat\.com|
 817                             tube\.majestyc\.net|
 818                             %(invidious)s|
 819                             youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
 820                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 821                          (?:                                                  # the various things that can precede the ID:
 822                              (?:(?:v|embed|e|shorts)/(?!videoseries))         # v/ or embed/ or e/ or shorts/
 823                              |(?:                                             # or the v= param in all its forms
 824                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 825                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 826                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 827                                  v=
 828                              )
 829                          ))
 830                          |(?:
 831                             youtu\.be|                                        # just youtu.be/xxxx
 832                             vid\.plus|                                        # or vid.plus/xxxx
 833                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 834                             %(invidious)s
 835                          )/
 836                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 837                          )
 838                      )?                                                       # all until now is optional -> you can pass the naked ID
 839                      (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
 840                      (?(1).+)?                                                # if we found the ID, everything can follow
 841                      (?:\#|$)""" % {
 842         'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
 843     }
 844     _PLAYER_INFO_RE = (
 845         r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
 846         r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
 847         r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
 848     )
 849     _formats = {
 850         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 851         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 852         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 853         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 854         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 855         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 856         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 857         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 858         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 859         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 860         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 861         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 862         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 863         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 864         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 865         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 866         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 867         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 868
 869
 870         # 3D videos
 871         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 872         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 873         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 874         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 875         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
 876         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 877         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 878
 879         # Apple HTTP Live Streaming
 880         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 881         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 882         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 883         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 884         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 885         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 886         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 887         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
 888
 889         # DASH mp4 video
 890         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
 891         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
 892         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 893         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
 894         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
 895         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
 896         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
 897         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 898         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
 899         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 900         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 901         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
 902
 903         # Dash mp4 audio
 904         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
 905         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
 906         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
 907         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 908         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 909         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
 910         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
 911
 912         # Dash webm
 913         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 914         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 915         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 916         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 917         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 918         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 919         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
 920         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 921         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 922         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 923         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 924         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 925         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 926         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 927         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 928         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
 929         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 930         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 931         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 932         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 933         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 934         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 935
 936         # Dash webm audio
 937         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
 938         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
 939
 940         # Dash webm audio with opus inside
 941         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
 942         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
 943         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
 944
 945         # RTMP (unnamed)
 946         '_rtmp': {'protocol': 'rtmp'},
 947
 948         # av01 video only formats sometimes served with "unknown" codecs
 949         '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
 950         '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
 951         '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
 952         '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
 953         '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
 954         '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
 955         '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
 956         '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
 957     }
 958     _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
 959
 960     _GEO_BYPASS = False
 961
 962     IE_NAME = 'youtube'
 963     _TESTS = [
 964         {
 965             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
 966             'info_dict': {
 967                 'id': 'BaW_jenozKc',
 968                 'ext': 'mp4',
 969                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 970                 'uploader': 'Philipp Hagemeister',
 971                 'uploader_id': 'phihag',
 972                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 973                 'channel': 'Philipp Hagemeister',
 974                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
 975                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
 976                 'upload_date': '20121002',
 977                 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
 978                 'categories': ['Science & Technology'],
 979                 'tags': ['youtube-dl'],
 980                 'duration': 10,
 981                 'view_count': int,
 982                 'like_count': int,
 983                 # 'dislike_count': int,
 984                 'availability': 'public',
 985                 'playable_in_embed': True,
 986                 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
 987                 'live_status': 'not_live',
 988                 'age_limit': 0,
 989                 'start_time': 1,
 990                 'end_time': 9,
 991             }
 992         },
 993         {
 994             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
 995             'note': 'Embed-only video (#1746)',
 996             'info_dict': {
 997                 'id': 'yZIXLfi8CZQ',
 998                 'ext': 'mp4',
 999                 'upload_date': '20120608',
1000                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1001                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1002                 'uploader': 'SET India',
1003                 'uploader_id': 'setindia',
1004                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1005                 'age_limit': 18,
1006             },
1007             'skip': 'Private video',
1008         },
1009         {
1010             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1011             'note': 'Use the first video ID in the URL',
1012             'info_dict': {
1013                 'id': 'BaW_jenozKc',
1014                 'ext': 'mp4',
1015                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1016                 'uploader': 'Philipp Hagemeister',
1017                 'uploader_id': 'phihag',
1018                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1019                 'upload_date': '20121002',
1020                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1021                 'categories': ['Science & Technology'],
1022                 'tags': ['youtube-dl'],
1023                 'duration': 10,
1024                 'view_count': int,
1025                 'like_count': int,
1026                 'dislike_count': int,
1027             },
1028             'params': {
1029                 'skip_download': True,
1030             },
1031         },
1032         {
1033             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1034             'note': '256k DASH audio (format 141) via DASH manifest',
1035             'info_dict': {
1036                 'id': 'a9LDPn-MO4I',
1037                 'ext': 'm4a',
1038                 'upload_date': '20121002',
1039                 'uploader_id': '8KVIDEO',
1040                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1041                 'description': '',
1042                 'uploader': '8KVIDEO',
1043                 'title': 'UHDTV TEST 8K VIDEO.mp4'
1044             },
1045             'params': {
1046                 'youtube_include_dash_manifest': True,
1047                 'format': '141',
1048             },
1049             'skip': 'format 141 not served anymore',
1050         },
1051         # DASH manifest with encrypted signature
1052         {
1053             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1054             'info_dict': {
1055                 'id': 'IB3lcPjvWLA',
1056                 'ext': 'm4a',
1057                 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1058                 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1059                 'duration': 244,
1060                 'uploader': 'AfrojackVEVO',
1061                 'uploader_id': 'AfrojackVEVO',
1062                 'upload_date': '20131011',
1063                 'abr': 129.495,
1064             },
1065             'params': {
1066                 'youtube_include_dash_manifest': True,
1067                 'format': '141/bestaudio[ext=m4a]',
1068             },
1069         },
1070         # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1071         {
1072             'note': 'Embed allowed age-gate video',
1073             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1074             'info_dict': {
1075                 'id': 'HtVdAasjOgU',
1076                 'ext': 'mp4',
1077                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1078                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1079                 'duration': 142,
1080                 'uploader': 'The Witcher',
1081                 'uploader_id': 'WitcherGame',
1082                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1083                 'upload_date': '20140605',
1084                 'age_limit': 18,
1085             },
1086         },
1087         {
1088             'note': 'Age-gate video with embed allowed in public site',
1089             'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1090             'info_dict': {
1091                 'id': 'HsUATh_Nc2U',
1092                 'ext': 'mp4',
1093                 'title': 'Godzilla 2 (Official Video)',
1094                 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1095                 'upload_date': '20200408',
1096                 'uploader_id': 'FlyingKitty900',
1097                 'uploader': 'FlyingKitty',
1098                 'age_limit': 18,
1099             },
1100         },
1101         {
1102             'note': 'Age-gate video embedable only with clientScreen=EMBED',
1103             'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1104             'info_dict': {
1105                 'id': 'Tq92D6wQ1mg',
1106                 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1107                 'ext': 'mp4',
1108                 'upload_date': '20191227',
1109                 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1110                 'uploader': 'Projekt Melody',
1111                 'description': 'md5:17eccca93a786d51bc67646756894066',
1112                 'age_limit': 18,
1113             },
1114         },
1115         {
1116             'note': 'Non-Agegated non-embeddable video',
1117             'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1118             'info_dict': {
1119                 'id': 'MeJVWBSsPAY',
1120                 'ext': 'mp4',
1121                 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1122                 'uploader': 'Herr Lurik',
1123                 'uploader_id': 'st3in234',
1124                 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1125                 'upload_date': '20130730',
1126             },
1127         },
1128         {
1129             'note': 'Non-bypassable age-gated video',
1130             'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1131             'only_matching': True,
1132         },
1133         # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1134         # YouTube Red ad is not captured for creator
1135         {
1136             'url': '__2ABJjxzNo',
1137             'info_dict': {
1138                 'id': '__2ABJjxzNo',
1139                 'ext': 'mp4',
1140                 'duration': 266,
1141                 'upload_date': '20100430',
1142                 'uploader_id': 'deadmau5',
1143                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1144                 'creator': 'deadmau5',
1145                 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1146                 'uploader': 'deadmau5',
1147                 'title': 'Deadmau5 - Some Chords (HD)',
1148                 'alt_title': 'Some Chords',
1149             },
1150             'expected_warnings': [
1151                 'DASH manifest missing',
1152             ]
1153         },
1154         # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1155         {
1156             'url': 'lqQg6PlCWgI',
1157             'info_dict': {
1158                 'id': 'lqQg6PlCWgI',
1159                 'ext': 'mp4',
1160                 'duration': 6085,
1161                 'upload_date': '20150827',
1162                 'uploader_id': 'olympic',
1163                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1164                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1165                 'uploader': 'Olympics',
1166                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
1167             },
1168             'params': {
1169                 'skip_download': 'requires avconv',
1170             }
1171         },
1172         # Non-square pixels
1173         {
1174             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1175             'info_dict': {
1176                 'id': '_b-2C3KPAM0',
1177                 'ext': 'mp4',
1178                 'stretched_ratio': 16 / 9.,
1179                 'duration': 85,
1180                 'upload_date': '20110310',
1181                 'uploader_id': 'AllenMeow',
1182                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1183                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1184                 'uploader': '孫ᄋᄅ',
1185                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1186             },
1187         },
1188         # url_encoded_fmt_stream_map is empty string
1189         {
1190             'url': 'qEJwOuvDf7I',
1191             'info_dict': {
1192                 'id': 'qEJwOuvDf7I',
1193                 'ext': 'webm',
1194                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1195                 'description': '',
1196                 'upload_date': '20150404',
1197                 'uploader_id': 'spbelect',
1198                 'uploader': 'Наблюдатели Петербурга',
1199             },
1200             'params': {
1201                 'skip_download': 'requires avconv',
1202             },
1203             'skip': 'This live event has ended.',
1204         },
1205         # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1206         {
1207             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1208             'info_dict': {
1209                 'id': 'FIl7x6_3R5Y',
1210                 'ext': 'webm',
1211                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1212                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1213                 'duration': 220,
1214                 'upload_date': '20150625',
1215                 'uploader_id': 'dorappi2000',
1216                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1217                 'uploader': 'dorappi2000',
1218                 'formats': 'mincount:31',
1219             },
1220             'skip': 'not actual anymore',
1221         },
1222         # DASH manifest with segment_list
1223         {
1224             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1225             'md5': '8ce563a1d667b599d21064e982ab9e31',
1226             'info_dict': {
1227                 'id': 'CsmdDsKjzN8',
1228                 'ext': 'mp4',
1229                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1230                 'uploader': 'Airtek',
1231                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1232                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1233                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1234             },
1235             'params': {
1236                 'youtube_include_dash_manifest': True,
1237                 'format': '135',  # bestvideo
1238             },
1239             'skip': 'This live event has ended.',
1240         },
1241         {
1242             # Multifeed videos (multiple cameras), URL is for Main Camera
1243             'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1244             'info_dict': {
1245                 'id': 'jvGDaLqkpTg',
1246                 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1247                 'description': 'md5:e03b909557865076822aa169218d6a5d',
1248             },
1249             'playlist': [{
1250                 'info_dict': {
1251                     'id': 'jvGDaLqkpTg',
1252                     'ext': 'mp4',
1253                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1254                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1255                     'duration': 10643,
1256                     'upload_date': '20161111',
1257                     'uploader': 'Team PGP',
1258                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1259                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1260                 },
1261             }, {
1262                 'info_dict': {
1263                     'id': '3AKt1R1aDnw',
1264                     'ext': 'mp4',
1265                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1266                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1267                     'duration': 10991,
1268                     'upload_date': '20161111',
1269                     'uploader': 'Team PGP',
1270                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1271                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1272                 },
1273             }, {
1274                 'info_dict': {
1275                     'id': 'RtAMM00gpVc',
1276                     'ext': 'mp4',
1277                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1278                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1279                     'duration': 10995,
1280                     'upload_date': '20161111',
1281                     'uploader': 'Team PGP',
1282                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1283                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1284                 },
1285             }, {
1286                 'info_dict': {
1287                     'id': '6N2fdlP3C5U',
1288                     'ext': 'mp4',
1289                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1290                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1291                     'duration': 10990,
1292                     'upload_date': '20161111',
1293                     'uploader': 'Team PGP',
1294                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1295                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1296                 },
1297             }],
1298             'params': {
1299                 'skip_download': True,
1300             },
1301             'skip': 'Not multifeed anymore',
1302         },
1303         {
1304             # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1305             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1306             'info_dict': {
1307                 'id': 'gVfLd0zydlo',
1308                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1309             },
1310             'playlist_count': 2,
1311             'skip': 'Not multifeed anymore',
1312         },
1313         {
1314             'url': 'https://vid.plus/FlRa-iH7PGw',
1315             'only_matching': True,
1316         },
1317         {
1318             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1319             'only_matching': True,
1320         },
1321         {
1322             # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1323             # Also tests cut-off URL expansion in video description (see
1324             # https://github.com/ytdl-org/youtube-dl/issues/1892,
1325             # https://github.com/ytdl-org/youtube-dl/issues/8164)
1326             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1327             'info_dict': {
1328                 'id': 'lsguqyKfVQg',
1329                 'ext': 'mp4',
1330                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1331                 'alt_title': 'Dark Walk',
1332                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1333                 'duration': 133,
1334                 'upload_date': '20151119',
1335                 'uploader_id': 'IronSoulElf',
1336                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1337                 'uploader': 'IronSoulElf',
1338                 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1339                 'track': 'Dark Walk',
1340                 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1341                 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1342             },
1343             'params': {
1344                 'skip_download': True,
1345             },
1346         },
1347         {
1348             # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1349             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1350             'only_matching': True,
1351         },
1352         {
1353             # Video with yt:stretch=17:0
1354             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1355             'info_dict': {
1356                 'id': 'Q39EVAstoRM',
1357                 'ext': 'mp4',
1358                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1359                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1360                 'upload_date': '20151107',
1361                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1362                 'uploader': 'CH GAMER DROID',
1363             },
1364             'params': {
1365                 'skip_download': True,
1366             },
1367             'skip': 'This video does not exist.',
1368         },
1369         {
1370             # Video with incomplete 'yt:stretch=16:'
1371             'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1372             'only_matching': True,
1373         },
1374         {
1375             # Video licensed under Creative Commons
1376             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1377             'info_dict': {
1378                 'id': 'M4gD1WSo5mA',
1379                 'ext': 'mp4',
1380                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1381                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1382                 'duration': 721,
1383                 'upload_date': '20150127',
1384                 'uploader_id': 'BerkmanCenter',
1385                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1386                 'uploader': 'The Berkman Klein Center for Internet & Society',
1387                 'license': 'Creative Commons Attribution license (reuse allowed)',
1388             },
1389             'params': {
1390                 'skip_download': True,
1391             },
1392         },
1393         {
1394             # Channel-like uploader_url
1395             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1396             'info_dict': {
1397                 'id': 'eQcmzGIKrzg',
1398                 'ext': 'mp4',
1399                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1400                 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1401                 'duration': 4060,
1402                 'upload_date': '20151119',
1403                 'uploader': 'Bernie Sanders',
1404                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1405                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1406                 'license': 'Creative Commons Attribution license (reuse allowed)',
1407             },
1408             'params': {
1409                 'skip_download': True,
1410             },
1411         },
1412         {
1413             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1414             'only_matching': True,
1415         },
1416         {
1417             # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1418             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1419             'only_matching': True,
1420         },
1421         {
1422             # Rental video preview
1423             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1424             'info_dict': {
1425                 'id': 'uGpuVWrhIzE',
1426                 'ext': 'mp4',
1427                 'title': 'Piku - Trailer',
1428                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1429                 'upload_date': '20150811',
1430                 'uploader': 'FlixMatrix',
1431                 'uploader_id': 'FlixMatrixKaravan',
1432                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1433                 'license': 'Standard YouTube License',
1434             },
1435             'params': {
1436                 'skip_download': True,
1437             },
1438             'skip': 'This video is not available.',
1439         },
1440         {
1441             # YouTube Red video with episode data
1442             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1443             'info_dict': {
1444                 'id': 'iqKdEhx-dD4',
1445                 'ext': 'mp4',
1446                 'title': 'Isolation - Mind Field (Ep 1)',
1447                 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1448                 'duration': 2085,
1449                 'upload_date': '20170118',
1450                 'uploader': 'Vsauce',
1451                 'uploader_id': 'Vsauce',
1452                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1453                 'series': 'Mind Field',
1454                 'season_number': 1,
1455                 'episode_number': 1,
1456             },
1457             'params': {
1458                 'skip_download': True,
1459             },
1460             'expected_warnings': [
1461                 'Skipping DASH manifest',
1462             ],
1463         },
1464         {
1465             # The following content has been identified by the YouTube community
1466             # as inappropriate or offensive to some audiences.
1467             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1468             'info_dict': {
1469                 'id': '6SJNVb0GnPI',
1470                 'ext': 'mp4',
1471                 'title': 'Race Differences in Intelligence',
1472                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1473                 'duration': 965,
1474                 'upload_date': '20140124',
1475                 'uploader': 'New Century Foundation',
1476                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1477                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1478             },
1479             'params': {
1480                 'skip_download': True,
1481             },
1482             'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1483         },
1484         {
1485             # itag 212
1486             'url': '1t24XAntNCY',
1487             'only_matching': True,
1488         },
1489         {
1490             # geo restricted to JP
1491             'url': 'sJL6WA-aGkQ',
1492             'only_matching': True,
1493         },
1494         {
1495             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1496             'only_matching': True,
1497         },
1498         {
1499             'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1500             'only_matching': True,
1501         },
1502         {
1503             # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1504             'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1505             'only_matching': True,
1506         },
1507         {
1508             # DRM protected
1509             'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1510             'only_matching': True,
1511         },
1512         {
1513             # Video with unsupported adaptive stream type formats
1514             'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1515             'info_dict': {
1516                 'id': 'Z4Vy8R84T1U',
1517                 'ext': 'mp4',
1518                 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1519                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1520                 'duration': 433,
1521                 'upload_date': '20130923',
1522                 'uploader': 'Amelia Putri Harwita',
1523                 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1524                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1525                 'formats': 'maxcount:10',
1526             },
1527             'params': {
1528                 'skip_download': True,
1529                 'youtube_include_dash_manifest': False,
1530             },
1531             'skip': 'not actual anymore',
1532         },
1533         {
1534             # Youtube Music Auto-generated description
1535             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1536             'info_dict': {
1537                 'id': 'MgNrAu2pzNs',
1538                 'ext': 'mp4',
1539                 'title': 'Voyeur Girl',
1540                 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1541                 'upload_date': '20190312',
1542                 'uploader': 'Stephen - Topic',
1543                 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1544                 'artist': 'Stephen',
1545                 'track': 'Voyeur Girl',
1546                 'album': 'it\'s too much love to know my dear',
1547                 'release_date': '20190313',
1548                 'release_year': 2019,
1549             },
1550             'params': {
1551                 'skip_download': True,
1552             },
1553         },
1554         {
1555             'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1556             'only_matching': True,
1557         },
1558         {
1559             # invalid -> valid video id redirection
1560             'url': 'DJztXj2GPfl',
1561             'info_dict': {
1562                 'id': 'DJztXj2GPfk',
1563                 'ext': 'mp4',
1564                 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1565                 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1566                 'upload_date': '20090125',
1567                 'uploader': 'Prochorowka',
1568                 'uploader_id': 'Prochorowka',
1569                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1570                 'artist': 'Panjabi MC',
1571                 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1572                 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1573             },
1574             'params': {
1575                 'skip_download': True,
1576             },
1577             'skip': 'Video unavailable',
1578         },
1579         {
1580             # empty description results in an empty string
1581             'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1582             'info_dict': {
1583                 'id': 'x41yOUIvK2k',
1584                 'ext': 'mp4',
1585                 'title': 'IMG 3456',
1586                 'description': '',
1587                 'upload_date': '20170613',
1588                 'uploader_id': 'ElevageOrVert',
1589                 'uploader': 'ElevageOrVert',
1590             },
1591             'params': {
1592                 'skip_download': True,
1593             },
1594         },
1595         {
1596             # with '};' inside yt initial data (see [1])
1597             # see [2] for an example with '};' inside ytInitialPlayerResponse
1598             # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1599             # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1600             'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1601             'info_dict': {
1602                 'id': 'CHqg6qOn4no',
1603                 'ext': 'mp4',
1604                 'title': 'Part 77   Sort a list of simple types in c#',
1605                 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1606                 'upload_date': '20130831',
1607                 'uploader_id': 'kudvenkat',
1608                 'uploader': 'kudvenkat',
1609             },
1610             'params': {
1611                 'skip_download': True,
1612             },
1613         },
1614         {
1615             # another example of '};' in ytInitialData
1616             'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1617             'only_matching': True,
1618         },
1619         {
1620             'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1621             'only_matching': True,
1622         },
1623         {
1624             # https://github.com/ytdl-org/youtube-dl/pull/28094
1625             'url': 'OtqTfy26tG0',
1626             'info_dict': {
1627                 'id': 'OtqTfy26tG0',
1628                 'ext': 'mp4',
1629                 'title': 'Burn Out',
1630                 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1631                 'upload_date': '20141120',
1632                 'uploader': 'The Cinematic Orchestra - Topic',
1633                 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1634                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1635                 'artist': 'The Cinematic Orchestra',
1636                 'track': 'Burn Out',
1637                 'album': 'Every Day',
1638                 'release_data': None,
1639                 'release_year': None,
1640             },
1641             'params': {
1642                 'skip_download': True,
1643             },
1644         },
1645         {
1646             # controversial video, only works with bpctr when authenticated with cookies
1647             'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1648             'only_matching': True,
1649         },
1650         {
1651             # controversial video, requires bpctr/contentCheckOk
1652             'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1653             'info_dict': {
1654                 'id': 'SZJvDhaSDnc',
1655                 'ext': 'mp4',
1656                 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1657                 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1658                 'uploader': 'CBS This Morning',
1659                 'uploader_id': 'CBSThisMorning',
1660                 'upload_date': '20140716',
1661                 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1662             }
1663         },
1664         {
1665             # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1666             'url': 'cBvYw8_A0vQ',
1667             'info_dict': {
1668                 'id': 'cBvYw8_A0vQ',
1669                 'ext': 'mp4',
1670                 'title': '4K Ueno Okachimachi  Street  Scenes  上野御徒町歩き',
1671                 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1672                 'upload_date': '20201120',
1673                 'uploader': 'Walk around Japan',
1674                 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1675                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1676             },
1677             'params': {
1678                 'skip_download': True,
1679             },
1680         }, {
1681             # Has multiple audio streams
1682             'url': 'WaOKSUlf4TM',
1683             'only_matching': True
1684         }, {
1685             # Requires Premium: has format 141 when requested using YTM url
1686             'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1687             'only_matching': True
1688         }, {
1689             # multiple subtitles with same lang_code
1690             'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1691             'only_matching': True,
1692         }, {
1693             # Force use android client fallback
1694             'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1695             'info_dict': {
1696                 'id': 'YOelRv7fMxY',
1697                 'title': 'DIGGING A SECRET TUNNEL Part 1',
1698                 'ext': '3gp',
1699                 'upload_date': '20210624',
1700                 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1701                 'uploader': 'colinfurze',
1702                 'uploader_id': 'colinfurze',
1703                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1704                 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1705             },
1706             'params': {
1707                 'format': '17',  # 3gp format available on android
1708                 'extractor_args': {'youtube': {'player_client': ['android']}},
1709             },
1710         },
1711         {
1712             # Skip download of additional client configs (remix client config in this case)
1713             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1714             'only_matching': True,
1715             'params': {
1716                 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1717             },
1718         }, {
1719             # shorts
1720             'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1721             'only_matching': True,
1722         }, {
1723             'note': 'Storyboards',
1724             'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
1725             'info_dict': {
1726                 'id': '5KLPxDtMqe8',
1727                 'ext': 'mhtml',
1728                 'format_id': 'sb0',
1729                 'title': 'Your Brain is Plastic',
1730                 'uploader_id': 'scishow',
1731                 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
1732                 'upload_date': '20140324',
1733                 'uploader': 'SciShow',
1734             }, 'params': {'format': 'mhtml', 'skip_download': True}
1735         }
1736     ]
1737
1738     @classmethod
1739     def suitable(cls, url):
1740         from ..utils import parse_qs
1741
1742         qs = parse_qs(url)
1743         if qs.get('list', [None])[0]:
1744             return False
1745         return super(YoutubeIE, cls).suitable(url)
1746
1747     def __init__(self, *args, **kwargs):
1748         super(YoutubeIE, self).__init__(*args, **kwargs)
1749         self._code_cache = {}
1750         self._player_cache = {}
1751
1752     def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
1753         EXPIRATION_DURATION = 18_000
1754         lock = threading.Lock()
1755
1756         is_live = True
1757         expiration_time = time.time() + EXPIRATION_DURATION
1758         formats = [f for f in formats if f.get('is_from_start')]
1759
1760         def refetch_manifest(format_id):
1761             nonlocal formats, expiration_time, is_live
1762             if time.time() <= expiration_time:
1763                 return
1764
1765             _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
1766             video_details = traverse_obj(
1767                 prs, (..., 'videoDetails'), expected_type=dict, default=[])
1768             microformats = traverse_obj(
1769                 prs, (..., 'microformat', 'playerMicroformatRenderer'),
1770                 expected_type=dict, default=[])
1771             _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
1772             expiration_time = time.time() + EXPIRATION_DURATION
1773
1774         def mpd_feed(format_id):
1775             """
1776             @returns (manifest_url, manifest_stream_number, is_live) or None
1777             """
1778             with lock:
1779                 refetch_manifest(format_id)
1780
1781             f = next((f for f in formats if f['format_id'] == format_id), None)
1782             if not f:
1783                 self.report_warning(
1784                     f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
1785                 return None
1786             return f['manifest_url'], f['manifest_stream_number'], is_live
1787
1788         for f in formats:
1789             f['protocol'] = 'http_dash_segments_generator'
1790             f['fragments'] = functools.partial(
1791                 self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)
1792
1793     def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
1794         FETCH_SPAN, MAX_DURATION = 5, 432000
1795
1796         mpd_url, stream_number, is_live = None, None, True
1797
1798         begin_index = 0
1799         download_start_time = ctx.get('start') or time.time()
1800
1801         lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
1802         if lack_early_segments:
1803             self.report_warning(bug_reports_message(
1804                 'Starting download from the last 120 hours of the live stream since '
1805                 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
1806             lack_early_segments = True
1807
1808         known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
1809         fragments, fragment_base_url = None, None
1810
1811         def _extract_sequence_from_mpd(refresh_sequence):
1812             nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
1813             # Obtain from MPD's maximum seq value
1814             old_mpd_url = mpd_url
1815             mpd_url, stream_number, is_live = mpd_feed(format_id) or (mpd_url, stream_number, False)
1816             if old_mpd_url == mpd_url and not refresh_sequence:
1817                 return True, last_seq
1818             try:
1819                 fmts, _ = self._extract_mpd_formats_and_subtitles(
1820                     mpd_url, None, note=False, errnote=False, fatal=False)
1821             except ExtractorError:
1822                 fmts = None
1823             if not fmts:
1824                 no_fragment_score += 1
1825                 return False, last_seq
1826             fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
1827             fragments = fmt_info['fragments']
1828             fragment_base_url = fmt_info['fragment_base_url']
1829             assert fragment_base_url
1830
1831             _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
1832             return True, _last_seq
1833
1834         while is_live:
1835             fetch_time = time.time()
1836             if no_fragment_score > 30:
1837                 return
1838             if last_segment_url:
1839                 # Obtain from "X-Head-Seqnum" header value from each segment
1840                 try:
1841                     urlh = self._request_webpage(
1842                         last_segment_url, None, note=False, errnote=False, fatal=False)
1843                 except ExtractorError:
1844                     urlh = None
1845                 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
1846                 if last_seq is None:
1847                     no_fragment_score += 1
1848                     last_segment_url = None
1849                     continue
1850             else:
1851                 should_retry, last_seq = _extract_sequence_from_mpd(True)
1852                 if not should_retry:
1853                     continue
1854
1855             if known_idx > last_seq:
1856                 last_segment_url = None
1857                 continue
1858
1859             last_seq += 1
1860
1861             if begin_index < 0 and known_idx < 0:
1862                 # skip from the start when it's negative value
1863                 known_idx = last_seq + begin_index
1864             if lack_early_segments:
1865                 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
1866             try:
1867                 for idx in range(known_idx, last_seq):
1868                     # do not update sequence here or you'll get skipped some part of it
1869                     should_retry, _ = _extract_sequence_from_mpd(False)
1870                     if not should_retry:
1871                         # retry when it gets weird state
1872                         known_idx = idx - 1
1873                         raise ExtractorError('breaking out of outer loop')
1874                     last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
1875                     yield {
1876                         'url': last_segment_url,
1877                     }
1878                 if known_idx == last_seq:
1879                     no_fragment_score += 5
1880                 else:
1881                     no_fragment_score = 0
1882                 known_idx = last_seq
1883             except ExtractorError:
1884                 continue
1885
1886             time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
1887
1888     def _extract_player_url(self, *ytcfgs, webpage=None):
1889         player_url = traverse_obj(
1890             ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
1891             get_all=False, expected_type=compat_str)
1892         if not player_url:
1893             return
1894         if player_url.startswith('//'):
1895             player_url = 'https:' + player_url
1896         elif not re.match(r'https?://', player_url):
1897             player_url = compat_urlparse.urljoin(
1898                 'https://www.youtube.com', player_url)
1899         return player_url
1900
1901     def _download_player_url(self, video_id, fatal=False):
1902         res = self._download_webpage(
1903             'https://www.youtube.com/iframe_api',
1904             note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
1905         if res:
1906             player_version = self._search_regex(
1907                 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
1908             if player_version:
1909                 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
1910
1911     def _signature_cache_id(self, example_sig):
1912         """ Return a string representation of a signature """
1913         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1914
1915     @classmethod
1916     def _extract_player_info(cls, player_url):
1917         for player_re in cls._PLAYER_INFO_RE:
1918             id_m = re.search(player_re, player_url)
1919             if id_m:
1920                 break
1921         else:
1922             raise ExtractorError('Cannot identify player %r' % player_url)
1923         return id_m.group('id')
1924
1925     def _load_player(self, video_id, player_url, fatal=True):
1926         player_id = self._extract_player_info(player_url)
1927         if player_id not in self._code_cache:
1928             code = self._download_webpage(
1929                 player_url, video_id, fatal=fatal,
1930                 note='Downloading player ' + player_id,
1931                 errnote='Download of %s failed' % player_url)
1932             if code:
1933                 self._code_cache[player_id] = code
1934         return self._code_cache.get(player_id)
1935
1936     def _extract_signature_function(self, video_id, player_url, example_sig):
1937         player_id = self._extract_player_info(player_url)
1938
1939         # Read from filesystem cache
1940         func_id = 'js_%s_%s' % (
1941             player_id, self._signature_cache_id(example_sig))
1942         assert os.path.basename(func_id) == func_id
1943
1944         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1945         if cache_spec is not None:
1946             return lambda s: ''.join(s[i] for i in cache_spec)
1947
1948         code = self._load_player(video_id, player_url)
1949         if code:
1950             res = self._parse_sig_js(code)
1951
1952             test_string = ''.join(map(compat_chr, range(len(example_sig))))
1953             cache_res = res(test_string)
1954             cache_spec = [ord(c) for c in cache_res]
1955
1956             self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1957             return res
1958
1959     def _print_sig_code(self, func, example_sig):
1960         if not self.get_param('youtube_print_sig_code'):
1961             return
1962
1963         def gen_sig_code(idxs):
1964             def _genslice(start, end, step):
1965                 starts = '' if start == 0 else str(start)
1966                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1967                 steps = '' if step == 1 else (':%d' % step)
1968                 return 's[%s%s%s]' % (starts, ends, steps)
1969
1970             step = None
1971             # Quelch pyflakes warnings - start will be set when step is set
1972             start = '(Never used)'
1973             for i, prev in zip(idxs[1:], idxs[:-1]):
1974                 if step is not None:
1975                     if i - prev == step:
1976                         continue
1977                     yield _genslice(start, prev, step)
1978                     step = None
1979                     continue
1980                 if i - prev in [-1, 1]:
1981                     step = i - prev
1982                     start = prev
1983                     continue
1984                 else:
1985                     yield 's[%d]' % prev
1986             if step is None:
1987                 yield 's[%d]' % i
1988             else:
1989                 yield _genslice(start, i, step)
1990
1991         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1992         cache_res = func(test_string)
1993         cache_spec = [ord(c) for c in cache_res]
1994         expr_code = ' + '.join(gen_sig_code(cache_spec))
1995         signature_id_tuple = '(%s)' % (
1996             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1997         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1998                 '    return %s\n') % (signature_id_tuple, expr_code)
1999         self.to_screen('Extracted signature function:\n' + code)
2000
    def _parse_sig_js(self, jscode):
        """Locate the signature function in the player JS and wrap it as a callable.

        The function name is found with the first matching pattern from the list
        below (named group 'sig'); the function itself is then extracted from
        ``jscode`` with JSInterpreter. The returned callable takes the scrambled
        signature string and passes it to the JS function as its single argument.
        """
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             # NOTE(review): the two patterns below are identical to each other
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # The extracted function is called with a list of arguments
        return lambda s: initial_function([s])
2024
2025     def _decrypt_signature(self, s, video_id, player_url):
2026         """Turn the encrypted s field into a working signature"""
2027
2028         if player_url is None:
2029             raise ExtractorError('Cannot decrypt signature without player_url')
2030
2031         try:
2032             player_id = (player_url, self._signature_cache_id(s))
2033             if player_id not in self._player_cache:
2034                 func = self._extract_signature_function(
2035                     video_id, player_url, s
2036                 )
2037                 self._player_cache[player_id] = func
2038             func = self._player_cache[player_id]
2039             self._print_sig_code(func, s)
2040             return func(s)
2041         except Exception as e:
2042             raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
2043
2044     def _decrypt_nsig(self, s, video_id, player_url):
2045         """Turn the encrypted n field into a working signature"""
2046         if player_url is None:
2047             raise ExtractorError('Cannot decrypt nsig without player_url')
2048         if player_url.startswith('//'):
2049             player_url = 'https:' + player_url
2050         elif not re.match(r'https?://', player_url):
2051             player_url = compat_urlparse.urljoin(
2052                 'https://www.youtube.com', player_url)
2053
2054         sig_id = ('nsig_value', s)
2055         if sig_id in self._player_cache:
2056             return self._player_cache[sig_id]
2057
2058         try:
2059             player_id = ('nsig', player_url)
2060             if player_id not in self._player_cache:
2061                 self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
2062             func = self._player_cache[player_id]
2063             self._player_cache[sig_id] = func(s)
2064             self.write_debug(f'Decrypted nsig {s} => {self._player_cache[sig_id]}')
2065             return self._player_cache[sig_id]
2066         except Exception as e:
2067             raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
2068
2069     def _extract_n_function_name(self, jscode):
2070         return self._search_regex(
2071             (r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',),
2072             jscode, 'Initial JS player n function name', group='nfunc')
2073
2074     def _extract_n_function(self, video_id, player_url):
2075         player_id = self._extract_player_info(player_url)
2076         func_code = self._downloader.cache.load('youtube-nsig', player_id)
2077
2078         if func_code:
2079             jsi = JSInterpreter(func_code)
2080         else:
2081             jscode = self._load_player(video_id, player_url)
2082             funcname = self._extract_n_function_name(jscode)
2083             jsi = JSInterpreter(jscode)
2084             func_code = jsi.extract_function_code(funcname)
2085             self._downloader.cache.store('youtube-nsig', player_id, func_code)
2086
2087         if self.get_param('youtube_print_sig_code'):
2088             self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
2089
2090         return lambda s: jsi.extract_function_from_code(*func_code)([s])
2091
2092     def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
2093         """
2094         Extract signatureTimestamp (sts)
2095         Required to tell API what sig/player version is in use.
2096         """
2097         sts = None
2098         if isinstance(ytcfg, dict):
2099             sts = int_or_none(ytcfg.get('STS'))
2100
2101         if not sts:
2102             # Attempt to extract from player
2103             if player_url is None:
2104                 error_msg = 'Cannot extract signature timestamp without player_url.'
2105                 if fatal:
2106                     raise ExtractorError(error_msg)
2107                 self.report_warning(error_msg)
2108                 return
2109             code = self._load_player(video_id, player_url, fatal=fatal)
2110             if code:
2111                 sts = int_or_none(self._search_regex(
2112                     r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
2113                     'JS player signature timestamp', group='sts', fatal=fatal))
2114         return sts
2115
2116     def _mark_watched(self, video_id, player_responses):
2117         playback_url = get_first(
2118             player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
2119             expected_type=url_or_none)
2120         if not playback_url:
2121             self.report_warning('Unable to mark watched')
2122             return
2123         parsed_playback_url = compat_urlparse.urlparse(playback_url)
2124         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
2125
2126         # cpn generation algorithm is reverse engineered from base.js.
2127         # In fact it works even with dummy cpn.
2128         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
2129         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
2130
2131         qs.update({
2132             'ver': ['2'],
2133             'cpn': [cpn],
2134         })
2135         playback_url = compat_urlparse.urlunparse(
2136             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
2137
2138         self._download_webpage(
2139             playback_url, video_id, 'Marking watched',
2140             'Unable to mark watched', fatal=False)
2141
2142     @staticmethod
2143     def _extract_urls(webpage):
2144         # Embedded YouTube player
2145         entries = [
2146             unescapeHTML(mobj.group('url'))
2147             for mobj in re.finditer(r'''(?x)
2148             (?:
2149                 <iframe[^>]+?src=|
2150                 data-video-url=|
2151                 <embed[^>]+?src=|
2152                 embedSWF\(?:\s*|
2153                 <object[^>]+data=|
2154                 new\s+SWFObject\(
2155             )
2156             (["\'])
2157                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
2158                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
2159             \1''', webpage)]
2160
2161         # lazyYT YouTube embed
2162         entries.extend(list(map(
2163             unescapeHTML,
2164             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
2165
2166         # Wordpress "YouTube Video Importer" plugin
2167         matches = re.findall(r'''(?x)<div[^>]+
2168             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
2169             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
2170         entries.extend(m[-1] for m in matches)
2171
2172         return entries
2173
2174     @staticmethod
2175     def _extract_url(webpage):
2176         urls = YoutubeIE._extract_urls(webpage)
2177         return urls[0] if urls else None
2178
2179     @classmethod
2180     def extract_id(cls, url):
2181         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2182         if mobj is None:
2183             raise ExtractorError('Invalid URL: %s' % url)
2184         return mobj.group('id')
2185
2186     def _extract_chapters_from_json(self, data, duration):
2187         chapter_list = traverse_obj(
2188             data, (
2189                 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2190                 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2191             ), expected_type=list)
2192
2193         return self._extract_chapters(
2194             chapter_list,
2195             chapter_time=lambda chapter: float_or_none(
2196                 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2197             chapter_title=lambda chapter: traverse_obj(
2198                 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2199             duration=duration)
2200
2201     def _extract_chapters_from_engagement_panel(self, data, duration):
2202         content_list = traverse_obj(
2203             data,
2204             ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2205             expected_type=list, default=[])
2206         chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2207         chapter_title = lambda chapter: self._get_text(chapter, 'title')
2208
2209         return next((
2210             filter(None, (
2211                 self._extract_chapters(
2212                     traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2213                     chapter_time, chapter_title, duration)
2214                 for contents in content_list
2215             ))), [])
2216
2217     def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2218         chapters = []
2219         last_chapter = {'start_time': 0}
2220         for idx, chapter in enumerate(chapter_list or []):
2221             title = chapter_title(chapter)
2222             start_time = chapter_time(chapter)
2223             if start_time is None:
2224                 continue
2225             last_chapter['end_time'] = start_time
2226             if start_time < last_chapter['start_time']:
2227                 if idx == 1:
2228                     chapters.pop()
2229                     self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2230                 else:
2231                     self.report_warning(f'Invalid start time for chapter "{title}"')
2232                     continue
2233             last_chapter = {'start_time': start_time, 'title': title}
2234             chapters.append(last_chapter)
2235         last_chapter['end_time'] = duration
2236         return chapters
2237
2238     def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2239         return self._parse_json(self._search_regex(
2240             (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2241              regex), webpage, name, default='{}'), video_id, fatal=False)
2242
2243     def _extract_comment(self, comment_renderer, parent=None):
2244         comment_id = comment_renderer.get('commentId')
2245         if not comment_id:
2246             return
2247
2248         text = self._get_text(comment_renderer, 'contentText')
2249
2250         # note: timestamp is an estimate calculated from the current time and time_text
2251         timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
2252         author = self._get_text(comment_renderer, 'authorText')
2253         author_id = try_get(comment_renderer,
2254                             lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2255
2256         votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2257                                                        lambda x: x['likeCount']), compat_str)) or 0
2258         author_thumbnail = try_get(comment_renderer,
2259                                    lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2260
2261         author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2262         is_favorited = 'creatorHeart' in (try_get(
2263             comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2264         return {
2265             'id': comment_id,
2266             'text': text,
2267             'timestamp': timestamp,
2268             'time_text': time_text,
2269             'like_count': votes,
2270             'is_favorited': is_favorited,
2271             'author': author,
2272             'author_id': author_id,
2273             'author_thumbnail': author_thumbnail,
2274             'author_is_uploader': author_is_uploader,
2275             'parent': parent or 'root'
2276         }
2277
    def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
        """Generator yielding comment dicts as produced by _extract_comment.

        Starting from the continuation found in ``root_continuation_data``, pages
        are requested from the 'next' endpoint until no continuation is left.
        Without ``parent`` this walks the top-level comments and recurses into
        the replies of each thread; with ``parent`` (a comment id) it walks the
        replies of that comment. ``tracker`` is the dict of running counters
        shared between the recursive calls; it is created when not passed in.
        Limits come from the 'max_comments' and (deprecated) 'max_comment_depth'
        extractor arguments.
        """

        # First value of an extractor argument, or '' when it is not given
        get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

        def extract_header(contents):
            # Looks for the comments header to record the expected comment count
            # and returns the continuation of the selected sort order, if any
            _continuation = None
            for content in contents:
                comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
                expected_comment_count = parse_count(self._get_text(
                    comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

                if expected_comment_count:
                    tracker['est_total'] = expected_comment_count
                    self.to_screen(f'Downloading ~{expected_comment_count} comments')
                comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    continue

                sort_text = str_or_none(sort_menu_item.get('title'))
                if not sort_text:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text.lower())
                break
            return _continuation

        def extract_thread(contents):
            # Yields the comments of one page, each followed by its replies
            if not parent:
                tracker['current_page_thread'] = 0
            for content in contents:
                if not parent and tracker['total_parent_comments'] >= max_parents:
                    # A bare yield produces None, which the consuming loop at
                    # the bottom of this function treats as the signal to stop
                    yield
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                comment_renderer = get_first(
                    (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                    expected_type=dict, default={})

                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue

                tracker['running_total'] += 1
                tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
                yield comment

                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    tracker['current_page_thread'] += 1
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, ytcfg, video_id,
                        parent=comment.get('id'), tracker=tracker)
                    # Replies are capped both per thread and by what is left of
                    # the overall reply budget
                    for reply_comment in itertools.islice(comment_entries_iter, min(max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))):
                        yield reply_comment

        # Keeps track of counts across recursive calls
        if not tracker:
            tracker = dict(
                running_total=0,
                est_total=0,
                current_page_thread=0,
                total_parent_comments=0,
                total_reply_comments=0)

        # TODO: Deprecated
        # YouTube comments have a max depth of 2
        max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
        if max_depth:
            self._downloader.deprecation_warning(
                '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
        if max_depth == 1 and parent:
            return

        # The 'max_comments' argument holds up to four values; missing or
        # non-numeric ones become sys.maxsize. max_comments itself is not used
        # in this function
        max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
            lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

        continuation = self._extract_continuation(root_continuation_data)
        message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
        if message and not parent:
            self.report_warning(message, video_id=video_id)

        response = None
        # Only the top-level call has a header page to process first
        is_first_continuation = parent is None

        for page_num in itertools.count(0):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = '    Downloading comment API JSON reply thread %d %s' % (
                        tracker['current_page_thread'], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    '       ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys='onResponseReceivedEndpoints')

            continuation_contents = traverse_obj(
                response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

            # Reset; the loop ends unless this page provides a new continuation
            continuation = None
            for continuation_section in continuation_contents:
                continuation_items = traverse_obj(
                    continuation_section,
                    (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                    get_all=False, expected_type=list) or []
                if is_first_continuation:
                    # The first section is only inspected for the header
                    continuation = extract_header(continuation_items)
                    is_first_continuation = False
                    if continuation:
                        break
                    continue

                for entry in extract_thread(continuation_items):
                    if not entry:
                        # None from extract_thread: the parent limit was reached
                        return
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    break
2414
2415     def _get_comments(self, ytcfg, video_id, contents, webpage):
2416         """Entry for comment extraction"""
2417         def _real_comment_extract(contents):
2418             renderer = next((
2419                 item for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
2420                 if item.get('sectionIdentifier') == 'comment-item-section'), None)
2421             yield from self._comment_entries(renderer, ytcfg, video_id)
2422
2423         max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
2424         return itertools.islice(_real_comment_extract(contents), 0, max_comments)
2425
2426     @staticmethod
2427     def _get_checkok_params():
2428         return {'contentCheckOk': True, 'racyCheckOk': True}
2429
2430     @classmethod
2431     def _generate_player_context(cls, sts=None):
2432         context = {
2433             'html5Preference': 'HTML5_PREF_WANTS',
2434         }
2435         if sts is not None:
2436             context['signatureTimestamp'] = sts
2437         return {
2438             'playbackContext': {
2439                 'contentPlaybackContext': context
2440             },
2441             **cls._get_checkok_params()
2442         }
2443
2444     @staticmethod
2445     def _is_agegated(player_response):
2446         if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
2447             return True
2448
2449         reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2450         AGE_GATE_REASONS = (
2451             'confirm your age', 'age-restricted', 'inappropriate',  # reason
2452             'age_verification_required', 'age_check_required',  # status
2453         )
2454         return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
2455
2456     @staticmethod
2457     def _is_unplayable(player_response):
2458         return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
2459
2460     def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
2461
2462         session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2463         syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2464         sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
2465         headers = self.generate_api_headers(
2466             ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
2467
2468         yt_query = {'videoId': video_id}
2469         yt_query.update(self._generate_player_context(sts))
2470         return self._extract_response(
2471             item_id=video_id, ep='player', query=yt_query,
2472             ytcfg=player_ytcfg, headers=headers, fatal=True,
2473             default_client=client,
2474             note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2475         ) or None
2476
2477     def _get_requested_clients(self, url, smuggled_data):
2478         requested_clients = []
2479         default = ['android', 'web']
2480         allowed_clients = sorted(
2481             [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
2482             key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
2483         for client in self._configuration_arg('player_client'):
2484             if client in allowed_clients:
2485                 requested_clients.append(client)
2486             elif client == 'default':
2487                 requested_clients.extend(default)
2488             elif client == 'all':
2489                 requested_clients.extend(allowed_clients)
2490             else:
2491                 self.report_warning(f'Skipping unsupported client {client}')
2492         if not requested_clients:
2493             requested_clients = default
2494
2495         if smuggled_data.get('is_music_url') or self.is_music_url(url):
2496             requested_clients.extend(
2497                 f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
2498
2499         return orderedSet(requested_clients)
2500
2501     def _extract_player_ytcfg(self, client, video_id):
2502         url = {
2503             'web_music': 'https://music.youtube.com',
2504             'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2505         }.get(client)
2506         if not url:
2507             return {}
2508         webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2509         return self.extract_ytcfg(video_id, webpage) or {}
2510
    def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
        """Collect the player responses of all the given clients.

        Returns a tuple ``(prs, player_url)``: the list of player responses and
        the JS player URL that was determined along the way (or None). Clients
        may be added while processing, see the end of the loop. A client whose
        request raises ExtractorError is skipped; the error is re-raised at the
        end only if no response at all was collected, otherwise it is reported
        as a warning.
        """
        initial_pr = None
        if webpage:
            initial_pr = self._extract_yt_initial_variable(
                webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
                video_id, 'initial player response')

        original_clients = clients
        # Reversed copy used as a stack: pop() takes the clients in their
        # original order and an appended client is processed next
        clients = clients[::-1]
        prs = []

        def append_client(client_name):
            # Only known clients that were not requested in the first place
            if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
                clients.append(client_name)

        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        if initial_pr:
            pr = dict(initial_pr)
            pr['streamingData'] = None
            prs.append(pr)

        last_error = None
        tried_iframe_fallback = False
        player_url = None
        while clients:
            client = clients.pop()
            player_ytcfg = master_ytcfg if client == 'web' else {}
            if 'configs' not in self._configuration_arg('player_skip'):
                player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

            # A player URL found for an earlier client is reused
            player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
            require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
            if 'js' in self._configuration_arg('player_skip'):
                require_js_player = False
                player_url = None

            # The iframe API fallback is attempted at most once
            if not player_url and not tried_iframe_fallback and require_js_player:
                player_url = self._download_player_url(video_id)
                tried_iframe_fallback = True

            try:
                # For the web client the response embedded in the webpage is
                # used when available instead of making a request
                pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
            except ExtractorError as e:
                # Only the most recent error is kept; an earlier one is
                # downgraded to a warning
                if last_error:
                    self.report_warning(last_error)
                last_error = e
                continue

            if pr:
                prs.append(pr)

            # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
            if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
                append_client(client.replace('_agegate', '_creator'))
            elif self._is_agegated(pr):
                append_client(f'{client}_agegate')

        if last_error:
            if not len(prs):
                raise last_error
            self.report_warning(last_error)
        return prs, player_url
2577
2578     def _extract_formats(self, streaming_data, video_id, player_url, is_live):
2579         itags, stream_ids = {}, []
2580         itag_qualities, res_qualities = {}, {}
2581         q = qualities([
2582             # Normally tiny is the smallest video-only formats. But
2583             # audio-only formats with unknown quality may get tagged as tiny
2584             'tiny',
2585             'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
2586             'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2587         ])
2588         streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
2589
2590         for fmt in streaming_formats:
2591             if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2592                 continue
2593
2594             itag = str_or_none(fmt.get('itag'))
2595             audio_track = fmt.get('audioTrack') or {}
2596             stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2597             if stream_id in stream_ids:
2598                 continue
2599
2600             quality = fmt.get('quality')
2601             height = int_or_none(fmt.get('height'))
2602             if quality == 'tiny' or not quality:
2603                 quality = fmt.get('audioQuality', '').lower() or quality
2604             # The 3gp format (17) in android client has a quality of "small",
2605             # but is actually worse than other formats
2606             if itag == '17':
2607                 quality = 'tiny'
2608             if quality:
2609                 if itag:
2610                     itag_qualities[itag] = quality
2611                 if height:
2612                     res_qualities[height] = quality
2613             # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2614             # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2615             # number of fragment that would subsequently requested with (`&sq=N`)
2616             if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2617                 continue
2618
2619             fmt_url = fmt.get('url')
2620             if not fmt_url:
2621                 sc = compat_parse_qs(fmt.get('signatureCipher'))
2622                 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2623                 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2624                 if not (sc and fmt_url and encrypted_sig):
2625                     continue
2626                 if not player_url:
2627                     continue
2628                 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2629                 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2630                 fmt_url += '&' + sp + '=' + signature
2631
2632             query = parse_qs(fmt_url)
2633             throttled = False
2634             if query.get('n'):
2635                 try:
2636                     fmt_url = update_url_query(fmt_url, {
2637                         'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
2638                 except ExtractorError as e:
2639                     self.report_warning(
2640                         f'nsig extraction failed: You may experience throttling for some formats\n'
2641                         f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
2642                     throttled = True
2643
2644             if itag:
2645                 itags[itag] = 'https'
2646                 stream_ids.append(stream_id)
2647
2648             tbr = float_or_none(
2649                 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2650             dct = {
2651                 'asr': int_or_none(fmt.get('audioSampleRate')),
2652                 'filesize': int_or_none(fmt.get('contentLength')),
2653                 'format_id': itag,
2654                 'format_note': join_nonempty(
2655                     '%s%s' % (audio_track.get('displayName') or '',
2656                               ' (default)' if audio_track.get('audioIsDefault') else ''),
2657                     fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
2658                     throttled and 'THROTTLED', delim=', '),
2659                 'source_preference': -10 if throttled else -1,
2660                 'fps': int_or_none(fmt.get('fps')) or None,
2661                 'height': height,
2662                 'quality': q(quality),
2663                 'tbr': tbr,
2664                 'url': fmt_url,
2665                 'width': int_or_none(fmt.get('width')),
2666                 'language': audio_track.get('id', '').split('.')[0],
2667                 'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
2668             }
2669             mime_mobj = re.match(
2670                 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2671             if mime_mobj:
2672                 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2673                 dct.update(parse_codecs(mime_mobj.group(2)))
2674             no_audio = dct.get('acodec') == 'none'
2675             no_video = dct.get('vcodec') == 'none'
2676             if no_audio:
2677                 dct['vbr'] = tbr
2678             if no_video:
2679                 dct['abr'] = tbr
2680             if no_audio or no_video:
2681                 dct['downloader_options'] = {
2682                     # Youtube throttles chunks >~10M
2683                     'http_chunk_size': 10485760,
2684                 }
2685                 if dct.get('ext'):
2686                     dct['container'] = dct['ext'] + '_dash'
2687             yield dct
2688
2689         live_from_start = is_live and self.get_param('live_from_start')
2690         skip_manifests = self._configuration_arg('skip')
2691         if not self.get_param('youtube_include_hls_manifest', True):
2692             skip_manifests.append('hls')
2693         get_dash = 'dash' not in skip_manifests and (
2694             not is_live or live_from_start or self._configuration_arg('include_live_dash'))
2695         get_hls = not live_from_start and 'hls' not in skip_manifests
2696
2697         def process_manifest_format(f, proto, itag):
2698             if itag in itags:
2699                 if itags[itag] == proto or f'{itag}-{proto}' in itags:
2700                     return False
2701                 itag = f'{itag}-{proto}'
2702             if itag:
2703                 f['format_id'] = itag
2704                 itags[itag] = proto
2705
2706             f['quality'] = next((
2707                 q(qdict[val])
2708                 for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
2709                 if val in qdict), -1)
2710             return True
2711
2712         for sd in streaming_data:
2713             hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2714             if hls_manifest_url:
2715                 for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
2716                     if process_manifest_format(f, 'hls', self._search_regex(
2717                             r'/itag/(\d+)', f['url'], 'itag', default=None)):
2718                         yield f
2719
2720             dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2721             if dash_manifest_url:
2722                 for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
2723                     if process_manifest_format(f, 'dash', f['format_id']):
2724                         f['filesize'] = int_or_none(self._search_regex(
2725                             r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
2726                         if live_from_start:
2727                             f['is_from_start'] = True
2728
2729                         yield f
2730
2731     def _extract_storyboard(self, player_responses, duration):
2732         spec = get_first(
2733             player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
2734         if not spec:
2735             return
2736         base_url = spec.pop()
2737         L = len(spec) - 1
2738         for i, args in enumerate(spec):
2739             args = args.split('#')
2740             counts = list(map(int_or_none, args[:5]))
2741             if len(args) != 8 or not all(counts):
2742                 self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
2743                 continue
2744             width, height, frame_count, cols, rows = counts
2745             N, sigh = args[6:]
2746
2747             url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
2748             fragment_count = frame_count / (cols * rows)
2749             fragment_duration = duration / fragment_count
2750             yield {
2751                 'format_id': f'sb{i}',
2752                 'format_note': 'storyboard',
2753                 'ext': 'mhtml',
2754                 'protocol': 'mhtml',
2755                 'acodec': 'none',
2756                 'vcodec': 'none',
2757                 'url': url,
2758                 'width': width,
2759                 'height': height,
2760                 'fragments': [{
2761                     'path': url.replace('$M', str(j)),
2762                     'duration': min(fragment_duration, duration - (j * fragment_duration)),
2763                 } for j in range(math.ceil(fragment_count))],
2764             }
2765
2766     def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
2767         webpage = None
2768         if 'webpage' not in self._configuration_arg('player_skip'):
2769             webpage = self._download_webpage(
2770                 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2771
2772         master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2773
2774         player_responses, player_url = self._extract_player_responses(
2775             self._get_requested_clients(url, smuggled_data),
2776             video_id, webpage, master_ytcfg)
2777
2778         return webpage, master_ytcfg, player_responses, player_url
2779
2780     def _list_formats(self, video_id, microformats, video_details, player_responses, player_url):
2781         live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
2782         is_live = get_first(video_details, 'isLive')
2783         if is_live is None:
2784             is_live = get_first(live_broadcast_details, 'isLiveNow')
2785
2786         streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2787         formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2788
2789         return live_broadcast_details, is_live, streaming_data, formats
2790
2791     def _real_extract(self, url):
2792         url, smuggled_data = unsmuggle_url(url, {})
2793         video_id = self._match_id(url)
2794
2795         base_url = self.http_scheme() + '//www.youtube.com/'
2796         webpage_url = base_url + 'watch?v=' + video_id
2797
2798         webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
2799
2800         playability_statuses = traverse_obj(
2801             player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2802
2803         trailer_video_id = get_first(
2804             playability_statuses,
2805             ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2806             expected_type=str)
2807         if trailer_video_id:
2808             return self.url_result(
2809                 trailer_video_id, self.ie_key(), trailer_video_id)
2810
2811         search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2812                        if webpage else (lambda x: None))
2813
2814         video_details = traverse_obj(
2815             player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2816         microformats = traverse_obj(
2817             player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2818             expected_type=dict, default=[])
2819         video_title = (
2820             get_first(video_details, 'title')
2821             or self._get_text(microformats, (..., 'title'))
2822             or search_meta(['og:title', 'twitter:title', 'title']))
2823         video_description = get_first(video_details, 'shortDescription')
2824
2825         multifeed_metadata_list = get_first(
2826             player_responses,
2827             ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2828             expected_type=str)
2829         if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
2830             if self.get_param('noplaylist'):
2831                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2832             else:
2833                 entries = []
2834                 feed_ids = []
2835                 for feed in multifeed_metadata_list.split(','):
2836                     # Unquote should take place before split on comma (,) since textual
2837                     # fields may contain comma as well (see
2838                     # https://github.com/ytdl-org/youtube-dl/issues/8536)
2839                     feed_data = compat_parse_qs(
2840                         compat_urllib_parse_unquote_plus(feed))
2841
2842                     def feed_entry(name):
2843                         return try_get(
2844                             feed_data, lambda x: x[name][0], compat_str)
2845
2846                     feed_id = feed_entry('id')
2847                     if not feed_id:
2848                         continue
2849                     feed_title = feed_entry('title')
2850                     title = video_title
2851                     if feed_title:
2852                         title += ' (%s)' % feed_title
2853                     entries.append({
2854                         '_type': 'url_transparent',
2855                         'ie_key': 'Youtube',
2856                         'url': smuggle_url(
2857                             '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2858                             {'force_singlefeed': True}),
2859                         'title': title,
2860                     })
2861                     feed_ids.append(feed_id)
2862                 self.to_screen(
2863                     'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2864                     % (', '.join(feed_ids), video_id))
2865                 return self.playlist_result(
2866                     entries, video_id, video_title, video_description)
2867
2868         live_broadcast_details, is_live, streaming_data, formats = self._list_formats(video_id, microformats, video_details, player_responses, player_url)
2869
2870         if not formats:
2871             if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2872                 self.report_drm(video_id)
2873             pemr = get_first(
2874                 playability_statuses,
2875                 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2876             reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2877             subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2878             if subreason:
2879                 if subreason == 'The uploader has not made this video available in your country.':
2880                     countries = get_first(microformats, 'availableCountries')
2881                     if not countries:
2882                         regions_allowed = search_meta('regionsAllowed')
2883                         countries = regions_allowed.split(',') if regions_allowed else None
2884                     self.raise_geo_restricted(subreason, countries, metadata_available=True)
2885                 reason += f'. {subreason}'
2886             if reason:
2887                 self.raise_no_formats(reason, expected=True)
2888
2889         keywords = get_first(video_details, 'keywords', expected_type=list) or []
2890         if not keywords and webpage:
2891             keywords = [
2892                 unescapeHTML(m.group('content'))
2893                 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2894         for keyword in keywords:
2895             if keyword.startswith('yt:stretch='):
2896                 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2897                 if mobj:
2898                     # NB: float is intentional for forcing float division
2899                     w, h = (float(v) for v in mobj.groups())
2900                     if w > 0 and h > 0:
2901                         ratio = w / h
2902                         for f in formats:
2903                             if f.get('vcodec') != 'none':
2904                                 f['stretched_ratio'] = ratio
2905                         break
2906
2907         thumbnails = []
2908         thumbnail_dicts = traverse_obj(
2909             (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2910             expected_type=dict, default=[])
2911         for thumbnail in thumbnail_dicts:
2912             thumbnail_url = thumbnail.get('url')
2913             if not thumbnail_url:
2914                 continue
2915             # Sometimes youtube gives a wrong thumbnail URL. See:
2916             # https://github.com/yt-dlp/yt-dlp/issues/233
2917             # https://github.com/ytdl-org/youtube-dl/issues/28023
2918             if 'maxresdefault' in thumbnail_url:
2919                 thumbnail_url = thumbnail_url.split('?')[0]
2920             thumbnails.append({
2921                 'url': thumbnail_url,
2922                 'height': int_or_none(thumbnail.get('height')),
2923                 'width': int_or_none(thumbnail.get('width')),
2924             })
2925         thumbnail_url = search_meta(['og:image', 'twitter:image'])
2926         if thumbnail_url:
2927             thumbnails.append({
2928                 'url': thumbnail_url,
2929             })
2930         original_thumbnails = thumbnails.copy()
2931
2932         # The best resolution thumbnails sometimes does not appear in the webpage
2933         # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2934         # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2935         thumbnail_names = [
2936             'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
2937             'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2938             'mqdefault', 'mq1', 'mq2', 'mq3',
2939             'default', '1', '2', '3'
2940         ]
2941         n_thumbnail_names = len(thumbnail_names)
2942         thumbnails.extend({
2943             'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2944                 video_id=video_id, name=name, ext=ext,
2945                 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2946         } for name in thumbnail_names for ext in ('webp', 'jpg'))
2947         for thumb in thumbnails:
2948             i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2949             thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2950         self._remove_duplicate_formats(thumbnails)
2951         self._downloader._sort_thumbnails(original_thumbnails)
2952
2953         category = get_first(microformats, 'category') or search_meta('genre')
2954         channel_id = str_or_none(
2955             get_first(video_details, 'channelId')
2956             or get_first(microformats, 'externalChannelId')
2957             or search_meta('channelId'))
2958         duration = int_or_none(
2959             get_first(video_details, 'lengthSeconds')
2960             or get_first(microformats, 'lengthSeconds')
2961             or parse_duration(search_meta('duration'))) or None
2962         owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2963
2964         live_content = get_first(video_details, 'isLiveContent')
2965         is_upcoming = get_first(video_details, 'isUpcoming')
2966         if is_live is None:
2967             if is_upcoming or live_content is False:
2968                 is_live = False
2969         if is_upcoming is None and (live_content or is_live):
2970             is_upcoming = False
2971         live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2972         live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2973         if not duration and live_end_time and live_start_time:
2974             duration = live_end_time - live_start_time
2975
2976         if is_live and self.get_param('live_from_start'):
2977             self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)
2978
2979         formats.extend(self._extract_storyboard(player_responses, duration))
2980
2981         # Source is given priority since formats that throttle are given lower source_preference
2982         # When throttling issue is fully fixed, remove this
2983         self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
2984
2985         info = {
2986             'id': video_id,
2987             'title': video_title,
2988             'formats': formats,
2989             'thumbnails': thumbnails,
2990             # The best thumbnail that we are sure exists. Prevents unnecessary
2991             # URL checking if user don't care about getting the best possible thumbnail
2992             'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
2993             'description': video_description,
2994             'upload_date': unified_strdate(
2995                 get_first(microformats, 'uploadDate')
2996                 or search_meta('uploadDate')),
2997             'uploader': get_first(video_details, 'author'),
2998             'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2999             'uploader_url': owner_profile_url,
3000             'channel_id': channel_id,
3001             'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
3002             'duration': duration,
3003             'view_count': int_or_none(
3004                 get_first((video_details, microformats), (..., 'viewCount'))
3005                 or search_meta('interactionCount')),
3006             'average_rating': float_or_none(get_first(video_details, 'averageRating')),
3007             'age_limit': 18 if (
3008                 get_first(microformats, 'isFamilySafe') is False
3009                 or search_meta('isFamilyFriendly') == 'false'
3010                 or search_meta('og:restrictions:age') == '18+') else 0,
3011             'webpage_url': webpage_url,
3012             'categories': [category] if category else None,
3013             'tags': keywords,
3014             'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
3015             'is_live': is_live,
3016             'was_live': (False if is_live or is_upcoming or live_content is False
3017                          else None if is_live is None or is_upcoming is None
3018                          else live_content),
3019             'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
3020             'release_timestamp': live_start_time,
3021         }
3022
3023         pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
3024         if pctr:
3025             def get_lang_code(track):
3026                 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
3027                         or track.get('languageCode'))
3028
3029             # Converted into dicts to remove duplicates
3030             captions = {
3031                 get_lang_code(sub): sub
3032                 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
3033             translation_languages = {
3034                 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
3035                 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
3036
3037             def process_language(container, base_url, lang_code, sub_name, query):
3038                 lang_subs = container.setdefault(lang_code, [])
3039                 for fmt in self._SUBTITLE_FORMATS:
3040                     query.update({
3041                         'fmt': fmt,
3042                     })
3043                     lang_subs.append({
3044                         'ext': fmt,
3045                         'url': update_url_query(base_url, query),
3046                         'name': sub_name,
3047                     })
3048
3049             subtitles, automatic_captions = {}, {}
3050             for lang_code, caption_track in captions.items():
3051                 base_url = caption_track.get('baseUrl')
3052                 if not base_url:
3053                     continue
3054                 lang_name = self._get_text(caption_track, 'name', max_runs=1)
3055                 if caption_track.get('kind') != 'asr':
3056                     if not lang_code:
3057                         continue
3058                     process_language(
3059                         subtitles, base_url, lang_code, lang_name, {})
3060                     if not caption_track.get('isTranslatable'):
3061                         continue
3062                 for trans_code, trans_name in translation_languages.items():
3063                     if not trans_code:
3064                         continue
3065                     if caption_track.get('kind') != 'asr':
3066                         trans_code += f'-{lang_code}'
3067                         trans_name += format_field(lang_name, template=' from %s')
3068                     process_language(
3069                         automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code})
3070             info['automatic_captions'] = automatic_captions
3071             info['subtitles'] = subtitles
3072
3073         parsed_url = compat_urllib_parse_urlparse(url)
3074         for component in [parsed_url.fragment, parsed_url.query]:
3075             query = compat_parse_qs(component)
3076             for k, v in query.items():
3077                 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
3078                     d_k += '_time'
3079                     if d_k not in info and k in s_ks:
3080                         info[d_k] = parse_duration(query[k][0])
3081
3082         # Youtube Music Auto-generated description
3083         if video_description:
3084             mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
3085             if mobj:
3086                 release_year = mobj.group('release_year')
3087                 release_date = mobj.group('release_date')
3088                 if release_date:
3089                     release_date = release_date.replace('-', '')
3090                     if not release_year:
3091                         release_year = release_date[:4]
3092                 info.update({
3093                     'album': mobj.group('album'.strip()),
3094                     'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3095                     'track': mobj.group('track').strip(),
3096                     'release_date': release_date,
3097                     'release_year': int_or_none(release_year),
3098                 })
3099
3100         initial_data = None
3101         if webpage:
3102             initial_data = self._extract_yt_initial_variable(
3103                 webpage, self._YT_INITIAL_DATA_RE, video_id,
3104                 'yt initial data')
3105         if not initial_data:
3106             query = {'videoId': video_id}
3107             query.update(self._get_checkok_params())
3108             initial_data = self._extract_response(
3109                 item_id=video_id, ep='next', fatal=False,
3110                 ytcfg=master_ytcfg, query=query,
3111                 headers=self.generate_api_headers(ytcfg=master_ytcfg),
3112                 note='Downloading initial data API JSON')
3113
3114         try:
3115             # This will error if there is no livechat
3116             initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
3117             info.setdefault('subtitles', {})['live_chat'] = [{
3118                 'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
3119                 'video_id': video_id,
3120                 'ext': 'json',
3121                 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
3122             }]
3123         except (KeyError, IndexError, TypeError):
3124             pass
3125
3126         if initial_data:
3127             info['chapters'] = (
3128                 self._extract_chapters_from_json(initial_data, duration)
3129                 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3130                 or None)
3131
3132             contents = try_get(
3133                 initial_data,
3134                 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3135                 list) or []
3136             for content in contents:
3137                 vpir = content.get('videoPrimaryInfoRenderer')
3138                 if vpir:
3139                     stl = vpir.get('superTitleLink')
3140                     if stl:
3141                         stl = self._get_text(stl)
3142                         if try_get(
3143                                 vpir,
3144                                 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3145                             info['location'] = stl
3146                         else:
3147                             mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3148                             if mobj:
3149                                 info.update({
3150                                     'series': mobj.group(1),
3151                                     'season_number': int(mobj.group(2)),
3152                                     'episode_number': int(mobj.group(3)),
3153                                 })
3154                     for tlb in (try_get(
3155                             vpir,
3156                             lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3157                             list) or []):
3158                         tbr = tlb.get('toggleButtonRenderer') or {}
3159                         for getter, regex in [(
3160                                 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3161                                 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3162                                     lambda x: x['accessibility'],
3163                                     lambda x: x['accessibilityData']['accessibilityData'],
3164                                 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3165                             label = (try_get(tbr, getter, dict) or {}).get('label')
3166                             if label:
3167                                 mobj = re.match(regex, label)
3168                                 if mobj:
3169                                     info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3170                                     break
3171                     sbr_tooltip = try_get(
3172                         vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3173                     if sbr_tooltip:
3174                         like_count, dislike_count = sbr_tooltip.split(' / ')
3175                         info.update({
3176                             'like_count': str_to_int(like_count),
3177                             'dislike_count': str_to_int(dislike_count),
3178                         })
3179                 vsir = content.get('videoSecondaryInfoRenderer')
3180                 if vsir:
3181                     info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
3182                     rows = try_get(
3183                         vsir,
3184                         lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3185                         list) or []
3186                     multiple_songs = False
3187                     for row in rows:
3188                         if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3189                             multiple_songs = True
3190                             break
3191                     for row in rows:
3192                         mrr = row.get('metadataRowRenderer') or {}
3193                         mrr_title = mrr.get('title')
3194                         if not mrr_title:
3195                             continue
3196                         mrr_title = self._get_text(mrr, 'title')
3197                         mrr_contents_text = self._get_text(mrr, ('contents', 0))
3198                         if mrr_title == 'License':
3199                             info['license'] = mrr_contents_text
3200                         elif not multiple_songs:
3201                             if mrr_title == 'Album':
3202                                 info['album'] = mrr_contents_text
3203                             elif mrr_title == 'Artist':
3204                                 info['artist'] = mrr_contents_text
3205                             elif mrr_title == 'Song':
3206                                 info['track'] = mrr_contents_text
3207
3208         fallbacks = {
3209             'channel': 'uploader',
3210             'channel_id': 'uploader_id',
3211             'channel_url': 'uploader_url',
3212         }
3213         for to, frm in fallbacks.items():
3214             if not info.get(to):
3215                 info[to] = info.get(frm)
3216
3217         for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3218             v = info.get(s_k)
3219             if v:
3220                 info[d_k] = v
3221
3222         is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3223         is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3224         is_membersonly = None
3225         is_premium = None
3226         if initial_data and is_private is not None:
3227             is_membersonly = False
3228             is_premium = False
3229             contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3230             badge_labels = set()
3231             for content in contents:
3232                 if not isinstance(content, dict):
3233                     continue
3234                 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3235             for badge_label in badge_labels:
3236                 if badge_label.lower() == 'members only':
3237                     is_membersonly = True
3238                 elif badge_label.lower() == 'premium':
3239                     is_premium = True
3240                 elif badge_label.lower() == 'unlisted':
3241                     is_unlisted = True
3242
3243         info['availability'] = self._availability(
3244             is_private=is_private,
3245             needs_premium=is_premium,
3246             needs_subscription=is_membersonly,
3247             needs_auth=info['age_limit'] >= 18,
3248             is_unlisted=None if is_private is None else is_unlisted)
3249
3250         info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
3251
3252         self.mark_watched(video_id, player_responses)
3253
3254         return info
3255
3256
3257 class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
3258
3259     def _extract_channel_id(self, webpage):
3260         channel_id = self._html_search_meta(
3261             'channelId', webpage, 'channel id', default=None)
3262         if channel_id:
3263             return channel_id
3264         channel_url = self._html_search_meta(
3265             ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3266              'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3267              'twitter:app:url:googleplay'), webpage, 'channel url')
3268         return self._search_regex(
3269             r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3270             channel_url, 'channel id')
3271
3272     @staticmethod
3273     def _extract_basic_item_renderer(item):
3274         # Modified from _extract_grid_item_renderer
3275         known_basic_renderers = (
3276             'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3277         )
3278         for key, renderer in item.items():
3279             if not isinstance(renderer, dict):
3280                 continue
3281             elif key in known_basic_renderers:
3282                 return renderer
3283             elif key.startswith('grid') and key.endswith('Renderer'):
3284                 return renderer
3285
3286     def _grid_entries(self, grid_renderer):
3287         for item in grid_renderer['items']:
3288             if not isinstance(item, dict):
3289                 continue
3290             renderer = self._extract_basic_item_renderer(item)
3291             if not isinstance(renderer, dict):
3292                 continue
3293             title = self._get_text(renderer, 'title')
3294
3295             # playlist
3296             playlist_id = renderer.get('playlistId')
3297             if playlist_id:
3298                 yield self.url_result(
3299                     'https://www.youtube.com/playlist?list=%s' % playlist_id,
3300                     ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3301                     video_title=title)
3302                 continue
3303             # video
3304             video_id = renderer.get('videoId')
3305             if video_id:
3306                 yield self._extract_video(renderer)
3307                 continue
3308             # channel
3309             channel_id = renderer.get('channelId')
3310             if channel_id:
3311                 yield self.url_result(
3312                     'https://www.youtube.com/channel/%s' % channel_id,
3313                     ie=YoutubeTabIE.ie_key(), video_title=title)
3314                 continue
3315             # generic endpoint URL support
3316             ep_url = urljoin('https://www.youtube.com/', try_get(
3317                 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3318                 compat_str))
3319             if ep_url:
3320                 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3321                     if ie.suitable(ep_url):
3322                         yield self.url_result(
3323                             ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3324                         break
3325
3326     def _shelf_entries_from_content(self, shelf_renderer):
3327         content = shelf_renderer.get('content')
3328         if not isinstance(content, dict):
3329             return
3330         renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3331         if renderer:
3332             # TODO: add support for nested playlists so each shelf is processed
3333             # as separate playlist
3334             # TODO: this includes only first N items
3335             for entry in self._grid_entries(renderer):
3336                 yield entry
3337         renderer = content.get('horizontalListRenderer')
3338         if renderer:
3339             # TODO
3340             pass
3341
3342     def _shelf_entries(self, shelf_renderer, skip_channels=False):
3343         ep = try_get(
3344             shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3345             compat_str)
3346         shelf_url = urljoin('https://www.youtube.com', ep)
3347         if shelf_url:
3348             # Skipping links to another channels, note that checking for
3349             # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3350             # will not work
3351             if skip_channels and '/channels?' in shelf_url:
3352                 return
3353             title = self._get_text(shelf_renderer, 'title')
3354             yield self.url_result(shelf_url, video_title=title)
3355         # Shelf may not contain shelf URL, fallback to extraction from content
3356         for entry in self._shelf_entries_from_content(shelf_renderer):
3357             yield entry
3358
3359     def _playlist_entries(self, video_list_renderer):
3360         for content in video_list_renderer['contents']:
3361             if not isinstance(content, dict):
3362                 continue
3363             renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3364             if not isinstance(renderer, dict):
3365                 continue
3366             video_id = renderer.get('videoId')
3367             if not video_id:
3368                 continue
3369             yield self._extract_video(renderer)
3370
3371     def _rich_entries(self, rich_grid_renderer):
3372         renderer = try_get(
3373             rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3374         video_id = renderer.get('videoId')
3375         if not video_id:
3376             return
3377         yield self._extract_video(renderer)
3378
3379     def _video_entry(self, video_renderer):
3380         video_id = video_renderer.get('videoId')
3381         if video_id:
3382             return self._extract_video(video_renderer)
3383
3384     def _post_thread_entries(self, post_thread_renderer):
3385         post_renderer = try_get(
3386             post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3387         if not post_renderer:
3388             return
3389         # video attachment
3390         video_renderer = try_get(
3391             post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3392         video_id = video_renderer.get('videoId')
3393         if video_id:
3394             entry = self._extract_video(video_renderer)
3395             if entry:
3396                 yield entry
3397         # playlist attachment
3398         playlist_id = try_get(
3399             post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3400         if playlist_id:
3401             yield self.url_result(
3402                 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3403                 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3404         # inline video links
3405         runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3406         for run in runs:
3407             if not isinstance(run, dict):
3408                 continue
3409             ep_url = try_get(
3410                 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3411             if not ep_url:
3412                 continue
3413             if not YoutubeIE.suitable(ep_url):
3414                 continue
3415             ep_video_id = YoutubeIE._match_id(ep_url)
3416             if video_id == ep_video_id:
3417                 continue
3418             yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3419
3420     def _post_thread_continuation_entries(self, post_thread_continuation):
3421         contents = post_thread_continuation.get('contents')
3422         if not isinstance(contents, list):
3423             return
3424         for content in contents:
3425             renderer = content.get('backstagePostThreadRenderer')
3426             if not isinstance(renderer, dict):
3427                 continue
3428             for entry in self._post_thread_entries(renderer):
3429                 yield entry
3430
3431     r''' # unused
3432     def _rich_grid_entries(self, contents):
3433         for content in contents:
3434             video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3435             if video_renderer:
3436                 entry = self._video_entry(video_renderer)
3437                 if entry:
3438                     yield entry
3439     '''
3440     def _extract_entries(self, parent_renderer, continuation_list):
3441         # continuation_list is modified in-place with continuation_list = [continuation_token]
3442         continuation_list[:] = [None]
3443         contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
3444         for content in contents:
3445             if not isinstance(content, dict):
3446                 continue
3447             is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
3448             if not is_renderer:
3449                 renderer = content.get('richItemRenderer')
3450                 if renderer:
3451                     for entry in self._rich_entries(renderer):
3452                         yield entry
3453                     continuation_list[0] = self._extract_continuation(parent_renderer)
3454                 continue
3455             isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
3456             for isr_content in isr_contents:
3457                 if not isinstance(isr_content, dict):
3458                     continue
3459
3460                 known_renderers = {
3461                     'playlistVideoListRenderer': self._playlist_entries,
3462                     'gridRenderer': self._grid_entries,
3463                     'shelfRenderer': lambda x: self._shelf_entries(x),
3464                     'backstagePostThreadRenderer': self._post_thread_entries,
3465                     'videoRenderer': lambda x: [self._video_entry(x)],
3466                     'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
3467                     'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
3468                 }
3469                 for key, renderer in isr_content.items():
3470                     if key not in known_renderers:
3471                         continue
3472                     for entry in known_renderers[key](renderer):
3473                         if entry:
3474                             yield entry
3475                     continuation_list[0] = self._extract_continuation(renderer)
3476                     break
3477
3478             if not continuation_list[0]:
3479                 continuation_list[0] = self._extract_continuation(is_renderer)
3480
3481         if not continuation_list[0]:
3482             continuation_list[0] = self._extract_continuation(parent_renderer)
3483
3484     def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
3485         continuation_list = [None]
3486         extract_entries = lambda x: self._extract_entries(x, continuation_list)
3487         tab_content = try_get(tab, lambda x: x['content'], dict)
3488         if not tab_content:
3489             return
3490         parent_renderer = (
3491             try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
3492             or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
3493         for entry in extract_entries(parent_renderer):
3494             yield entry
3495         continuation = continuation_list[0]
3496
3497         for page_num in itertools.count(1):
3498             if not continuation:
3499                 break
3500             headers = self.generate_api_headers(
3501                 ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
3502             response = self._extract_response(
3503                 item_id='%s page %s' % (item_id, page_num),
3504                 query=continuation, headers=headers, ytcfg=ytcfg,
3505                 check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
3506
3507             if not response:
3508                 break
3509             # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
3510             # See: https://github.com/ytdl-org/youtube-dl/issues/28702
3511             visitor_data = self._extract_visitor_data(response) or visitor_data
3512
3513             known_continuation_renderers = {
3514                 'playlistVideoListContinuation': self._playlist_entries,
3515                 'gridContinuation': self._grid_entries,
3516                 'itemSectionContinuation': self._post_thread_continuation_entries,
3517                 'sectionListContinuation': extract_entries,  # for feeds
3518             }
3519             continuation_contents = try_get(
3520                 response, lambda x: x['continuationContents'], dict) or {}
3521             continuation_renderer = None
3522             for key, value in continuation_contents.items():
3523                 if key not in known_continuation_renderers:
3524                     continue
3525                 continuation_renderer = value
3526                 continuation_list = [None]
3527                 for entry in known_continuation_renderers[key](continuation_renderer):
3528                     yield entry
3529                 continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
3530                 break
3531             if continuation_renderer:
3532                 continue
3533
3534             known_renderers = {
3535                 'gridPlaylistRenderer': (self._grid_entries, 'items'),
3536                 'gridVideoRenderer': (self._grid_entries, 'items'),
3537                 'gridChannelRenderer': (self._grid_entries, 'items'),
3538                 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
3539                 'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
3540                 'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
3541                 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
3542             }
3543             on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
3544             continuation_items = try_get(
3545                 on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
3546             continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
3547             video_items_renderer = None
3548             for key, value in continuation_item.items():
3549                 if key not in known_renderers:
3550                     continue
3551                 video_items_renderer = {known_renderers[key][1]: continuation_items}
3552                 continuation_list = [None]
3553                 for entry in known_renderers[key][0](video_items_renderer):
3554                     yield entry
3555                 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
3556                 break
3557             if video_items_renderer:
3558                 continue
3559             break
3560
3561     @staticmethod
3562     def _extract_selected_tab(tabs):
3563         for tab in tabs:
3564             renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3565             if renderer.get('selected') is True:
3566                 return renderer
3567         else:
3568             raise ExtractorError('Unable to find selected tab')
3569
3570     @classmethod
3571     def _extract_uploader(cls, data):
3572         uploader = {}
3573         renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3574         owner = try_get(
3575             renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3576         if owner:
3577             uploader['uploader'] = owner.get('text')
3578             uploader['uploader_id'] = try_get(
3579                 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3580             uploader['uploader_url'] = urljoin(
3581                 'https://www.youtube.com/',
3582                 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3583         return {k: v for k, v in uploader.items() if v is not None}
3584
3585     def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
3586         playlist_id = title = description = channel_url = channel_name = channel_id = None
3587         thumbnails_list = []
3588         tags = []
3589
3590         selected_tab = self._extract_selected_tab(tabs)
3591         renderer = try_get(
3592             data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
3593         if renderer:
3594             channel_name = renderer.get('title')
3595             channel_url = renderer.get('channelUrl')
3596             channel_id = renderer.get('externalId')
3597         else:
3598             renderer = try_get(
3599                 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
3600
3601         if renderer:
3602             title = renderer.get('title')
3603             description = renderer.get('description', '')
3604             playlist_id = channel_id
3605             tags = renderer.get('keywords', '').split()
3606             thumbnails_list = (
3607                 try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
3608                 or try_get(
3609                     self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
3610                     lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
3611                     list)
3612                 or [])
3613
3614         thumbnails = []
3615         for t in thumbnails_list:
3616             if not isinstance(t, dict):
3617                 continue
3618             thumbnail_url = url_or_none(t.get('url'))
3619             if not thumbnail_url:
3620                 continue
3621             thumbnails.append({
3622                 'url': thumbnail_url,
3623                 'width': int_or_none(t.get('width')),
3624                 'height': int_or_none(t.get('height')),
3625             })
3626         if playlist_id is None:
3627             playlist_id = item_id
3628         if title is None:
3629             title = (
3630                 try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
3631                 or playlist_id)
3632         title += format_field(selected_tab, 'title', ' - %s')
3633         title += format_field(selected_tab, 'expandedText', ' - %s')
3634         metadata = {
3635             'playlist_id': playlist_id,
3636             'playlist_title': title,
3637             'playlist_description': description,
3638             'uploader': channel_name,
3639             'uploader_id': channel_id,
3640             'uploader_url': channel_url,
3641             'thumbnails': thumbnails,
3642             'tags': tags,
3643         }
3644         availability = self._extract_availability(data)
3645         if availability:
3646             metadata['availability'] = availability
3647         if not channel_id:
3648             metadata.update(self._extract_uploader(data))
3649         metadata.update({
3650             'channel': metadata['uploader'],
3651             'channel_id': metadata['uploader_id'],
3652             'channel_url': metadata['uploader_url']})
3653         return self.playlist_result(
3654             self._entries(
3655                 selected_tab, playlist_id, ytcfg,
3656                 self._extract_account_syncid(ytcfg, data),
3657                 self._extract_visitor_data(data, ytcfg)),
3658             **metadata)
3659
3660     def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
3661         first_id = last_id = response = None
3662         for page_num in itertools.count(1):
3663             videos = list(self._playlist_entries(playlist))
3664             if not videos:
3665                 return
3666             start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
3667             if start >= len(videos):
3668                 return
3669             for video in videos[start:]:
3670                 if video['id'] == first_id:
3671                     self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
3672                     return
3673                 yield video
3674             first_id = first_id or videos[0]['id']
3675             last_id = videos[-1]['id']
3676             watch_endpoint = try_get(
3677                 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
3678             headers = self.generate_api_headers(
3679                 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
3680                 visitor_data=self._extract_visitor_data(response, data, ytcfg))
3681             query = {
3682                 'playlistId': playlist_id,
3683                 'videoId': watch_endpoint.get('videoId') or last_id,
3684                 'index': watch_endpoint.get('index') or len(videos),
3685                 'params': watch_endpoint.get('params') or 'OAE%3D'
3686             }
3687             response = self._extract_response(
3688                 item_id='%s page %d' % (playlist_id, page_num),
3689                 query=query, ep='next', headers=headers, ytcfg=ytcfg,
3690                 check_get_keys='contents'
3691             )
3692             playlist = try_get(
3693                 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3694
3695     def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
3696         title = playlist.get('title') or try_get(
3697             data, lambda x: x['titleText']['simpleText'], compat_str)
3698         playlist_id = playlist.get('playlistId') or item_id
3699
3700         # Delegating everything except mix playlists to regular tab-based playlist URL
3701         playlist_url = urljoin(url, try_get(
3702             playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3703             compat_str))
3704         if playlist_url and playlist_url != url:
3705             return self.url_result(
3706                 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3707                 video_title=title)
3708
3709         return self.playlist_result(
3710             self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
3711             playlist_id=playlist_id, playlist_title=title)
3712
3713     def _extract_availability(self, data):
3714         """
3715         Gets the availability of a given playlist/tab.
3716         Note: Unless YouTube tells us explicitly, we do not assume it is public
3717         @param data: response
3718         """
3719         is_private = is_unlisted = None
3720         renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
3721         badge_labels = self._extract_badges(renderer)
3722
3723         # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
3724         privacy_dropdown_entries = try_get(
3725             renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
3726         for renderer_dict in privacy_dropdown_entries:
3727             is_selected = try_get(
3728                 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
3729             if not is_selected:
3730                 continue
3731             label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
3732             if label:
3733                 badge_labels.add(label.lower())
3734                 break
3735
3736         for badge_label in badge_labels:
3737             if badge_label == 'unlisted':
3738                 is_unlisted = True
3739             elif badge_label == 'private':
3740                 is_private = True
3741             elif badge_label == 'public':
3742                 is_unlisted = is_private = False
3743         return self._availability(is_private, False, False, False, is_unlisted)
3744
3745     @staticmethod
3746     def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
3747         sidebar_renderer = try_get(
3748             data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
3749         for item in sidebar_renderer:
3750             renderer = try_get(item, lambda x: x[info_renderer], expected_type)
3751             if renderer:
3752                 return renderer
3753
3754     def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
3755         """
3756         Get playlist with unavailable videos if the 'show unavailable videos' button exists.
3757         """
3758         browse_id = params = None
3759         renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
3760         if not renderer:
3761             return
3762         menu_renderer = try_get(
3763             renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
3764         for menu_item in menu_renderer:
3765             if not isinstance(menu_item, dict):
3766                 continue
3767             nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
3768             text = try_get(
3769                 nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
3770             if not text or text.lower() != 'show unavailable videos':
3771                 continue
3772             browse_endpoint = try_get(
3773                 nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
3774             browse_id = browse_endpoint.get('browseId')
3775             params = browse_endpoint.get('params')
3776             break
3777
3778         headers = self.generate_api_headers(
3779             ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
3780             visitor_data=self._extract_visitor_data(data, ytcfg))
3781         query = {
3782             'params': params or 'wgYCCAA=',
3783             'browseId': browse_id or 'VL%s' % item_id
3784         }
3785         return self._extract_response(
3786             item_id=item_id, headers=headers, query=query,
3787             check_get_keys='contents', fatal=False, ytcfg=ytcfg,
3788             note='Downloading API JSON with unavailable videos')
3789
3790     def _extract_webpage(self, url, item_id, fatal=True):
3791         retries = self.get_param('extractor_retries', 3)
3792         count = -1
3793         webpage = data = last_error = None
3794         while count < retries:
3795             count += 1
3796             # Sometimes youtube returns a webpage with incomplete ytInitialData
3797             # See: https://github.com/yt-dlp/yt-dlp/issues/116
3798             if last_error:
3799                 self.report_warning('%s. Retrying ...' % last_error)
3800             try:
3801                 webpage = self._download_webpage(
3802                     url, item_id,
3803                     note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
3804                 data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
3805             except ExtractorError as e:
3806                 if isinstance(e.cause, network_exceptions):
3807                     if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
3808                         last_error = error_to_compat_str(e.cause or e.msg)
3809                         if count < retries:
3810                             continue
3811                 if fatal:
3812                     raise
3813                 self.report_warning(error_to_compat_str(e))
3814                 break
3815             else:
3816                 try:
3817                     self._extract_and_report_alerts(data)
3818                 except ExtractorError as e:
3819                     if fatal:
3820                         raise
3821                     self.report_warning(error_to_compat_str(e))
3822                     break
3823
3824                 if dict_get(data, ('contents', 'currentVideoEndpoint')):
3825                     break
3826
3827                 last_error = 'Incomplete yt initial data received'
3828                 if count >= retries:
3829                     if fatal:
3830                         raise ExtractorError(last_error)
3831                     self.report_warning(last_error)
3832                     break
3833
3834         return webpage, data
3835
3836     def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
3837         data = None
3838         if 'webpage' not in self._configuration_arg('skip'):
3839             webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
3840             ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
3841         if not data:
3842             if not ytcfg and self.is_authenticated:
3843                 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
3844                 if 'authcheck' not in self._configuration_arg('skip') and fatal:
3845                     raise ExtractorError(
3846                         msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
3847                               ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
3848                         expected=True)
3849                 self.report_warning(msg, only_once=True)
3850             data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
3851         return data, ytcfg
3852
3853     def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
3854         headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
3855         resolve_response = self._extract_response(
3856             item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
3857             ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)
3858         endpoints = {'browseEndpoint': 'browse', 'watchEndpoint': 'next'}
3859         for ep_key, ep in endpoints.items():
3860             params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
3861             if params:
3862                 return self._extract_response(
3863                     item_id=item_id, query=params, ep=ep, headers=headers,
3864                     ytcfg=ytcfg, fatal=fatal, default_client=default_client,
3865                     check_get_keys=('contents', 'currentVideoEndpoint'))
3866         err_note = 'Failed to resolve url (does the playlist exist?)'
3867         if fatal:
3868             raise ExtractorError(err_note, expected=True)
3869         self.report_warning(err_note, item_id)
3870
3871     @staticmethod
3872     def _smuggle_data(entries, data):
3873         for entry in entries:
3874             if data:
3875                 entry['url'] = smuggle_url(entry['url'], data)
3876             yield entry
3877
    # Value used for the 'params' field of search requests when _search_results()
    # is called without an explicit `params`; a falsy value means the field is omitted
    _SEARCH_PARAMS = None
3879
3880     def _search_results(self, query, params=NO_DEFAULT):
3881         data = {'query': query}
3882         if params is NO_DEFAULT:
3883             params = self._SEARCH_PARAMS
3884         if params:
3885             data['params'] = params
3886         continuation_list = [None]
3887         for page_num in itertools.count(1):
3888             data.update(continuation_list[0] or {})
3889             search = self._extract_response(
3890                 item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
3891                 check_get_keys=('contents', 'onResponseReceivedCommands'))
3892             slr_contents = try_get(
3893                 search,
3894                 (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
3895                  lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
3896                 list)
3897             yield from self._extract_entries({'contents': slr_contents}, continuation_list)
3898             if not continuation_list[0]:
3899                 break
3900
3901
class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
    # Extractor for YouTube "tab" style URLs: channel/user/browse pages, feeds,
    # hashtags, playlist/watch URLs carrying a list= parameter, and direct /<name> URLs
    IE_DESC = 'YouTube Tabs'
    # Verbose-mode pattern. The host is youtube.com, youtubekids.com or one of the
    # Invidious sites, optionally preceded by one subdomain label. Named groups:
    #   channel_type - 'channel', 'c', 'user' or 'browse' when the path starts with one of them
    #   not_channel  - 'feed/', 'hashtag/' or a playlist/watch query up to and including 'list='
    #   id           - the component that follows, up to the next '/', '?', '#' or '&'
    # A URL with neither prefix (a direct URL) only matches when what follows
    # does not start with one of the reserved names.
    _VALID_URL = r'''(?x:
        https?://
            (?:\w+\.)?
            (?:
                youtube(?:kids)?\.com|
                %(invidious)s
            )/
            (?:
                (?P<channel_type>channel|c|user|browse)/|
                (?P<not_channel>
                    feed/|hashtag/|
                    (?:playlist|watch)\?.*?\blist=
                )|
                (?!(?:%(reserved_names)s)\b)  # Direct URLs
            )
            (?P<id>[^/?\#&]+)
    )''' % {
        'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:tab'
3925
    # Test definitions for this extractor (presumably consumed by the project's test harness).
    # Every entry has a 'url'. Entries flagged 'only_matching' carry no playlist expectations;
    # the others list the expected 'info_dict' fields and, for playlists, 'playlist_count' or
    # 'playlist_mincount'. Some entries additionally set 'params', 'expected_warnings',
    # 'skip' or 'add_ie'.
    _TESTS = [{
        'note': 'playlists, multipage',
        'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
        'playlist_mincount': 94,
        'info_dict': {
            'id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'title': 'Игорь Клейнер - Playlists',
            'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
            'uploader': 'Игорь Клейнер',
            'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
        },
    }, {
        'note': 'playlists, multipage, different order',
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 94,
        'info_dict': {
            'id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'title': 'Игорь Клейнер - Playlists',
            'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
            'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'uploader': 'Игорь Клейнер',
        },
    }, {
        'note': 'playlists, series',
        'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'UCYO_jab_esuFRV4b17AJtAw',
            'title': '3Blue1Brown - Playlists',
            'description': 'md5:e1384e8a133307dd10edee76e875d62f',
            'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
            'uploader': '3Blue1Brown',
        },
    }, {
        'note': 'playlists, singlepage',
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
            'title': 'ThirstForScience - Playlists',
            'description': 'md5:609399d937ea957b0f53cbffb747a14c',
            'uploader': 'ThirstForScience',
            'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
        }
    }, {
        'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
        'only_matching': True,
    }, {
        'note': 'basic, single video playlist',
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
            'title': 'youtube-dl public playlist',
        },
        'playlist_count': 1,
    }, {
        'note': 'empty playlist',
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
            'title': 'youtube-dl empty playlist',
        },
        'playlist_count': 0,
    }, {
        'note': 'Home tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Home',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        },
        'playlist_mincount': 2,
    }, {
        'note': 'Videos tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Videos',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        },
        'playlist_mincount': 975,
    }, {
        'note': 'Videos tab, sorted by popular',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Videos',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        },
        'playlist_mincount': 199,
    }, {
        'note': 'Playlists tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Playlists',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        },
        'playlist_mincount': 17,
    }, {
        'note': 'Community tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Community',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        },
        'playlist_mincount': 18,
    }, {
        'note': 'Channels tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Channels',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        },
        'playlist_mincount': 12,
    }, {
        'note': 'Search tab',
        'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
        'playlist_mincount': 40,
        'info_dict': {
            'id': 'UCYO_jab_esuFRV4b17AJtAw',
            'title': '3Blue1Brown - Search - linear algebra',
            'description': 'md5:e1384e8a133307dd10edee76e875d62f',
            'uploader': '3Blue1Brown',
            'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
        },
    }, {
        'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
        'only_matching': True,
    }, {
        'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
        'only_matching': True,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
            'uploader': 'Christiaan008',
            'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
            'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
        },
        'playlist_count': 96,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
            'uploader': 'Cauchemar',
            'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 1123,
    }, {
        'note': 'even larger playlist, 8832 videos',
        'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
        'only_matching': True,
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
            'uploader': 'Interstellar Movie',
            'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
        },
        'playlist_mincount': 21,
    }, {
        'note': 'Playlist with "show unavailable videos" button',
        'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
        'info_dict': {
            'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
            'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
            'uploader': 'Phim Siêu Nhân Nhật Bản',
            'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
        },
        'playlist_mincount': 200,
    }, {
        'note': 'Playlist with unavailable videos in page 7',
        'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
        'info_dict': {
            'title': 'Uploads from BlankTV',
            'id': 'UU8l9frL61Yl5KFOl87nIm2w',
            'uploader': 'BlankTV',
            'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
        },
        'playlist_mincount': 1000,
    }, {
        'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
        'info_dict': {
            'title': 'Data Analysis with Dr Mike Pound',
            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
            'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
            'uploader': 'Computerphile',
            'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
        'only_matching': True,
    }, {
        'note': 'Playlist URL that does not actually serve a playlist',
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is not available.',
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
        'info_dict': {
            'id': '3yImotZU3tw',  # This will keep changing
            'ext': 'mp4',
            'title': compat_str,
            'uploader': 'Sky News',
            'uploader_id': 'skynews',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
            'upload_date': r're:\d{8}',
            'description': compat_str,
            'categories': ['News & Politics'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
    }, {
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'note': 'A channel that is not live. Should raise error',
        'url': 'https://www.youtube.com/user/numberphile/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/trending',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/library',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/history',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/subscriptions',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/watch_later',
        'only_matching': True,
    }, {
        'note': 'Recommended - redirects to home page.',
        'url': 'https://www.youtube.com/feed/recommended',
        'only_matching': True,
    }, {
        'note': 'inline playlist with not always working continuations',
        'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/course',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/zsecurity',
        'only_matching': True,
    }, {
        'url': 'http://www.youtube.com/NASAgovVideo/videos',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/hashtag/cctv9',
        'info_dict': {
            'id': 'cctv9',
            'title': '#cctv9',
        },
        'playlist_mincount': 350,
    }, {
        'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
        'only_matching': True,
    }, {
        'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
        'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
        'only_matching': True
    }, {
        'note': '/browse/ should redirect to /channel/',
        'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
        'only_matching': True
    }, {
        'note': 'VLPL, should redirect to playlist?list=PL...',
        'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
        'info_dict': {
            'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
            'uploader': 'NoCopyrightSounds',
            'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
            'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
            'title': 'NCS Releases',
        },
        'playlist_mincount': 166,
    }, {
        'note': 'Topic, should redirect to playlist?list=UU...',
        'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
        'info_dict': {
            'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
            'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
            'title': 'Uploads from Royalty Free Music - Topic',
            'uploader': 'Royalty Free Music - Topic',
        },
        'expected_warnings': [
            'A channel/user page was given',
            'The URL does not have a videos tab',
        ],
        'playlist_mincount': 101,
    }, {
        'note': 'Topic without a UU playlist',
        'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
        'info_dict': {
            'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
            'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
        },
        'expected_warnings': [
            'A channel/user page was given',
            'The URL does not have a videos tab',
            'Falling back to channel URL',
        ],
        'playlist_mincount': 9,
    }, {
        'note': 'Youtube music Album',
        'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
        'info_dict': {
            'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
            'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
        },
        'playlist_count': 50,
    }, {
        'note': 'unlisted single video playlist',
        'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
        'info_dict': {
            'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
            'uploader': 'colethedj',
            'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
            'title': 'yt-dlp unlisted playlist test',
            'availability': 'unlisted'
        },
        'playlist_count': 1,
    }, {
        'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
        'url': 'https://www.youtube.com/feed/recommended',
        'info_dict': {
            'id': 'recommended',
            'title': 'recommended',
        },
        'playlist_mincount': 50,
        'params': {
            'skip_download': True,
            'extractor_args': {'youtubetab': {'skip': ['webpage']}}
        },
    }, {
        'note': 'API Fallback: /videos tab, sorted by oldest first',
        'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
        'info_dict': {
            'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
            'title': 'Cody\'sLab - Videos',
            'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
            'uploader': 'Cody\'sLab',
            'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
        },
        'playlist_mincount': 650,
        'params': {
            'skip_download': True,
            'extractor_args': {'youtubetab': {'skip': ['webpage']}}
        },
    }, {
        'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
        'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
        'info_dict': {
            'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
            'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
            'title': 'Uploads from Royalty Free Music - Topic',
            'uploader': 'Royalty Free Music - Topic',
        },
        'expected_warnings': [
            'A channel/user page was given',
            'The URL does not have a videos tab',
        ],
        'playlist_mincount': 101,
        'params': {
            'skip_download': True,
            'extractor_args': {'youtubetab': {'skip': ['webpage']}}
        },
    }]
4385
4386     @classmethod
4387     def suitable(cls, url):
4388         return False if YoutubeIE.suitable(url) else super(
4389             YoutubeTabIE, cls).suitable(url)
4390
4391     def _real_extract(self, url):
4392         url, smuggled_data = unsmuggle_url(url, {})
4393         if self.is_music_url(url):
4394             smuggled_data['is_music_url'] = True
4395         info_dict = self.__real_extract(url, smuggled_data)
4396         if info_dict.get('entries'):
4397             info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4398         return info_dict
4399
    # Splits a URL into three named parts: 'pre' is the portion matched by _VALID_URL,
    # 'tab' is an optional '/<word>' component that is only tried when the
    # 'channel_type' group matched, and 'post' is whatever remains of the URL
    _URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$')
4401
    def __real_extract(self, url, smuggled_data):
        """Resolve a channel/tab/playlist/watch URL to an extraction result.

        The host of the URL is forced to www.youtube.com and the URL is then
        rewritten where needed (YouTube Music IDs, channel home -> /videos,
        watch URLs lacking a video ID) before the page data is fetched with
        _extract_data and handed to the matching handler.

        url: the URL to process; the item ID is taken from it via _match_id.
        smuggled_data: mapping of extra flags; only 'is_music_url' is read here.

        Returns the value of url_result, _extract_from_tabs or
        _extract_from_playlist, depending on what the fetched data contains.

        Raises ExtractorError when a music album cannot be resolved to a
        playlist, when a /live tab did not redirect to a video, or when
        neither tabs, a playlist nor a video can be recognized.
        """
        item_id = self._match_id(url)
        # Normalize the host so the rest of the method only deals with www.youtube.com
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # Named groups of _URL_RE as a dict, with unmatched groups mapped to ''
            mobj = self._URL_RE.match(url).groupdict()
            # Only existing keys are overwritten, so the dict keeps its size while being iterated
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj, redirect_warning = get_mobj(url), None
        # Youtube returns incomplete data if tabname is not lower case
        # NOTE(review): 'not_channel' is presumably a named group of _VALID_URL (not visible here) - confirm
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
                elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                    mdata = self._extract_tab_endpoint(
                        f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
                    murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                        get_all=False, expected_type=compat_str)
                    if not murl:
                        raise ExtractorError('Failed to resolve album to playlist')
                    # Hand the canonical URL back to YoutubeTabIE instead of continuing here
                    return self.url_result(murl, ie=YoutubeTabIE.ie_key())
                elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                    pre = f'https://www.youtube.com/channel/{item_id}'

        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        # Re-assemble the (possibly rewritten) URL and re-parse it so mobj reflects the changes
        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id, playlist_id = [qs.get(key, [None])[0] for key in ('v', 'list')]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither a video nor a playlist ID, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
            url = f'https://www.youtube.com/playlist?list={playlist_id}'
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
                return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                       ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

        data, ytcfg = self._extract_data(url, item_id)

        tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                # The requested /videos tab is not the one that was actually selected
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    redirect_warning = f'The URL does not have a {mobj["tab"][1:]} tab'
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        pl_id = f'UU{item_id[2:]}'
                        pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                        try:
                            data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True)
                        except ExtractorError:
                            # Keep the data already fetched for the channel and only extend the warning
                            redirect_warning += ' and the playlist redirect gave error'
                        else:
                            item_id, url, tab_name = pl_id, pl_url, mobj['tab'][1:]
                            redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                    # Still true when the playlist fallback was not attempted or failed
                    if tab_name.lower() != mobj['tab'][1:]:
                        redirect_warning += f'. {tab_name} tab is being downloaded instead'

        if redirect_warning:
            self.report_warning(redirect_warning)
        self.write_debug(f'Final URL: {url}')

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
        self._extract_and_report_alerts(data, only_once=True)
        # data may have been replaced above, so look the tabs up again
        tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
        if tabs:
            return self._extract_from_tabs(item_id, ytcfg, data, tabs)

        playlist = traverse_obj(
            data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

        # Prefer the video ID reported by the page; fall back to the one from the query string
        video_id = traverse_obj(
            data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                   ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4513
4514
class YoutubePlaylistIE(InfoExtractor):
    """Accept bare playlist IDs and ``list=`` URLs and delegate them to YoutubeTabIE.

    This class performs no extraction itself: _real_extract rewrites the input
    into a https://www.youtube.com/playlist URL and returns a url_result that
    points at YoutubeTabIE.
    """
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle url.

        URLs that YoutubeTabIE already accepts, and URLs carrying a non-empty
        ``v`` query parameter, are rejected; everything else is decided by the
        parent class implementation.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # parse_qs is the module-level import from ..utils; no local re-import needed
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Rewrite url as a /playlist URL and delegate it to YoutubeTabIE.

        The query string of the input is carried over; when the input has none
        (e.g. a bare playlist ID) only ``list=<playlist_id>`` is used. If
        YoutubeBaseInfoExtractor.is_music_url reports a music URL, that fact is
        smuggled into the new URL under the 'is_music_url' key.
        """
        playlist_id = self._match_id(url)
        # Evaluate on the original URL, before it is replaced below
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4600
4601
class YoutubeYtBeIE(InfoExtractor):
    """Turn youtu.be short links that carry a ``list=`` parameter into watch URLs.

    The rebuilt URL is handed to YoutubeTabIE through url_result; nothing is
    extracted here.
    """
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent /watch URL (v, list, feature) and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', query)
        # The result is keyed on the playlist ID, not the video ID
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4640
4641
class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` shorthand to the user's /videos page URL."""
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the user's videos page, to be handled by YoutubeTabIE."""
        username = self._match_id(url)
        videos_url = 'https://www.youtube.com/user/%s/videos' % username
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=username)
4655
4656
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the ``:ytfav`` keyword family to the ``LL`` playlist URL."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the fixed ``list=LL`` playlist, handled by YoutubeTabIE."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
4674
4675
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor configured with the ``ytsearch`` key.

    Only class attributes are set here; all behaviour comes from the base classes.
    """
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    # presumably overrides test cases inherited from a base class - verify against the bases
    _TESTS = []
4682
4683
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor configured with the ``ytsearchdate`` key.

    Only class attributes are set here; all behaviour comes from the base classes.
    """
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
4689
4690
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle youtube.com/results URLs by running the search they describe."""
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }

    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of search results whose id and title are both the query.

        The query comes from ``search_query`` (or ``q`` when that is absent or
        empty); the first ``sp`` value, or None, is passed on to _search_results.
        """
        params = parse_qs(url)
        terms = params.get('search_query') or params.get('q')
        query = terms[0]
        search_params = params.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)
4719
4720
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common parent of the feed extractors.
    Every subclass has to provide a _FEED_NAME attribute; it is used both
    for IE_NAME and for the /feed/ URL that extraction is delegated to.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass's _FEED_NAME."""
        feed = self._FEED_NAME
        return 'youtube:%s' % feed

    def _real_extract(self, url):
        """Return a url_result for the feed page, to be handled by YoutubeTabIE."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4737
4738
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ``:ytwatchlater`` keyword to the ``WL`` playlist URL."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the fixed ``list=WL`` playlist, handled by YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
4751
4752
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'recommended'; matches the bare youtube.com home URL and ``:ytrec``."""
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    # Unlike the parent class, this feed is declared as not needing a login
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4768
4769
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'subscriptions'; matches the ``:ytsubs`` keyword family."""
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4781
4782
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'history'; matches ``:ythis`` and ``:ythistory``."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4791
4792
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that lost their video ID and explain why.

    _VALID_URL only matches URLs that end right after ``watch?`` plus at most
    one of a few known non-ID parameters, or after ``attribution_link?a=...``.
    Extraction always fails with an expected error carrying a quoting hint.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError telling the user to quote the URL."""
        # The hint names the yt-dlp command, since this module is part of the yt_dlp package
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  yt-dlp BaW_jenozKc  .',
            expected=True)
4840
4841
class YoutubeClipIE(InfoExtractor):
    """Match youtube.com/clip/ URLs, warn that clips are unsupported and fall back to 'Generic'."""
    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Emit the warning and return a url_result for the same URL with the 'Generic' extractor."""
        notice = 'YouTube clips are not currently supported. The entire video will be downloaded instead'
        self.report_warning(notice)
        return self.url_result(url, 'Generic')
4850
4851
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` value is only 1-10 characters long and report them."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID and the URL."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)