yt_dlp/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import calendar
   6 import copy
   7 import datetime
   8 import hashlib
   9 import itertools
  10 import json
  11 import math
  12 import os.path
  13 import random
  14 import re
  15 import sys
  16 import time
  17 import traceback
  18
  19 from .common import InfoExtractor, SearchInfoExtractor
  20 from ..compat import (
  21     compat_chr,
  22     compat_HTTPError,
  23     compat_parse_qs,
  24     compat_str,
  25     compat_urllib_parse_unquote_plus,
  26     compat_urllib_parse_urlencode,
  27     compat_urllib_parse_urlparse,
  28     compat_urlparse,
  29 )
  30 from ..jsinterp import JSInterpreter
  31 from ..utils import (
  32     bug_reports_message,
  33     clean_html,
  34     datetime_from_str,
  35     dict_get,
  36     error_to_compat_str,
  37     ExtractorError,
  38     float_or_none,
  39     format_field,
  40     int_or_none,
  41     is_html,
  42     join_nonempty,
  43     mimetype2ext,
  44     network_exceptions,
  45     NO_DEFAULT,
  46     orderedSet,
  47     parse_codecs,
  48     parse_count,
  49     parse_duration,
  50     parse_iso8601,
  51     parse_qs,
  52     qualities,
  53     remove_end,
  54     remove_start,
  55     smuggle_url,
  56     str_or_none,
  57     str_to_int,
  58     strftime_or_none,
  59     traverse_obj,
  60     try_get,
  61     unescapeHTML,
  62     unified_strdate,
  63     unsmuggle_url,
  64     update_url_query,
  65     url_or_none,
  66     urljoin,
  67     variadic,
  68 )
  69
  70
  71 def get_first(obj, keys, **kwargs):
  72     return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
  73
  74
# Built-in Innertube client configurations, keyed by client name.
# Entries may omit INNERTUBE_API_KEY, INNERTUBE_HOST and REQUIRE_JS_PLAYER;
# build_innertube_clients() fills in the defaults and adds a 'priority' value.
# Any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    'android_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # ios has HLS live streams
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20210721.07.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
}
 221
 222
 223 def build_innertube_clients():
 224     third_party = {
 225         'embedUrl': 'https://google.com',  # Can be any valid URL
 226     }
 227     base_clients = ('android', 'web', 'ios', 'mweb')
 228     priority = qualities(base_clients[::-1])
 229
 230     for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
 231         ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
 232         ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
 233         ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
 234         ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
 235         ytcfg['priority'] = 10 * priority(client.split('_', 1)[0])
 236
 237         if client in base_clients:
 238             INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
 239             agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
 240             agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
 241             agegate_ytcfg['priority'] -= 1
 242         elif client.endswith('_embedded'):
 243             ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
 244             ytcfg['priority'] -= 2
 245         else:
 246             ytcfg['priority'] -= 3
 247
 248
 249 build_innertube_clients()
 250
 251
 252 class YoutubeBaseInfoExtractor(InfoExtractor):
 253     """Provide base functions for Youtube extractors"""
 254
    # Regex alternation of reserved path names.
    # NOTE(review): presumably URL path components that must not be treated as
    # channel/user names - confirm against the patterns that embed this constant.
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
        r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
        r'browse|oembed|get_video_info|iframe_api|s/player|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    # Regex for playlist IDs: one of the listed prefixes followed by at least
    # 10 ID characters, or one of the fixed short IDs (RDMM, WL, LL, LM)
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    # NOTE(review): presumably the .netrc machine name used for credentials lookup - confirm in InfoExtractor
    _NETRC_MACHINE = 'youtube'

    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False
 267
    # Hostname regex fragments (each with an optional subdomain prefix) for
    # Invidious redirectors and instances, including .onion and .i2p hosts.
    # NOTE(review): presumably joined into the _VALID_URL patterns of the
    # extractors - confirm where this tuple is consumed.
    _INVIDIOUS_SITES = (
        # invidious-redirect websites
        r'(?:www\.)?redirect\.invidious\.io',
        r'(?:(?:www|dev)\.)?invidio\.us',
        # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
        r'(?:www\.)?invidious\.pussthecat\.org',
        r'(?:www\.)?invidious\.zee\.li',
        r'(?:www\.)?invidious\.ethibox\.fr',
        r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
        # youtube-dl invidious instances list
        r'(?:(?:www|no)\.)?invidiou\.sh',
        r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
        r'(?:www\.)?invidious\.kabi\.tk',
        r'(?:www\.)?invidious\.mastodon\.host',
        r'(?:www\.)?invidious\.zapashcanon\.fr',
        r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
        r'(?:www\.)?invidious\.tinfoil-hat\.net',
        r'(?:www\.)?invidious\.himiko\.cloud',
        r'(?:www\.)?invidious\.reallyancient\.tech',
        r'(?:www\.)?invidious\.tube',
        r'(?:www\.)?invidiou\.site',
        r'(?:www\.)?invidious\.site',
        r'(?:www\.)?invidious\.xyz',
        r'(?:www\.)?invidious\.nixnet\.xyz',
        r'(?:www\.)?invidious\.048596\.xyz',
        r'(?:www\.)?invidious\.drycat\.fr',
        r'(?:www\.)?inv\.skyn3t\.in',
        r'(?:www\.)?tube\.poal\.co',
        r'(?:www\.)?tube\.connect\.cafe',
        r'(?:www\.)?vid\.wxzm\.sx',
        r'(?:www\.)?vid\.mint\.lgbt',
        r'(?:www\.)?vid\.puffyan\.us',
        r'(?:www\.)?yewtu\.be',
        r'(?:www\.)?yt\.elukerio\.org',
        r'(?:www\.)?yt\.lelux\.fi',
        r'(?:www\.)?invidious\.ggc-project\.de',
        r'(?:www\.)?yt\.maisputain\.ovh',
        r'(?:www\.)?ytprivate\.com',
        r'(?:www\.)?invidious\.13ad\.de',
        r'(?:www\.)?invidious\.toot\.koeln',
        r'(?:www\.)?invidious\.fdn\.fr',
        r'(?:www\.)?watch\.nettohikari\.com',
        r'(?:www\.)?invidious\.namazso\.eu',
        r'(?:www\.)?invidious\.silkky\.cloud',
        r'(?:www\.)?invidious\.exonip\.de',
        r'(?:www\.)?invidious\.riverside\.rocks',
        r'(?:www\.)?invidious\.blamefran\.net',
        r'(?:www\.)?invidious\.moomoo\.de',
        r'(?:www\.)?ytb\.trom\.tf',
        r'(?:www\.)?yt\.cyberhost\.uk',
        r'(?:www\.)?kgg2m7yk5aybusll\.onion',
        r'(?:www\.)?qklhadlycap4cnod\.onion',
        r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
        r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
        r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
        r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
        r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
        r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
        r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
        r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
        r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
        r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
    )
 331
 332     def _login(self):
 333         """
 334         Attempt to log in to YouTube.
 335         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
 336         """
 337
 338         if (self._LOGIN_REQUIRED
 339                 and self.get_param('cookiefile') is None
 340                 and self.get_param('cookiesfrombrowser') is None):
 341             self.raise_login_required(
 342                 'Login details are needed to download this content', method='cookies')
 343         username, password = self._get_login_info()
 344         if username:
 345             self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
 346
 347     def _initialize_consent(self):
 348         cookies = self._get_cookies('https://www.youtube.com/')
 349         if cookies.get('__Secure-3PSID'):
 350             return
 351         consent_id = None
 352         consent = cookies.get('CONSENT')
 353         if consent:
 354             if 'YES' in consent.value:
 355                 return
 356             consent_id = self._search_regex(
 357                 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
 358         if not consent_id:
 359             consent_id = random.randint(100, 999)
 360         self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
 361
 362     def _initialize_pref(self):
 363         cookies = self._get_cookies('https://www.youtube.com/')
 364         pref_cookie = cookies.get('PREF')
 365         pref = {}
 366         if pref_cookie:
 367             try:
 368                 pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
 369             except ValueError:
 370                 self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
 371         pref.update({'hl': 'en'})
 372         self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))
 373
 374     def _real_initialize(self):
 375         self._initialize_pref()
 376         self._initialize_consent()
 377         self._login()
 378
    # Matches the `ytInitialData = {...};` assignment (also the
    # `window["ytInitialData"] = {...};` form); group 1 is the object literal
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    # Matches the `ytInitialPlayerResponse = {...};` assignment; group 1 is the object literal
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Text allowed right after such an assignment; appended to the patterns
    # above to anchor the end of their non-greedy object match
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
 382
 383     def _get_default_ytcfg(self, client='web'):
 384         return copy.deepcopy(INNERTUBE_CLIENTS[client])
 385
 386     def _get_innertube_host(self, client='web'):
 387         return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
 388
 389     def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
 390         # try_get but with fallback to default ytcfg client values when present
 391         _func = lambda y: try_get(y, getter, expected_type)
 392         return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
 393
 394     def _extract_client_name(self, ytcfg, default_client='web'):
 395         return self._ytcfg_get_safe(
 396             ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
 397                     lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
 398
 399     def _extract_client_version(self, ytcfg, default_client='web'):
 400         return self._ytcfg_get_safe(
 401             ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
 402                     lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
 403
 404     def _extract_api_key(self, ytcfg=None, default_client='web'):
 405         return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
 406
 407     def _extract_context(self, ytcfg=None, default_client='web'):
 408         context = get_first(
 409             (ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
 410         # Enforce language for extraction
 411         traverse_obj(context, 'client', expected_type=dict, default={})['hl'] = 'en'
 412         return context
 413
    # Cached SAPISID cookie value used by _generate_sapisidhash_header():
    # None = not looked up yet, False = no usable cookie found, str = the value
    _SAPISID = None
 415
 416     def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
 417         time_now = round(time.time())
 418         if self._SAPISID is None:
 419             yt_cookies = self._get_cookies('https://www.youtube.com')
 420             # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
 421             # See: https://github.com/yt-dlp/yt-dlp/issues/393
 422             sapisid_cookie = dict_get(
 423                 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
 424             if sapisid_cookie and sapisid_cookie.value:
 425                 self._SAPISID = sapisid_cookie.value
 426                 self.write_debug('Extracted SAPISID cookie')
 427                 # SAPISID cookie is required if not already present
 428                 if not yt_cookies.get('SAPISID'):
 429                     self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
 430                     self._set_cookie(
 431                         '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
 432             else:
 433                 self._SAPISID = False
 434         if not self._SAPISID:
 435             return None
 436         # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
 437         sapisidhash = hashlib.sha1(
 438             f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
 439         return f'SAPISIDHASH {time_now}_{sapisidhash}'
 440
 441     def _call_api(self, ep, query, video_id, fatal=True, headers=None,
 442                   note='Downloading API JSON', errnote='Unable to download API page',
 443                   context=None, api_key=None, api_hostname=None, default_client='web'):
 444
 445         data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
 446         data.update(query)
 447         real_headers = self.generate_api_headers(default_client=default_client)
 448         real_headers.update({'content-type': 'application/json'})
 449         if headers:
 450             real_headers.update(headers)
 451         return self._download_json(
 452             'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
 453             video_id=video_id, fatal=fatal, note=note, errnote=errnote,
 454             data=json.dumps(data).encode('utf8'), headers=real_headers,
 455             query={'key': api_key or self._extract_api_key()})
 456
 457     def extract_yt_initial_data(self, item_id, webpage, fatal=True):
 458         data = self._search_regex(
 459             (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
 460              self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
 461         if data:
 462             return self._parse_json(data, item_id, fatal=fatal)
 463
 464     @staticmethod
 465     def _extract_session_index(*data):
 466         """
 467         Index of current account in account list.
 468         See: https://github.com/yt-dlp/yt-dlp/pull/519
 469         """
 470         for ytcfg in data:
 471             session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
 472             if session_index is not None:
 473                 return session_index
 474
 475     # Deprecated?
 476     def _extract_identity_token(self, ytcfg=None, webpage=None):
 477         if ytcfg:
 478             token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
 479             if token:
 480                 return token
 481         if webpage:
 482             return self._search_regex(
 483                 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
 484                 'identity token', default=None, fatal=False)
 485
 486     @staticmethod
 487     def _extract_account_syncid(*args):
 488         """
 489         Extract syncId required to download private playlists of secondary channels
 490         @params response and/or ytcfg
 491         """
 492         for data in args:
 493             # ytcfg includes channel_syncid if on secondary channel
 494             delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
 495             if delegated_sid:
 496                 return delegated_sid
 497             sync_ids = (try_get(
 498                 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
 499                        lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
 500             if len(sync_ids) >= 2 and sync_ids[1]:
 501                 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
 502                 # and just "user_syncid||" for primary channel. We only want the channel_syncid
 503                 return sync_ids[0]
 504
 505     @staticmethod
 506     def _extract_visitor_data(*args):
 507         """
 508         Extracts visitorData from an API response or ytcfg
 509         Appears to be used to track session state
 510         """
 511         return get_first(
 512             args, (('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))),
 513             expected_type=str)
 514
 515     @property
 516     def is_authenticated(self):
 517         return bool(self._generate_sapisidhash_header())
 518
 519     def extract_ytcfg(self, video_id, webpage):
 520         if not webpage:
 521             return {}
 522         return self._parse_json(
 523             self._search_regex(
 524                 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
 525                 default='{}'), video_id, fatal=False) or {}
 526
 527     def generate_api_headers(
 528             self, *, ytcfg=None, account_syncid=None, session_index=None,
 529             visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
 530
 531         origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
 532         headers = {
 533             'X-YouTube-Client-Name': compat_str(
 534                 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
 535             'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
 536             'Origin': origin,
 537             'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
 538             'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
 539             'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
 540         }
 541         if session_index is None:
 542             session_index = self._extract_session_index(ytcfg)
 543         if account_syncid or session_index is not None:
 544             headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
 545
 546         auth = self._generate_sapisidhash_header(origin)
 547         if auth is not None:
 548             headers['Authorization'] = auth
 549             headers['X-Origin'] = origin
 550         return {h: v for h, v in headers.items() if v is not None}
 551
 552     @staticmethod
 553     def _build_api_continuation_query(continuation, ctp=None):
 554         query = {
 555             'continuation': continuation
 556         }
 557         # TODO: Inconsistency with clickTrackingParams.
 558         # Currently we have a fixed ctp contained within context (from ytcfg)
 559         # and a ctp in root query for continuation.
 560         if ctp:
 561             query['clickTracking'] = {'clickTrackingParams': ctp}
 562         return query
 563
 564     @classmethod
 565     def _extract_next_continuation_data(cls, renderer):
 566         next_continuation = try_get(
 567             renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
 568                        lambda x: x['continuation']['reloadContinuationData']), dict)
 569         if not next_continuation:
 570             return
 571         continuation = next_continuation.get('continuation')
 572         if not continuation:
 573             return
 574         ctp = next_continuation.get('clickTrackingParams')
 575         return cls._build_api_continuation_query(continuation, ctp)
 576
 577     @classmethod
 578     def _extract_continuation_ep_data(cls, continuation_ep: dict):
 579         if isinstance(continuation_ep, dict):
 580             continuation = try_get(
 581                 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
 582             if not continuation:
 583                 return
 584             ctp = continuation_ep.get('clickTrackingParams')
 585             return cls._build_api_continuation_query(continuation, ctp)
 586
 587     @classmethod
 588     def _extract_continuation(cls, renderer):
 589         next_continuation = cls._extract_next_continuation_data(renderer)
 590         if next_continuation:
 591             return next_continuation
 592
 593         contents = []
 594         for key in ('contents', 'items'):
 595             contents.extend(try_get(renderer, lambda x: x[key], list) or [])
 596
 597         for content in contents:
 598             if not isinstance(content, dict):
 599                 continue
 600             continuation_ep = try_get(
 601                 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
 602                           lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
 603                 dict)
 604             continuation = cls._extract_continuation_ep_data(continuation_ep)
 605             if continuation:
 606                 return continuation
 607
 608     @classmethod
 609     def _extract_alerts(cls, data):
 610         for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
 611             if not isinstance(alert_dict, dict):
 612                 continue
 613             for alert in alert_dict.values():
 614                 alert_type = alert.get('type')
 615                 if not alert_type:
 616                     continue
 617                 message = cls._get_text(alert, 'text')
 618                 if message:
 619                     yield alert_type, message
 620
 621     def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
 622         errors = []
 623         warnings = []
 624         for alert_type, alert_message in alerts:
 625             if alert_type.lower() == 'error' and fatal:
 626                 errors.append([alert_type, alert_message])
 627             else:
 628                 warnings.append([alert_type, alert_message])
 629
 630         for alert_type, alert_message in (warnings + errors[:-1]):
 631             self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
 632         if errors:
 633             raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
 634
 635     def _extract_and_report_alerts(self, data, *args, **kwargs):
 636         return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
 637
 638     def _extract_badges(self, renderer: dict):
 639         badges = set()
 640         for badge in try_get(renderer, lambda x: x['badges'], list) or []:
 641             label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
 642             if label:
 643                 badges.add(label.lower())
 644         return badges
 645
    @staticmethod
    def _get_text(data, *path_list, max_runs=None):
        """
        Return the first non-empty text found in `data` at any of the given paths.

        A text object is either a dict with a 'simpleText' string or a dict
        with a 'runs' list (a bare list is treated as the runs themselves);
        the 'text' strings of the runs are concatenated.

        @param path_list    traverse_obj() paths tried in order; with no path
                            given, `data` itself is used as the text object
        @param max_runs     if truthy, only this many leading runs are joined
        @returns            the text, or None if nothing was found
        """
        for path in path_list or [None]:
            if path is None:
                obj = [data]
            else:
                obj = traverse_obj(data, path, default=[])
                # Paths without `...` or list/tuple keys are wrapped so that the
                # result can be iterated below like the result of a branching path
                if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                    obj = [obj]
            for item in obj:
                text = try_get(item, lambda x: x['simpleText'], compat_str)
                if text:
                    return text
                runs = try_get(item, lambda x: x['runs'], list) or []
                if not runs and isinstance(item, list):
                    runs = item

                # Keep at most max_runs runs (all of them when max_runs is falsy)
                runs = runs[:min(len(runs), max_runs or len(runs))]
                text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
                if text:
                    return text
 667
 668     @staticmethod
 669     def extract_relative_time(relative_time_text):
 670         """
 671         Extracts a relative time from string and converts to dt object
 672         e.g. 'streamed 6 days ago', '5 seconds ago (edited)'
 673         """
 674         mobj = re.search(r'(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
 675         if mobj:
 676             try:
 677                 return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')), precision='auto')
 678             except ValueError:
 679                 return None
 680
 681     def _extract_time_text(self, renderer, *path_list):
 682         text = self._get_text(renderer, *path_list) or ''
 683         dt = self.extract_relative_time(text)
 684         timestamp = None
 685         if isinstance(dt, datetime.datetime):
 686             timestamp = calendar.timegm(dt.timetuple())
 687         if text and timestamp is None:
 688             self.report_warning('Cannot parse localized time text' + bug_reports_message(), only_once=True)
 689         return timestamp, text
 690
 691     def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
 692                           ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
 693                           default_client='web'):
 694         response = None
 695         last_error = None
 696         count = -1
 697         retries = self.get_param('extractor_retries', 3)
 698         if check_get_keys is None:
 699             check_get_keys = []
 700         while count < retries:
 701             count += 1
 702             if last_error:
 703                 self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
 704             try:
 705                 response = self._call_api(
 706                     ep=ep, fatal=True, headers=headers,
 707                     video_id=item_id, query=query,
 708                     context=self._extract_context(ytcfg, default_client),
 709                     api_key=self._extract_api_key(ytcfg, default_client),
 710                     api_hostname=api_hostname, default_client=default_client,
 711                     note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
 712             except ExtractorError as e:
 713                 if isinstance(e.cause, network_exceptions):
 714                     if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
 715                         e.cause.seek(0)
 716                         yt_error = try_get(
 717                             self._parse_json(e.cause.read().decode(), item_id, fatal=False),
 718                             lambda x: x['error']['message'], compat_str)
 719                         if yt_error:
 720                             self._report_alerts([('ERROR', yt_error)], fatal=False)
 721                     # Downloading page may result in intermittent 5xx HTTP error
 722                     # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
 723                     # We also want to catch all other network exceptions since errors in later pages can be troublesome
 724                     # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
 725                     if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
 726                         last_error = error_to_compat_str(e.cause or e.msg)
 727                         if count < retries:
 728                             continue
 729                 if fatal:
 730                     raise
 731                 else:
 732                     self.report_warning(error_to_compat_str(e))
 733                     return
 734
 735             else:
 736                 try:
 737                     self._extract_and_report_alerts(response, only_once=True)
 738                 except ExtractorError as e:
 739                     # YouTube servers may return errors we want to retry on in a 200 OK response
 740                     # See: https://github.com/yt-dlp/yt-dlp/issues/839
 741                     if 'unknown error' in e.msg.lower():
 742                         last_error = e.msg
 743                         continue
 744                     if fatal:
 745                         raise
 746                     self.report_warning(error_to_compat_str(e))
 747                     return
 748                 if not check_get_keys or dict_get(response, check_get_keys):
 749                     break
 750                 # Youtube sometimes sends incomplete data
 751                 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
 752                 last_error = 'Incomplete data received'
 753                 if count >= retries:
 754                     if fatal:
 755                         raise ExtractorError(last_error)
 756                     else:
 757                         self.report_warning(last_error)
 758                         return
 759         return response
 760
 761     @staticmethod
 762     def is_music_url(url):
 763         return re.match(r'https?://music\.youtube\.com/', url) is not None
 764
 765     def _extract_video(self, renderer):
 766         video_id = renderer.get('videoId')
 767         title = self._get_text(renderer, 'title')
 768         description = self._get_text(renderer, 'descriptionSnippet')
 769         duration = parse_duration(self._get_text(
 770             renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
 771         view_count_text = self._get_text(renderer, 'viewCountText') or ''
 772         view_count = str_to_int(self._search_regex(
 773             r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
 774             'view count', default=None))
 775
 776         uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
 777         channel_id = traverse_obj(
 778             renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'), expected_type=str, get_all=False)
 779         timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
 780         scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
 781         overlay_style = traverse_obj(
 782             renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), get_all=False, expected_type=str)
 783         badges = self._extract_badges(renderer)
 784         return {
 785             '_type': 'url',
 786             'ie_key': YoutubeIE.ie_key(),
 787             'id': video_id,
 788             'url': f'https://www.youtube.com/watch?v={video_id}',
 789             'title': title,
 790             'description': description,
 791             'duration': duration,
 792             'view_count': view_count,
 793             'uploader': uploader,
 794             'channel_id': channel_id,
 795             'upload_date': strftime_or_none(timestamp, '%Y%m%d'),
 796             'live_status': ('is_upcoming' if scheduled_timestamp is not None
 797                             else 'was_live' if 'streamed' in time_text.lower()
 798                             else 'is_live' if overlay_style is not None and overlay_style == 'LIVE' or 'live now' in badges
 799                             else None),
 800             'release_timestamp': scheduled_timestamp,
 801             'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
 802         }
 803
 804
 805 class YoutubeIE(YoutubeBaseInfoExtractor):
 806     IE_DESC = 'YouTube'
 807     _VALID_URL = r"""(?x)^
 808                      (
 809                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 810                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
 811                             (?:www\.)?deturl\.com/www\.youtube\.com|
 812                             (?:www\.)?pwnyoutube\.com|
 813                             (?:www\.)?hooktube\.com|
 814                             (?:www\.)?yourepeat\.com|
 815                             tube\.majestyc\.net|
 816                             %(invidious)s|
 817                             youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
 818                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 819                          (?:                                                  # the various things that can precede the ID:
 820                              (?:(?:v|embed|e|shorts)/(?!videoseries))         # v/ or embed/ or e/ or shorts/
 821                              |(?:                                             # or the v= param in all its forms
 822                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 823                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 824                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 825                                  v=
 826                              )
 827                          ))
 828                          |(?:
 829                             youtu\.be|                                        # just youtu.be/xxxx
 830                             vid\.plus|                                        # or vid.plus/xxxx
 831                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 832                             %(invidious)s
 833                          )/
 834                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 835                          )
 836                      )?                                                       # all until now is optional -> you can pass the naked ID
 837                      (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
 838                      (?(1).+)?                                                # if we found the ID, everything can follow
 839                      (?:\#|$)""" % {
 840         'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
 841     }
 842     _PLAYER_INFO_RE = (
 843         r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
 844         r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
 845         r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
 846     )
 847     _formats = {
 848         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 849         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 850         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 851         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 852         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 853         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 854         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 855         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 856         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 857         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 858         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 859         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 860         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 861         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 862         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 863         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 864         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 865         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 866
 867
 868         # 3D videos
 869         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 870         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 871         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 872         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 873         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
 874         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 875         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 876
 877         # Apple HTTP Live Streaming
 878         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 879         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 880         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 881         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 882         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 883         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 884         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 885         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
 886
 887         # DASH mp4 video
 888         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
 889         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
 890         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 891         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
 892         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
 893         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
 894         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
 895         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 896         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
 897         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 898         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 899         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
 900
 901         # Dash mp4 audio
 902         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
 903         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
 904         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
 905         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 906         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 907         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
 908         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
 909
 910         # Dash webm
 911         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 912         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 913         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 914         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 915         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 916         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 917         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
 918         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 919         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 920         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 921         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 922         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 923         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 924         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 925         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 926         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
 927         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 928         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 929         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 930         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 931         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 932         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 933
 934         # Dash webm audio
 935         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
 936         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
 937
 938         # Dash webm audio with opus inside
 939         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
 940         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
 941         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
 942
 943         # RTMP (unnamed)
 944         '_rtmp': {'protocol': 'rtmp'},
 945
 946         # av01 video only formats sometimes served with "unknown" codecs
 947         '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
 948         '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
 949         '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
 950         '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
 951         '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
 952         '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
 953         '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
 954         '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
 955     }
 956     _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
 957
 958     _GEO_BYPASS = False
 959
 960     IE_NAME = 'youtube'
 961     _TESTS = [
 962         {
 963             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
 964             'info_dict': {
 965                 'id': 'BaW_jenozKc',
 966                 'ext': 'mp4',
 967                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 968                 'uploader': 'Philipp Hagemeister',
 969                 'uploader_id': 'phihag',
 970                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 971                 'channel': 'Philipp Hagemeister',
 972                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
 973                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
 974                 'upload_date': '20121002',
 975                 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
 976                 'categories': ['Science & Technology'],
 977                 'tags': ['youtube-dl'],
 978                 'duration': 10,
 979                 'view_count': int,
 980                 'like_count': int,
 981                 # 'dislike_count': int,
 982                 'availability': 'public',
 983                 'playable_in_embed': True,
 984                 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
 985                 'live_status': 'not_live',
 986                 'age_limit': 0,
 987                 'start_time': 1,
 988                 'end_time': 9,
 989             }
 990         },
 991         {
 992             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
 993             'note': 'Embed-only video (#1746)',
 994             'info_dict': {
 995                 'id': 'yZIXLfi8CZQ',
 996                 'ext': 'mp4',
 997                 'upload_date': '20120608',
 998                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
 999                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1000                 'uploader': 'SET India',
1001                 'uploader_id': 'setindia',
1002                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1003                 'age_limit': 18,
1004             },
1005             'skip': 'Private video',
1006         },
1007         {
1008             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1009             'note': 'Use the first video ID in the URL',
1010             'info_dict': {
1011                 'id': 'BaW_jenozKc',
1012                 'ext': 'mp4',
1013                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1014                 'uploader': 'Philipp Hagemeister',
1015                 'uploader_id': 'phihag',
1016                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1017                 'upload_date': '20121002',
1018                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1019                 'categories': ['Science & Technology'],
1020                 'tags': ['youtube-dl'],
1021                 'duration': 10,
1022                 'view_count': int,
1023                 'like_count': int,
1024                 'dislike_count': int,
1025             },
1026             'params': {
1027                 'skip_download': True,
1028             },
1029         },
1030         {
1031             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1032             'note': '256k DASH audio (format 141) via DASH manifest',
1033             'info_dict': {
1034                 'id': 'a9LDPn-MO4I',
1035                 'ext': 'm4a',
1036                 'upload_date': '20121002',
1037                 'uploader_id': '8KVIDEO',
1038                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1039                 'description': '',
1040                 'uploader': '8KVIDEO',
1041                 'title': 'UHDTV TEST 8K VIDEO.mp4'
1042             },
1043             'params': {
1044                 'youtube_include_dash_manifest': True,
1045                 'format': '141',
1046             },
1047             'skip': 'format 141 not served anymore',
1048         },
1049         # DASH manifest with encrypted signature
1050         {
1051             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1052             'info_dict': {
1053                 'id': 'IB3lcPjvWLA',
1054                 'ext': 'm4a',
1055                 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1056                 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1057                 'duration': 244,
1058                 'uploader': 'AfrojackVEVO',
1059                 'uploader_id': 'AfrojackVEVO',
1060                 'upload_date': '20131011',
1061                 'abr': 129.495,
1062             },
1063             'params': {
1064                 'youtube_include_dash_manifest': True,
1065                 'format': '141/bestaudio[ext=m4a]',
1066             },
1067         },
1068         # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1069         {
1070             'note': 'Embed allowed age-gate video',
1071             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1072             'info_dict': {
1073                 'id': 'HtVdAasjOgU',
1074                 'ext': 'mp4',
1075                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1076                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1077                 'duration': 142,
1078                 'uploader': 'The Witcher',
1079                 'uploader_id': 'WitcherGame',
1080                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1081                 'upload_date': '20140605',
1082                 'age_limit': 18,
1083             },
1084         },
1085         {
1086             'note': 'Age-gate video with embed allowed in public site',
1087             'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1088             'info_dict': {
1089                 'id': 'HsUATh_Nc2U',
1090                 'ext': 'mp4',
1091                 'title': 'Godzilla 2 (Official Video)',
1092                 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1093                 'upload_date': '20200408',
1094                 'uploader_id': 'FlyingKitty900',
1095                 'uploader': 'FlyingKitty',
1096                 'age_limit': 18,
1097             },
1098         },
1099         {
1100             'note': 'Age-gate video embedable only with clientScreen=EMBED',
1101             'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1102             'info_dict': {
1103                 'id': 'Tq92D6wQ1mg',
1104                 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1105                 'ext': 'mp4',
1106                 'upload_date': '20191227',
1107                 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1108                 'uploader': 'Projekt Melody',
1109                 'description': 'md5:17eccca93a786d51bc67646756894066',
1110                 'age_limit': 18,
1111             },
1112         },
1113         {
1114             'note': 'Non-Agegated non-embeddable video',
1115             'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1116             'info_dict': {
1117                 'id': 'MeJVWBSsPAY',
1118                 'ext': 'mp4',
1119                 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1120                 'uploader': 'Herr Lurik',
1121                 'uploader_id': 'st3in234',
1122                 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1123                 'upload_date': '20130730',
1124             },
1125         },
1126         {
1127             'note': 'Non-bypassable age-gated video',
1128             'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1129             'only_matching': True,
1130         },
1131         # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1132         # YouTube Red ad is not captured for creator
1133         {
1134             'url': '__2ABJjxzNo',
1135             'info_dict': {
1136                 'id': '__2ABJjxzNo',
1137                 'ext': 'mp4',
1138                 'duration': 266,
1139                 'upload_date': '20100430',
1140                 'uploader_id': 'deadmau5',
1141                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1142                 'creator': 'deadmau5',
1143                 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1144                 'uploader': 'deadmau5',
1145                 'title': 'Deadmau5 - Some Chords (HD)',
1146                 'alt_title': 'Some Chords',
1147             },
1148             'expected_warnings': [
1149                 'DASH manifest missing',
1150             ]
1151         },
1152         # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1153         {
1154             'url': 'lqQg6PlCWgI',
1155             'info_dict': {
1156                 'id': 'lqQg6PlCWgI',
1157                 'ext': 'mp4',
1158                 'duration': 6085,
1159                 'upload_date': '20150827',
1160                 'uploader_id': 'olympic',
1161                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1162                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1163                 'uploader': 'Olympics',
1164                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
1165             },
1166             'params': {
1167                 'skip_download': 'requires avconv',
1168             }
1169         },
1170         # Non-square pixels
1171         {
1172             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1173             'info_dict': {
1174                 'id': '_b-2C3KPAM0',
1175                 'ext': 'mp4',
1176                 'stretched_ratio': 16 / 9.,
1177                 'duration': 85,
1178                 'upload_date': '20110310',
1179                 'uploader_id': 'AllenMeow',
1180                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1181                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1182                 'uploader': '孫ᄋᄅ',
1183                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1184             },
1185         },
1186         # url_encoded_fmt_stream_map is empty string
1187         {
1188             'url': 'qEJwOuvDf7I',
1189             'info_dict': {
1190                 'id': 'qEJwOuvDf7I',
1191                 'ext': 'webm',
1192                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1193                 'description': '',
1194                 'upload_date': '20150404',
1195                 'uploader_id': 'spbelect',
1196                 'uploader': 'Наблюдатели Петербурга',
1197             },
1198             'params': {
1199                 'skip_download': 'requires avconv',
1200             },
1201             'skip': 'This live event has ended.',
1202         },
1203         # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1204         {
1205             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1206             'info_dict': {
1207                 'id': 'FIl7x6_3R5Y',
1208                 'ext': 'webm',
1209                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1210                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1211                 'duration': 220,
1212                 'upload_date': '20150625',
1213                 'uploader_id': 'dorappi2000',
1214                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1215                 'uploader': 'dorappi2000',
1216                 'formats': 'mincount:31',
1217             },
1218             'skip': 'not actual anymore',
1219         },
1220         # DASH manifest with segment_list
1221         {
1222             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1223             'md5': '8ce563a1d667b599d21064e982ab9e31',
1224             'info_dict': {
1225                 'id': 'CsmdDsKjzN8',
1226                 'ext': 'mp4',
1227                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1228                 'uploader': 'Airtek',
1229                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1230                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1231                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1232             },
1233             'params': {
1234                 'youtube_include_dash_manifest': True,
1235                 'format': '135',  # bestvideo
1236             },
1237             'skip': 'This live event has ended.',
1238         },
1239         {
1240             # Multifeed videos (multiple cameras), URL is for Main Camera
1241             'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1242             'info_dict': {
1243                 'id': 'jvGDaLqkpTg',
1244                 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1245                 'description': 'md5:e03b909557865076822aa169218d6a5d',
1246             },
1247             'playlist': [{
1248                 'info_dict': {
1249                     'id': 'jvGDaLqkpTg',
1250                     'ext': 'mp4',
1251                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1252                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1253                     'duration': 10643,
1254                     'upload_date': '20161111',
1255                     'uploader': 'Team PGP',
1256                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1257                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1258                 },
1259             }, {
1260                 'info_dict': {
1261                     'id': '3AKt1R1aDnw',
1262                     'ext': 'mp4',
1263                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1264                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1265                     'duration': 10991,
1266                     'upload_date': '20161111',
1267                     'uploader': 'Team PGP',
1268                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1269                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1270                 },
1271             }, {
1272                 'info_dict': {
1273                     'id': 'RtAMM00gpVc',
1274                     'ext': 'mp4',
1275                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1276                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1277                     'duration': 10995,
1278                     'upload_date': '20161111',
1279                     'uploader': 'Team PGP',
1280                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1281                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1282                 },
1283             }, {
1284                 'info_dict': {
1285                     'id': '6N2fdlP3C5U',
1286                     'ext': 'mp4',
1287                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1288                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1289                     'duration': 10990,
1290                     'upload_date': '20161111',
1291                     'uploader': 'Team PGP',
1292                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1293                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1294                 },
1295             }],
1296             'params': {
1297                 'skip_download': True,
1298             },
1299             'skip': 'Not multifeed anymore',
1300         },
1301         {
1302             # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1303             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1304             'info_dict': {
1305                 'id': 'gVfLd0zydlo',
1306                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1307             },
1308             'playlist_count': 2,
1309             'skip': 'Not multifeed anymore',
1310         },
1311         {
1312             'url': 'https://vid.plus/FlRa-iH7PGw',
1313             'only_matching': True,
1314         },
1315         {
1316             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1317             'only_matching': True,
1318         },
1319         {
1320             # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1321             # Also tests cut-off URL expansion in video description (see
1322             # https://github.com/ytdl-org/youtube-dl/issues/1892,
1323             # https://github.com/ytdl-org/youtube-dl/issues/8164)
1324             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1325             'info_dict': {
1326                 'id': 'lsguqyKfVQg',
1327                 'ext': 'mp4',
1328                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1329                 'alt_title': 'Dark Walk',
1330                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1331                 'duration': 133,
1332                 'upload_date': '20151119',
1333                 'uploader_id': 'IronSoulElf',
1334                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1335                 'uploader': 'IronSoulElf',
1336                 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1337                 'track': 'Dark Walk',
1338                 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1339                 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1340             },
1341             'params': {
1342                 'skip_download': True,
1343             },
1344         },
1345         {
1346             # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1347             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1348             'only_matching': True,
1349         },
1350         {
1351             # Video with yt:stretch=17:0
1352             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1353             'info_dict': {
1354                 'id': 'Q39EVAstoRM',
1355                 'ext': 'mp4',
1356                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1357                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1358                 'upload_date': '20151107',
1359                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1360                 'uploader': 'CH GAMER DROID',
1361             },
1362             'params': {
1363                 'skip_download': True,
1364             },
1365             'skip': 'This video does not exist.',
1366         },
1367         {
1368             # Video with incomplete 'yt:stretch=16:'
1369             'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1370             'only_matching': True,
1371         },
1372         {
1373             # Video licensed under Creative Commons
1374             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1375             'info_dict': {
1376                 'id': 'M4gD1WSo5mA',
1377                 'ext': 'mp4',
1378                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1379                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1380                 'duration': 721,
1381                 'upload_date': '20150127',
1382                 'uploader_id': 'BerkmanCenter',
1383                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1384                 'uploader': 'The Berkman Klein Center for Internet & Society',
1385                 'license': 'Creative Commons Attribution license (reuse allowed)',
1386             },
1387             'params': {
1388                 'skip_download': True,
1389             },
1390         },
1391         {
1392             # Channel-like uploader_url
1393             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1394             'info_dict': {
1395                 'id': 'eQcmzGIKrzg',
1396                 'ext': 'mp4',
1397                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1398                 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1399                 'duration': 4060,
1400                 'upload_date': '20151119',
1401                 'uploader': 'Bernie Sanders',
1402                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1403                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1404                 'license': 'Creative Commons Attribution license (reuse allowed)',
1405             },
1406             'params': {
1407                 'skip_download': True,
1408             },
1409         },
1410         {
1411             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1412             'only_matching': True,
1413         },
1414         {
1415             # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1416             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1417             'only_matching': True,
1418         },
1419         {
1420             # Rental video preview
1421             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1422             'info_dict': {
1423                 'id': 'uGpuVWrhIzE',
1424                 'ext': 'mp4',
1425                 'title': 'Piku - Trailer',
1426                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1427                 'upload_date': '20150811',
1428                 'uploader': 'FlixMatrix',
1429                 'uploader_id': 'FlixMatrixKaravan',
1430                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1431                 'license': 'Standard YouTube License',
1432             },
1433             'params': {
1434                 'skip_download': True,
1435             },
1436             'skip': 'This video is not available.',
1437         },
1438         {
1439             # YouTube Red video with episode data
1440             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1441             'info_dict': {
1442                 'id': 'iqKdEhx-dD4',
1443                 'ext': 'mp4',
1444                 'title': 'Isolation - Mind Field (Ep 1)',
1445                 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1446                 'duration': 2085,
1447                 'upload_date': '20170118',
1448                 'uploader': 'Vsauce',
1449                 'uploader_id': 'Vsauce',
1450                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1451                 'series': 'Mind Field',
1452                 'season_number': 1,
1453                 'episode_number': 1,
1454             },
1455             'params': {
1456                 'skip_download': True,
1457             },
1458             'expected_warnings': [
1459                 'Skipping DASH manifest',
1460             ],
1461         },
1462         {
1463             # The following content has been identified by the YouTube community
1464             # as inappropriate or offensive to some audiences.
1465             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1466             'info_dict': {
1467                 'id': '6SJNVb0GnPI',
1468                 'ext': 'mp4',
1469                 'title': 'Race Differences in Intelligence',
1470                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1471                 'duration': 965,
1472                 'upload_date': '20140124',
1473                 'uploader': 'New Century Foundation',
1474                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1475                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1476             },
1477             'params': {
1478                 'skip_download': True,
1479             },
1480             'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1481         },
1482         {
1483             # itag 212
1484             'url': '1t24XAntNCY',
1485             'only_matching': True,
1486         },
1487         {
1488             # geo restricted to JP
1489             'url': 'sJL6WA-aGkQ',
1490             'only_matching': True,
1491         },
1492         {
1493             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1494             'only_matching': True,
1495         },
1496         {
1497             'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1498             'only_matching': True,
1499         },
1500         {
1501             # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1502             'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1503             'only_matching': True,
1504         },
1505         {
1506             # DRM protected
1507             'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1508             'only_matching': True,
1509         },
1510         {
1511             # Video with unsupported adaptive stream type formats
1512             'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1513             'info_dict': {
1514                 'id': 'Z4Vy8R84T1U',
1515                 'ext': 'mp4',
1516                 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1517                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1518                 'duration': 433,
1519                 'upload_date': '20130923',
1520                 'uploader': 'Amelia Putri Harwita',
1521                 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1522                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1523                 'formats': 'maxcount:10',
1524             },
1525             'params': {
1526                 'skip_download': True,
1527                 'youtube_include_dash_manifest': False,
1528             },
1529             'skip': 'not actual anymore',
1530         },
1531         {
1532             # Youtube Music Auto-generated description
1533             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1534             'info_dict': {
1535                 'id': 'MgNrAu2pzNs',
1536                 'ext': 'mp4',
1537                 'title': 'Voyeur Girl',
1538                 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1539                 'upload_date': '20190312',
1540                 'uploader': 'Stephen - Topic',
1541                 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1542                 'artist': 'Stephen',
1543                 'track': 'Voyeur Girl',
1544                 'album': 'it\'s too much love to know my dear',
1545                 'release_date': '20190313',
1546                 'release_year': 2019,
1547             },
1548             'params': {
1549                 'skip_download': True,
1550             },
1551         },
1552         {
1553             'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1554             'only_matching': True,
1555         },
1556         {
1557             # invalid -> valid video id redirection
1558             'url': 'DJztXj2GPfl',
1559             'info_dict': {
1560                 'id': 'DJztXj2GPfk',
1561                 'ext': 'mp4',
1562                 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1563                 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1564                 'upload_date': '20090125',
1565                 'uploader': 'Prochorowka',
1566                 'uploader_id': 'Prochorowka',
1567                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1568                 'artist': 'Panjabi MC',
1569                 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1570                 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1571             },
1572             'params': {
1573                 'skip_download': True,
1574             },
1575             'skip': 'Video unavailable',
1576         },
1577         {
1578             # empty description results in an empty string
1579             'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1580             'info_dict': {
1581                 'id': 'x41yOUIvK2k',
1582                 'ext': 'mp4',
1583                 'title': 'IMG 3456',
1584                 'description': '',
1585                 'upload_date': '20170613',
1586                 'uploader_id': 'ElevageOrVert',
1587                 'uploader': 'ElevageOrVert',
1588             },
1589             'params': {
1590                 'skip_download': True,
1591             },
1592         },
1593         {
1594             # with '};' inside yt initial data (see [1])
1595             # see [2] for an example with '};' inside ytInitialPlayerResponse
1596             # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1597             # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1598             'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1599             'info_dict': {
1600                 'id': 'CHqg6qOn4no',
1601                 'ext': 'mp4',
1602                 'title': 'Part 77   Sort a list of simple types in c#',
1603                 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1604                 'upload_date': '20130831',
1605                 'uploader_id': 'kudvenkat',
1606                 'uploader': 'kudvenkat',
1607             },
1608             'params': {
1609                 'skip_download': True,
1610             },
1611         },
1612         {
1613             # another example of '};' in ytInitialData
1614             'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1615             'only_matching': True,
1616         },
1617         {
1618             'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1619             'only_matching': True,
1620         },
1621         {
1622             # https://github.com/ytdl-org/youtube-dl/pull/28094
1623             'url': 'OtqTfy26tG0',
1624             'info_dict': {
1625                 'id': 'OtqTfy26tG0',
1626                 'ext': 'mp4',
1627                 'title': 'Burn Out',
1628                 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1629                 'upload_date': '20141120',
1630                 'uploader': 'The Cinematic Orchestra - Topic',
1631                 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1632                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1633                 'artist': 'The Cinematic Orchestra',
1634                 'track': 'Burn Out',
1635                 'album': 'Every Day',
1636                 'release_data': None,
1637                 'release_year': None,
1638             },
1639             'params': {
1640                 'skip_download': True,
1641             },
1642         },
1643         {
1644             # controversial video, only works with bpctr when authenticated with cookies
1645             'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1646             'only_matching': True,
1647         },
1648         {
1649             # controversial video, requires bpctr/contentCheckOk
1650             'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1651             'info_dict': {
1652                 'id': 'SZJvDhaSDnc',
1653                 'ext': 'mp4',
1654                 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1655                 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1656                 'uploader': 'CBS This Morning',
1657                 'uploader_id': 'CBSThisMorning',
1658                 'upload_date': '20140716',
1659                 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1660             }
1661         },
1662         {
1663             # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1664             'url': 'cBvYw8_A0vQ',
1665             'info_dict': {
1666                 'id': 'cBvYw8_A0vQ',
1667                 'ext': 'mp4',
1668                 'title': '4K Ueno Okachimachi  Street  Scenes  上野御徒町歩き',
1669                 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1670                 'upload_date': '20201120',
1671                 'uploader': 'Walk around Japan',
1672                 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1673                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1674             },
1675             'params': {
1676                 'skip_download': True,
1677             },
1678         }, {
1679             # Has multiple audio streams
1680             'url': 'WaOKSUlf4TM',
1681             'only_matching': True
1682         }, {
1683             # Requires Premium: has format 141 when requested using YTM url
1684             'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1685             'only_matching': True
1686         }, {
1687             # multiple subtitles with same lang_code
1688             'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1689             'only_matching': True,
1690         }, {
1691             # Force use android client fallback
1692             'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1693             'info_dict': {
1694                 'id': 'YOelRv7fMxY',
1695                 'title': 'DIGGING A SECRET TUNNEL Part 1',
1696                 'ext': '3gp',
1697                 'upload_date': '20210624',
1698                 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1699                 'uploader': 'colinfurze',
1700                 'uploader_id': 'colinfurze',
1701                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1702                 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1703             },
1704             'params': {
1705                 'format': '17',  # 3gp format available on android
1706                 'extractor_args': {'youtube': {'player_client': ['android']}},
1707             },
1708         },
1709         {
1710             # Skip download of additional client configs (remix client config in this case)
1711             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1712             'only_matching': True,
1713             'params': {
1714                 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1715             },
1716         }, {
1717             # shorts
1718             'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1719             'only_matching': True,
1720         }, {
1721             'note': 'Storyboards',
1722             'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
1723             'info_dict': {
1724                 'id': '5KLPxDtMqe8',
1725                 'ext': 'mhtml',
1726                 'format_id': 'sb0',
1727                 'title': 'Your Brain is Plastic',
1728                 'uploader_id': 'scishow',
1729                 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
1730                 'upload_date': '20140324',
1731                 'uploader': 'SciShow',
1732             }, 'params': {'format': 'mhtml', 'skip_download': True}
1733         }
1734     ]
1735
1736     @classmethod
1737     def suitable(cls, url):
1738         from ..utils import parse_qs
1739
1740         qs = parse_qs(url)
1741         if qs.get('list', [None])[0]:
1742             return False
1743         return super(YoutubeIE, cls).suitable(url)
1744
1745     def __init__(self, *args, **kwargs):
1746         super(YoutubeIE, self).__init__(*args, **kwargs)
1747         self._code_cache = {}
1748         self._player_cache = {}
1749
1750     def _extract_player_url(self, *ytcfgs, webpage=None):
1751         player_url = traverse_obj(
1752             ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
1753             get_all=False, expected_type=compat_str)
1754         if not player_url:
1755             return
1756         if player_url.startswith('//'):
1757             player_url = 'https:' + player_url
1758         elif not re.match(r'https?://', player_url):
1759             player_url = compat_urlparse.urljoin(
1760                 'https://www.youtube.com', player_url)
1761         return player_url
1762
1763     def _download_player_url(self, video_id, fatal=False):
1764         res = self._download_webpage(
1765             'https://www.youtube.com/iframe_api',
1766             note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
1767         if res:
1768             player_version = self._search_regex(
1769                 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
1770             if player_version:
1771                 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
1772
1773     def _signature_cache_id(self, example_sig):
1774         """ Return a string representation of a signature """
1775         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1776
1777     @classmethod
1778     def _extract_player_info(cls, player_url):
1779         for player_re in cls._PLAYER_INFO_RE:
1780             id_m = re.search(player_re, player_url)
1781             if id_m:
1782                 break
1783         else:
1784             raise ExtractorError('Cannot identify player %r' % player_url)
1785         return id_m.group('id')
1786
1787     def _load_player(self, video_id, player_url, fatal=True):
1788         player_id = self._extract_player_info(player_url)
1789         if player_id not in self._code_cache:
1790             code = self._download_webpage(
1791                 player_url, video_id, fatal=fatal,
1792                 note='Downloading player ' + player_id,
1793                 errnote='Download of %s failed' % player_url)
1794             if code:
1795                 self._code_cache[player_id] = code
1796         return self._code_cache.get(player_id)
1797
1798     def _extract_signature_function(self, video_id, player_url, example_sig):
1799         player_id = self._extract_player_info(player_url)
1800
1801         # Read from filesystem cache
1802         func_id = 'js_%s_%s' % (
1803             player_id, self._signature_cache_id(example_sig))
1804         assert os.path.basename(func_id) == func_id
1805
1806         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1807         if cache_spec is not None:
1808             return lambda s: ''.join(s[i] for i in cache_spec)
1809
1810         code = self._load_player(video_id, player_url)
1811         if code:
1812             res = self._parse_sig_js(code)
1813
1814             test_string = ''.join(map(compat_chr, range(len(example_sig))))
1815             cache_res = res(test_string)
1816             cache_spec = [ord(c) for c in cache_res]
1817
1818             self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1819             return res
1820
1821     def _print_sig_code(self, func, example_sig):
1822         if not self.get_param('youtube_print_sig_code'):
1823             return
1824
1825         def gen_sig_code(idxs):
1826             def _genslice(start, end, step):
1827                 starts = '' if start == 0 else str(start)
1828                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1829                 steps = '' if step == 1 else (':%d' % step)
1830                 return 's[%s%s%s]' % (starts, ends, steps)
1831
1832             step = None
1833             # Quelch pyflakes warnings - start will be set when step is set
1834             start = '(Never used)'
1835             for i, prev in zip(idxs[1:], idxs[:-1]):
1836                 if step is not None:
1837                     if i - prev == step:
1838                         continue
1839                     yield _genslice(start, prev, step)
1840                     step = None
1841                     continue
1842                 if i - prev in [-1, 1]:
1843                     step = i - prev
1844                     start = prev
1845                     continue
1846                 else:
1847                     yield 's[%d]' % prev
1848             if step is None:
1849                 yield 's[%d]' % i
1850             else:
1851                 yield _genslice(start, i, step)
1852
1853         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1854         cache_res = func(test_string)
1855         cache_spec = [ord(c) for c in cache_res]
1856         expr_code = ' + '.join(gen_sig_code(cache_spec))
1857         signature_id_tuple = '(%s)' % (
1858             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1859         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1860                 '    return %s\n') % (signature_id_tuple, expr_code)
1861         self.to_screen('Extracted signature function:\n' + code)
1862
1863     def _parse_sig_js(self, jscode):
1864         funcname = self._search_regex(
1865             (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1866              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1867              r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
1868              r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
1869              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
1870              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1871              r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1872              # Obsolete patterns
1873              r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1874              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1875              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1876              r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1877              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1878              r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1879              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1880              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1881             jscode, 'Initial JS player signature function name', group='sig')
1882
1883         jsi = JSInterpreter(jscode)
1884         initial_function = jsi.extract_function(funcname)
1885         return lambda s: initial_function([s])
1886
1887     def _decrypt_signature(self, s, video_id, player_url):
1888         """Turn the encrypted s field into a working signature"""
1889
1890         if player_url is None:
1891             raise ExtractorError('Cannot decrypt signature without player_url')
1892
1893         try:
1894             player_id = (player_url, self._signature_cache_id(s))
1895             if player_id not in self._player_cache:
1896                 func = self._extract_signature_function(
1897                     video_id, player_url, s
1898                 )
1899                 self._player_cache[player_id] = func
1900             func = self._player_cache[player_id]
1901             self._print_sig_code(func, s)
1902             return func(s)
1903         except Exception as e:
1904             raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
1905
1906     def _decrypt_nsig(self, s, video_id, player_url):
1907         """Turn the encrypted n field into a working signature"""
1908         if player_url is None:
1909             raise ExtractorError('Cannot decrypt nsig without player_url')
1910         if player_url.startswith('//'):
1911             player_url = 'https:' + player_url
1912         elif not re.match(r'https?://', player_url):
1913             player_url = compat_urlparse.urljoin(
1914                 'https://www.youtube.com', player_url)
1915
1916         sig_id = ('nsig_value', s)
1917         if sig_id in self._player_cache:
1918             return self._player_cache[sig_id]
1919
1920         try:
1921             player_id = ('nsig', player_url)
1922             if player_id not in self._player_cache:
1923                 self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
1924             func = self._player_cache[player_id]
1925             self._player_cache[sig_id] = func(s)
1926             self.write_debug(f'Decrypted nsig {s} => {self._player_cache[sig_id]}')
1927             return self._player_cache[sig_id]
1928         except Exception as e:
1929             raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
1930
1931     def _extract_n_function_name(self, jscode):
1932         return self._search_regex(
1933             (r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',),
1934             jscode, 'Initial JS player n function name', group='nfunc')
1935
1936     def _extract_n_function(self, video_id, player_url):
1937         player_id = self._extract_player_info(player_url)
1938         func_code = self._downloader.cache.load('youtube-nsig', player_id)
1939
1940         if func_code:
1941             jsi = JSInterpreter(func_code)
1942         else:
1943             jscode = self._load_player(video_id, player_url)
1944             funcname = self._extract_n_function_name(jscode)
1945             jsi = JSInterpreter(jscode)
1946             func_code = jsi.extract_function_code(funcname)
1947             self._downloader.cache.store('youtube-nsig', player_id, func_code)
1948
1949         if self.get_param('youtube_print_sig_code'):
1950             self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
1951
1952         return lambda s: jsi.extract_function_from_code(*func_code)([s])
1953
1954     def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1955         """
1956         Extract signatureTimestamp (sts)
1957         Required to tell API what sig/player version is in use.
1958         """
1959         sts = None
1960         if isinstance(ytcfg, dict):
1961             sts = int_or_none(ytcfg.get('STS'))
1962
1963         if not sts:
1964             # Attempt to extract from player
1965             if player_url is None:
1966                 error_msg = 'Cannot extract signature timestamp without player_url.'
1967                 if fatal:
1968                     raise ExtractorError(error_msg)
1969                 self.report_warning(error_msg)
1970                 return
1971             code = self._load_player(video_id, player_url, fatal=fatal)
1972             if code:
1973                 sts = int_or_none(self._search_regex(
1974                     r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
1975                     'JS player signature timestamp', group='sts', fatal=fatal))
1976         return sts
1977
1978     def _mark_watched(self, video_id, player_responses):
1979         playback_url = get_first(
1980             player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
1981             expected_type=url_or_none)
1982         if not playback_url:
1983             self.report_warning('Unable to mark watched')
1984             return
1985         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1986         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1987
1988         # cpn generation algorithm is reverse engineered from base.js.
1989         # In fact it works even with dummy cpn.
1990         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1991         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1992
1993         qs.update({
1994             'ver': ['2'],
1995             'cpn': [cpn],
1996         })
1997         playback_url = compat_urlparse.urlunparse(
1998             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1999
2000         self._download_webpage(
2001             playback_url, video_id, 'Marking watched',
2002             'Unable to mark watched', fatal=False)
2003
2004     @staticmethod
2005     def _extract_urls(webpage):
2006         # Embedded YouTube player
2007         entries = [
2008             unescapeHTML(mobj.group('url'))
2009             for mobj in re.finditer(r'''(?x)
2010             (?:
2011                 <iframe[^>]+?src=|
2012                 data-video-url=|
2013                 <embed[^>]+?src=|
2014                 embedSWF\(?:\s*|
2015                 <object[^>]+data=|
2016                 new\s+SWFObject\(
2017             )
2018             (["\'])
2019                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
2020                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
2021             \1''', webpage)]
2022
2023         # lazyYT YouTube embed
2024         entries.extend(list(map(
2025             unescapeHTML,
2026             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
2027
2028         # Wordpress "YouTube Video Importer" plugin
2029         matches = re.findall(r'''(?x)<div[^>]+
2030             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
2031             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
2032         entries.extend(m[-1] for m in matches)
2033
2034         return entries
2035
2036     @staticmethod
2037     def _extract_url(webpage):
2038         urls = YoutubeIE._extract_urls(webpage)
2039         return urls[0] if urls else None
2040
2041     @classmethod
2042     def extract_id(cls, url):
2043         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2044         if mobj is None:
2045             raise ExtractorError('Invalid URL: %s' % url)
2046         return mobj.group('id')
2047
2048     def _extract_chapters_from_json(self, data, duration):
2049         chapter_list = traverse_obj(
2050             data, (
2051                 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2052                 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2053             ), expected_type=list)
2054
2055         return self._extract_chapters(
2056             chapter_list,
2057             chapter_time=lambda chapter: float_or_none(
2058                 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2059             chapter_title=lambda chapter: traverse_obj(
2060                 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2061             duration=duration)
2062
2063     def _extract_chapters_from_engagement_panel(self, data, duration):
2064         content_list = traverse_obj(
2065             data,
2066             ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2067             expected_type=list, default=[])
2068         chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2069         chapter_title = lambda chapter: self._get_text(chapter, 'title')
2070
2071         return next((
2072             filter(None, (
2073                 self._extract_chapters(
2074                     traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2075                     chapter_time, chapter_title, duration)
2076                 for contents in content_list
2077             ))), [])
2078
2079     def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2080         chapters = []
2081         last_chapter = {'start_time': 0}
2082         for idx, chapter in enumerate(chapter_list or []):
2083             title = chapter_title(chapter)
2084             start_time = chapter_time(chapter)
2085             if start_time is None:
2086                 continue
2087             last_chapter['end_time'] = start_time
2088             if start_time < last_chapter['start_time']:
2089                 if idx == 1:
2090                     chapters.pop()
2091                     self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2092                 else:
2093                     self.report_warning(f'Invalid start time for chapter "{title}"')
2094                     continue
2095             last_chapter = {'start_time': start_time, 'title': title}
2096             chapters.append(last_chapter)
2097         last_chapter['end_time'] = duration
2098         return chapters
2099
2100     def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2101         return self._parse_json(self._search_regex(
2102             (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2103              regex), webpage, name, default='{}'), video_id, fatal=False)
2104
2105     def _extract_comment(self, comment_renderer, parent=None):
2106         comment_id = comment_renderer.get('commentId')
2107         if not comment_id:
2108             return
2109
2110         text = self._get_text(comment_renderer, 'contentText')
2111
2112         # note: timestamp is an estimate calculated from the current time and time_text
2113         timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
2114         author = self._get_text(comment_renderer, 'authorText')
2115         author_id = try_get(comment_renderer,
2116                             lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2117
2118         votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2119                                                        lambda x: x['likeCount']), compat_str)) or 0
2120         author_thumbnail = try_get(comment_renderer,
2121                                    lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2122
2123         author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2124         is_favorited = 'creatorHeart' in (try_get(
2125             comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2126         return {
2127             'id': comment_id,
2128             'text': text,
2129             'timestamp': timestamp,
2130             'time_text': time_text,
2131             'like_count': votes,
2132             'is_favorited': is_favorited,
2133             'author': author,
2134             'author_id': author_id,
2135             'author_thumbnail': author_thumbnail,
2136             'author_is_uploader': author_is_uploader,
2137             'parent': parent or 'root'
2138         }
2139
    def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
        """Generator yielding comment dicts (see _extract_comment).

        Pages are fetched from the 'next' endpoint by following continuation
        tokens, starting from the one found in root_continuation_data. The
        method calls itself for reply threads, passing the parent comment id
        and the shared tracker dict.

        @param root_continuation_data  data holding the first continuation
        @param ytcfg                   config used to build the API headers
        @param video_id                used when reporting warnings
        @param parent                  id of the parent comment for a reply
                                       thread; None for top-level comments
        @param tracker                 dict of running counters; created here
                                       when not supplied
        """

        # First value of an extractor argument, or '' when it is not set
        get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

        def extract_header(contents):
            """Return the continuation of the requested sort order, if any.

            Also records the estimated comment count in the tracker.
            """
            _continuation = None
            for content in contents:
                comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
                expected_comment_count = parse_count(self._get_text(
                    comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

                if expected_comment_count:
                    tracker['est_total'] = expected_comment_count
                    self.to_screen(f'Downloading ~{expected_comment_count} comments')
                comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    continue

                # Fall back to a fixed label when the menu item has no title
                sort_text = str_or_none(sort_menu_item.get('title'))
                if not sort_text:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text.lower())
                break
            return _continuation

        def extract_thread(contents):
            """Yield the comments of one page, each followed by its replies.

            Yields None once the limit on top-level comments is reached; the
            consuming loop below treats a falsy entry as the signal to stop.
            """
            if not parent:
                tracker['current_page_thread'] = 0
            for content in contents:
                if not parent and tracker['total_parent_comments'] >= max_parents:
                    yield
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                # The comment renderer is looked up in the thread renderer first,
                # then in the content item itself
                comment_renderer = get_first(
                    (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                    expected_type=dict, default={})

                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue

                tracker['running_total'] += 1
                tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
                yield comment

                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    tracker['current_page_thread'] += 1
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, ytcfg, video_id,
                        parent=comment.get('id'), tracker=tracker)
                    # Take at most the per-thread limit, and never more than
                    # what is left of the overall reply limit
                    for reply_comment in itertools.islice(comment_entries_iter, min(max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))):
                        yield reply_comment

        # Keeps track of counts across recursive calls
        if not tracker:
            tracker = dict(
                running_total=0,
                est_total=0,
                current_page_thread=0,
                total_parent_comments=0,
                total_reply_comments=0)

        # TODO: Deprecated
        # YouTube comments have a max depth of 2
        max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
        if max_depth:
            self._downloader.deprecation_warning(
                '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
        if max_depth == 1 and parent:
            return

        # The max_comments argument holds up to four values; the list is padded
        # with '' so that missing or non-numeric ones become sys.maxsize
        max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
            lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

        continuation = self._extract_continuation(root_continuation_data)
        message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
        if message and not parent:
            self.report_warning(message, video_id=video_id)

        response = None
        # Only a top-level call starts with the header page
        is_first_continuation = parent is None

        for page_num in itertools.count(0):
            if not continuation:
                break
            # Visitor data is taken from the previous response (None on the first page)
            headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = '    Downloading comment API JSON reply thread %d %s' % (
                        tracker['current_page_thread'], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    '       ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys='onResponseReceivedEndpoints')

            continuation_contents = traverse_obj(
                response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

            # Stays None, ending the loop, unless this page provides a next one
            continuation = None
            for continuation_section in continuation_contents:
                continuation_items = traverse_obj(
                    continuation_section,
                    (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                    get_all=False, expected_type=list) or []
                if is_first_continuation:
                    # The first section is only inspected for the header
                    continuation = extract_header(continuation_items)
                    is_first_continuation = False
                    if continuation:
                        break
                    continue

                for entry in extract_thread(continuation_items):
                    # A falsy entry is the stop signal from extract_thread
                    if not entry:
                        return
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    break
2276
2277     def _get_comments(self, ytcfg, video_id, contents, webpage):
2278         """Entry for comment extraction"""
2279         def _real_comment_extract(contents):
2280             renderer = next((
2281                 item for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
2282                 if item.get('sectionIdentifier') == 'comment-item-section'), None)
2283             yield from self._comment_entries(renderer, ytcfg, video_id)
2284
2285         max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
2286         return itertools.islice(_real_comment_extract(contents), 0, max_comments)
2287
2288     @staticmethod
2289     def _get_checkok_params():
2290         return {'contentCheckOk': True, 'racyCheckOk': True}
2291
2292     @classmethod
2293     def _generate_player_context(cls, sts=None):
2294         context = {
2295             'html5Preference': 'HTML5_PREF_WANTS',
2296         }
2297         if sts is not None:
2298             context['signatureTimestamp'] = sts
2299         return {
2300             'playbackContext': {
2301                 'contentPlaybackContext': context
2302             },
2303             **cls._get_checkok_params()
2304         }
2305
2306     @staticmethod
2307     def _is_agegated(player_response):
2308         if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
2309             return True
2310
2311         reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2312         AGE_GATE_REASONS = (
2313             'confirm your age', 'age-restricted', 'inappropriate',  # reason
2314             'age_verification_required', 'age_check_required',  # status
2315         )
2316         return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
2317
2318     @staticmethod
2319     def _is_unplayable(player_response):
2320         return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
2321
2322     def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
2323
2324         session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2325         syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2326         sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
2327         headers = self.generate_api_headers(
2328             ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
2329
2330         yt_query = {'videoId': video_id}
2331         yt_query.update(self._generate_player_context(sts))
2332         return self._extract_response(
2333             item_id=video_id, ep='player', query=yt_query,
2334             ytcfg=player_ytcfg, headers=headers, fatal=True,
2335             default_client=client,
2336             note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2337         ) or None
2338
2339     def _get_requested_clients(self, url, smuggled_data):
2340         requested_clients = []
2341         default = ['android', 'web']
2342         allowed_clients = sorted(
2343             [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
2344             key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
2345         for client in self._configuration_arg('player_client'):
2346             if client in allowed_clients:
2347                 requested_clients.append(client)
2348             elif client == 'default':
2349                 requested_clients.extend(default)
2350             elif client == 'all':
2351                 requested_clients.extend(allowed_clients)
2352             else:
2353                 self.report_warning(f'Skipping unsupported client {client}')
2354         if not requested_clients:
2355             requested_clients = default
2356
2357         if smuggled_data.get('is_music_url') or self.is_music_url(url):
2358             requested_clients.extend(
2359                 f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
2360
2361         return orderedSet(requested_clients)
2362
2363     def _extract_player_ytcfg(self, client, video_id):
2364         url = {
2365             'web_music': 'https://music.youtube.com',
2366             'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2367         }.get(client)
2368         if not url:
2369             return {}
2370         webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2371         return self.extract_ytcfg(video_id, webpage) or {}
2372
    def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
        """Collect the player responses of the given clients.

        Clients are tried in the order given. Depending on a response,
        follow-up clients ('<client>_agegate', '<client>_creator') may be
        queued and are tried immediately after the client that caused them.

        Returns a tuple (player responses, player_url). If any request
        failed, the last error is raised when no response was collected at
        all, and reported as a warning otherwise.
        """
        initial_pr = None
        if webpage:
            initial_pr = self._extract_yt_initial_variable(
                webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
                video_id, 'initial player response')

        original_clients = clients
        # Reversed copy used as a stack: pop() returns clients in the
        # requested order and appended clients are processed next
        clients = clients[::-1]
        prs = []

        def append_client(client_name):
            # Queue only known clients that were not requested in the first place
            if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
                clients.append(client_name)

        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        if initial_pr:
            # Shallow copy, so that initial_pr itself keeps its streamingData
            pr = dict(initial_pr)
            pr['streamingData'] = None
            prs.append(pr)

        last_error = None
        tried_iframe_fallback = False
        player_url = None
        while clients:
            client = clients.pop()
            player_ytcfg = master_ytcfg if client == 'web' else {}
            if 'configs' not in self._configuration_arg('player_skip'):
                player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

            # A player URL found for an earlier client is reused
            player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
            require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
            if 'js' in self._configuration_arg('player_skip'):
                require_js_player = False
                player_url = None

            # The fallback download of the player URL is attempted at most once
            if not player_url and not tried_iframe_fallback and require_js_player:
                player_url = self._download_player_url(video_id)
                tried_iframe_fallback = True

            try:
                # For the web client the response embedded in the webpage is
                # used instead of an API request, when available
                pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
            except ExtractorError as e:
                # Only the most recent error is kept; the one it replaces is
                # reported as a warning
                if last_error:
                    self.report_warning(last_error)
                last_error = e
                continue

            if pr:
                prs.append(pr)

            # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
            if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
                append_client(client.replace('_agegate', '_creator'))
            elif self._is_agegated(pr):
                append_client(f'{client}_agegate')

        if last_error:
            if not len(prs):
                raise last_error
            self.report_warning(last_error)
        return prs, player_url
2439
2440     def _extract_formats(self, streaming_data, video_id, player_url, is_live):
2441         itags, stream_ids = {}, []
2442         itag_qualities, res_qualities = {}, {}
2443         q = qualities([
2444             # Normally tiny is the smallest video-only formats. But
2445             # audio-only formats with unknown quality may get tagged as tiny
2446             'tiny',
2447             'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
2448             'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2449         ])
2450         streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
2451
2452         for fmt in streaming_formats:
2453             if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2454                 continue
2455
2456             itag = str_or_none(fmt.get('itag'))
2457             audio_track = fmt.get('audioTrack') or {}
2458             stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2459             if stream_id in stream_ids:
2460                 continue
2461
2462             quality = fmt.get('quality')
2463             height = int_or_none(fmt.get('height'))
2464             if quality == 'tiny' or not quality:
2465                 quality = fmt.get('audioQuality', '').lower() or quality
2466             # The 3gp format (17) in android client has a quality of "small",
2467             # but is actually worse than other formats
2468             if itag == '17':
2469                 quality = 'tiny'
2470             if quality:
2471                 if itag:
2472                     itag_qualities[itag] = quality
2473                 if height:
2474                     res_qualities[height] = quality
2475             # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2476             # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2477             # number of fragment that would subsequently requested with (`&sq=N`)
2478             if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2479                 continue
2480
2481             fmt_url = fmt.get('url')
2482             if not fmt_url:
2483                 sc = compat_parse_qs(fmt.get('signatureCipher'))
2484                 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2485                 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2486                 if not (sc and fmt_url and encrypted_sig):
2487                     continue
2488                 if not player_url:
2489                     continue
2490                 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2491                 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2492                 fmt_url += '&' + sp + '=' + signature
2493
2494             query = parse_qs(fmt_url)
2495             throttled = False
2496             if query.get('n'):
2497                 try:
2498                     fmt_url = update_url_query(fmt_url, {
2499                         'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
2500                 except ExtractorError as e:
2501                     self.report_warning(
2502                         f'nsig extraction failed: You may experience throttling for some formats\n'
2503                         f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
2504                     throttled = True
2505
2506             if itag:
2507                 itags[itag] = 'https'
2508                 stream_ids.append(stream_id)
2509
2510             tbr = float_or_none(
2511                 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2512             dct = {
2513                 'asr': int_or_none(fmt.get('audioSampleRate')),
2514                 'filesize': int_or_none(fmt.get('contentLength')),
2515                 'format_id': itag,
2516                 'format_note': join_nonempty(
2517                     '%s%s' % (audio_track.get('displayName') or '',
2518                               ' (default)' if audio_track.get('audioIsDefault') else ''),
2519                     fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
2520                     throttled and 'THROTTLED', delim=', '),
2521                 'source_preference': -10 if throttled else -1,
2522                 'fps': int_or_none(fmt.get('fps')) or None,
2523                 'height': height,
2524                 'quality': q(quality),
2525                 'tbr': tbr,
2526                 'url': fmt_url,
2527                 'width': int_or_none(fmt.get('width')),
2528                 'language': audio_track.get('id', '').split('.')[0],
2529                 'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
2530             }
2531             mime_mobj = re.match(
2532                 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2533             if mime_mobj:
2534                 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2535                 dct.update(parse_codecs(mime_mobj.group(2)))
2536             no_audio = dct.get('acodec') == 'none'
2537             no_video = dct.get('vcodec') == 'none'
2538             if no_audio:
2539                 dct['vbr'] = tbr
2540             if no_video:
2541                 dct['abr'] = tbr
2542             if no_audio or no_video:
2543                 dct['downloader_options'] = {
2544                     # Youtube throttles chunks >~10M
2545                     'http_chunk_size': 10485760,
2546                 }
2547                 if dct.get('ext'):
2548                     dct['container'] = dct['ext'] + '_dash'
2549             yield dct
2550
2551         skip_manifests = self._configuration_arg('skip')
2552         get_dash = (
2553             (not is_live or self._configuration_arg('include_live_dash'))
2554             and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
2555         get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2556
2557         def process_manifest_format(f, proto, itag):
2558             if itag in itags:
2559                 if itags[itag] == proto or f'{itag}-{proto}' in itags:
2560                     return False
2561                 itag = f'{itag}-{proto}'
2562             if itag:
2563                 f['format_id'] = itag
2564                 itags[itag] = proto
2565
2566             f['quality'] = next((
2567                 q(qdict[val])
2568                 for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
2569                 if val in qdict), -1)
2570             return True
2571
2572         for sd in streaming_data:
2573             hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2574             if hls_manifest_url:
2575                 for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
2576                     if process_manifest_format(f, 'hls', self._search_regex(
2577                             r'/itag/(\d+)', f['url'], 'itag', default=None)):
2578                         yield f
2579
2580             dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2581             if dash_manifest_url:
2582                 for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
2583                     if process_manifest_format(f, 'dash', f['format_id']):
2584                         f['filesize'] = int_or_none(self._search_regex(
2585                             r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
2586                         yield f
2587
2588     def _extract_storyboard(self, player_responses, duration):
2589         spec = get_first(
2590             player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
2591         if not spec:
2592             return
2593         base_url = spec.pop()
2594         L = len(spec) - 1
2595         for i, args in enumerate(spec):
2596             args = args.split('#')
2597             counts = list(map(int_or_none, args[:5]))
2598             if len(args) != 8 or not all(counts):
2599                 self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
2600                 continue
2601             width, height, frame_count, cols, rows = counts
2602             N, sigh = args[6:]
2603
2604             url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
2605             fragment_count = frame_count / (cols * rows)
2606             fragment_duration = duration / fragment_count
2607             yield {
2608                 'format_id': f'sb{i}',
2609                 'format_note': 'storyboard',
2610                 'ext': 'mhtml',
2611                 'protocol': 'mhtml',
2612                 'acodec': 'none',
2613                 'vcodec': 'none',
2614                 'url': url,
2615                 'width': width,
2616                 'height': height,
2617                 'fragments': [{
2618                     'path': url.replace('$M', str(j)),
2619                     'duration': min(fragment_duration, duration - (j * fragment_duration)),
2620                 } for j in range(math.ceil(fragment_count))],
2621             }
2622
2623     def _real_extract(self, url):
2624         url, smuggled_data = unsmuggle_url(url, {})
2625         video_id = self._match_id(url)
2626
2627         base_url = self.http_scheme() + '//www.youtube.com/'
2628         webpage_url = base_url + 'watch?v=' + video_id
2629         webpage = None
2630         if 'webpage' not in self._configuration_arg('player_skip'):
2631             webpage = self._download_webpage(
2632                 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2633
2634         master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2635
2636         player_responses, player_url = self._extract_player_responses(
2637             self._get_requested_clients(url, smuggled_data),
2638             video_id, webpage, master_ytcfg)
2639
2640         playability_statuses = traverse_obj(
2641             player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2642
2643         trailer_video_id = get_first(
2644             playability_statuses,
2645             ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2646             expected_type=str)
2647         if trailer_video_id:
2648             return self.url_result(
2649                 trailer_video_id, self.ie_key(), trailer_video_id)
2650
2651         search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2652                        if webpage else (lambda x: None))
2653
2654         video_details = traverse_obj(
2655             player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2656         microformats = traverse_obj(
2657             player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2658             expected_type=dict, default=[])
2659         video_title = (
2660             get_first(video_details, 'title')
2661             or self._get_text(microformats, (..., 'title'))
2662             or search_meta(['og:title', 'twitter:title', 'title']))
2663         video_description = get_first(video_details, 'shortDescription')
2664
2665         multifeed_metadata_list = get_first(
2666             player_responses,
2667             ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2668             expected_type=str)
2669         if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
2670             if self.get_param('noplaylist'):
2671                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2672             else:
2673                 entries = []
2674                 feed_ids = []
2675                 for feed in multifeed_metadata_list.split(','):
2676                     # Unquote should take place before split on comma (,) since textual
2677                     # fields may contain comma as well (see
2678                     # https://github.com/ytdl-org/youtube-dl/issues/8536)
2679                     feed_data = compat_parse_qs(
2680                         compat_urllib_parse_unquote_plus(feed))
2681
2682                     def feed_entry(name):
2683                         return try_get(
2684                             feed_data, lambda x: x[name][0], compat_str)
2685
2686                     feed_id = feed_entry('id')
2687                     if not feed_id:
2688                         continue
2689                     feed_title = feed_entry('title')
2690                     title = video_title
2691                     if feed_title:
2692                         title += ' (%s)' % feed_title
2693                     entries.append({
2694                         '_type': 'url_transparent',
2695                         'ie_key': 'Youtube',
2696                         'url': smuggle_url(
2697                             '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2698                             {'force_singlefeed': True}),
2699                         'title': title,
2700                     })
2701                     feed_ids.append(feed_id)
2702                 self.to_screen(
2703                     'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2704                     % (', '.join(feed_ids), video_id))
2705                 return self.playlist_result(
2706                     entries, video_id, video_title, video_description)
2707
2708         live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
2709         is_live = get_first(video_details, 'isLive')
2710         if is_live is None:
2711             is_live = get_first(live_broadcast_details, 'isLiveNow')
2712
2713         streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2714         formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2715
2716         if not formats:
2717             if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2718                 self.report_drm(video_id)
2719             pemr = get_first(
2720                 playability_statuses,
2721                 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2722             reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2723             subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2724             if subreason:
2725                 if subreason == 'The uploader has not made this video available in your country.':
2726                     countries = get_first(microformats, 'availableCountries')
2727                     if not countries:
2728                         regions_allowed = search_meta('regionsAllowed')
2729                         countries = regions_allowed.split(',') if regions_allowed else None
2730                     self.raise_geo_restricted(subreason, countries, metadata_available=True)
2731                 reason += f'. {subreason}'
2732             if reason:
2733                 self.raise_no_formats(reason, expected=True)
2734
2735         keywords = get_first(video_details, 'keywords', expected_type=list) or []
2736         if not keywords and webpage:
2737             keywords = [
2738                 unescapeHTML(m.group('content'))
2739                 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2740         for keyword in keywords:
2741             if keyword.startswith('yt:stretch='):
2742                 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2743                 if mobj:
2744                     # NB: float is intentional for forcing float division
2745                     w, h = (float(v) for v in mobj.groups())
2746                     if w > 0 and h > 0:
2747                         ratio = w / h
2748                         for f in formats:
2749                             if f.get('vcodec') != 'none':
2750                                 f['stretched_ratio'] = ratio
2751                         break
2752
2753         thumbnails = []
2754         thumbnail_dicts = traverse_obj(
2755             (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2756             expected_type=dict, default=[])
2757         for thumbnail in thumbnail_dicts:
2758             thumbnail_url = thumbnail.get('url')
2759             if not thumbnail_url:
2760                 continue
2761             # Sometimes youtube gives a wrong thumbnail URL. See:
2762             # https://github.com/yt-dlp/yt-dlp/issues/233
2763             # https://github.com/ytdl-org/youtube-dl/issues/28023
2764             if 'maxresdefault' in thumbnail_url:
2765                 thumbnail_url = thumbnail_url.split('?')[0]
2766             thumbnails.append({
2767                 'url': thumbnail_url,
2768                 'height': int_or_none(thumbnail.get('height')),
2769                 'width': int_or_none(thumbnail.get('width')),
2770             })
2771         thumbnail_url = search_meta(['og:image', 'twitter:image'])
2772         if thumbnail_url:
2773             thumbnails.append({
2774                 'url': thumbnail_url,
2775             })
2776         original_thumbnails = thumbnails.copy()
2777
2778         # The best resolution thumbnails sometimes does not appear in the webpage
2779         # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2780         # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2781         thumbnail_names = [
2782             'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
2783             'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2784             'mqdefault', 'mq1', 'mq2', 'mq3',
2785             'default', '1', '2', '3'
2786         ]
2787         n_thumbnail_names = len(thumbnail_names)
2788         thumbnails.extend({
2789             'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2790                 video_id=video_id, name=name, ext=ext,
2791                 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2792         } for name in thumbnail_names for ext in ('webp', 'jpg'))
2793         for thumb in thumbnails:
2794             i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2795             thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2796         self._remove_duplicate_formats(thumbnails)
2797         self._downloader._sort_thumbnails(original_thumbnails)
2798
2799         category = get_first(microformats, 'category') or search_meta('genre')
2800         channel_id = str_or_none(
2801             get_first(video_details, 'channelId')
2802             or get_first(microformats, 'externalChannelId')
2803             or search_meta('channelId'))
2804         duration = int_or_none(
2805             get_first(video_details, 'lengthSeconds')
2806             or get_first(microformats, 'lengthSeconds')
2807             or parse_duration(search_meta('duration'))) or None
2808         owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2809
2810         live_content = get_first(video_details, 'isLiveContent')
2811         is_upcoming = get_first(video_details, 'isUpcoming')
2812         if is_live is None:
2813             if is_upcoming or live_content is False:
2814                 is_live = False
2815         if is_upcoming is None and (live_content or is_live):
2816             is_upcoming = False
2817         live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2818         live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2819         if not duration and live_endtime and live_starttime:
2820             duration = live_endtime - live_starttime
2821
2822         formats.extend(self._extract_storyboard(player_responses, duration))
2823
2824         # Source is given priority since formats that throttle are given lower source_preference
2825         # When throttling issue is fully fixed, remove this
2826         self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
2827
2828         info = {
2829             'id': video_id,
2830             'title': video_title,
2831             'formats': formats,
2832             'thumbnails': thumbnails,
2833             # The best thumbnail that we are sure exists. Prevents unnecessary
2834             # URL checking if user don't care about getting the best possible thumbnail
2835             'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
2836             'description': video_description,
2837             'upload_date': unified_strdate(
2838                 get_first(microformats, 'uploadDate')
2839                 or search_meta('uploadDate')),
2840             'uploader': get_first(video_details, 'author'),
2841             'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2842             'uploader_url': owner_profile_url,
2843             'channel_id': channel_id,
2844             'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
2845             'duration': duration,
2846             'view_count': int_or_none(
2847                 get_first((video_details, microformats), (..., 'viewCount'))
2848                 or search_meta('interactionCount')),
2849             'average_rating': float_or_none(get_first(video_details, 'averageRating')),
2850             'age_limit': 18 if (
2851                 get_first(microformats, 'isFamilySafe') is False
2852                 or search_meta('isFamilyFriendly') == 'false'
2853                 or search_meta('og:restrictions:age') == '18+') else 0,
2854             'webpage_url': webpage_url,
2855             'categories': [category] if category else None,
2856             'tags': keywords,
2857             'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
2858             'is_live': is_live,
2859             'was_live': (False if is_live or is_upcoming or live_content is False
2860                          else None if is_live is None or is_upcoming is None
2861                          else live_content),
2862             'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
2863             'release_timestamp': live_starttime,
2864         }
2865
2866         pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2867         if pctr:
2868             def get_lang_code(track):
2869                 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
2870                         or track.get('languageCode'))
2871
2872             # Converted into dicts to remove duplicates
2873             captions = {
2874                 get_lang_code(sub): sub
2875                 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2876             translation_languages = {
2877                 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
2878                 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
2879
2880             def process_language(container, base_url, lang_code, sub_name, query):
2881                 lang_subs = container.setdefault(lang_code, [])
2882                 for fmt in self._SUBTITLE_FORMATS:
2883                     query.update({
2884                         'fmt': fmt,
2885                     })
2886                     lang_subs.append({
2887                         'ext': fmt,
2888                         'url': update_url_query(base_url, query),
2889                         'name': sub_name,
2890                     })
2891
2892             subtitles, automatic_captions = {}, {}
2893             for lang_code, caption_track in captions.items():
2894                 base_url = caption_track.get('baseUrl')
2895                 if not base_url:
2896                     continue
2897                 lang_name = self._get_text(caption_track, 'name', max_runs=1)
2898                 if caption_track.get('kind') != 'asr':
2899                     if not lang_code:
2900                         continue
2901                     process_language(
2902                         subtitles, base_url, lang_code, lang_name, {})
2903                     if not caption_track.get('isTranslatable'):
2904                         continue
2905                 for trans_code, trans_name in translation_languages.items():
2906                     if not trans_code:
2907                         continue
2908                     if caption_track.get('kind') != 'asr':
2909                         trans_code += f'-{lang_code}'
2910                         trans_name += format_field(lang_name, template=' from %s')
2911                     process_language(
2912                         automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code})
2913             info['automatic_captions'] = automatic_captions
2914             info['subtitles'] = subtitles
2915
2916         parsed_url = compat_urllib_parse_urlparse(url)
2917         for component in [parsed_url.fragment, parsed_url.query]:
2918             query = compat_parse_qs(component)
2919             for k, v in query.items():
2920                 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2921                     d_k += '_time'
2922                     if d_k not in info and k in s_ks:
2923                         info[d_k] = parse_duration(query[k][0])
2924
2925         # Youtube Music Auto-generated description
2926         if video_description:
2927             mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2928             if mobj:
2929                 release_year = mobj.group('release_year')
2930                 release_date = mobj.group('release_date')
2931                 if release_date:
2932                     release_date = release_date.replace('-', '')
2933                     if not release_year:
2934                         release_year = release_date[:4]
2935                 info.update({
2936                     'album': mobj.group('album'.strip()),
2937                     'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2938                     'track': mobj.group('track').strip(),
2939                     'release_date': release_date,
2940                     'release_year': int_or_none(release_year),
2941                 })
2942
2943         initial_data = None
2944         if webpage:
2945             initial_data = self._extract_yt_initial_variable(
2946                 webpage, self._YT_INITIAL_DATA_RE, video_id,
2947                 'yt initial data')
2948         if not initial_data:
2949             query = {'videoId': video_id}
2950             query.update(self._get_checkok_params())
2951             initial_data = self._extract_response(
2952                 item_id=video_id, ep='next', fatal=False,
2953                 ytcfg=master_ytcfg, query=query,
2954                 headers=self.generate_api_headers(ytcfg=master_ytcfg),
2955                 note='Downloading initial data API JSON')
2956
2957         try:
2958             # This will error if there is no livechat
2959             initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2960             info.setdefault('subtitles', {})['live_chat'] = [{
2961                 'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
2962                 'video_id': video_id,
2963                 'ext': 'json',
2964                 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
2965             }]
2966         except (KeyError, IndexError, TypeError):
2967             pass
2968
2969         if initial_data:
2970             info['chapters'] = (
2971                 self._extract_chapters_from_json(initial_data, duration)
2972                 or self._extract_chapters_from_engagement_panel(initial_data, duration)
2973                 or None)
2974
2975             contents = try_get(
2976                 initial_data,
2977                 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2978                 list) or []
2979             for content in contents:
2980                 vpir = content.get('videoPrimaryInfoRenderer')
2981                 if vpir:
2982                     stl = vpir.get('superTitleLink')
2983                     if stl:
2984                         stl = self._get_text(stl)
2985                         if try_get(
2986                                 vpir,
2987                                 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2988                             info['location'] = stl
2989                         else:
2990                             mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2991                             if mobj:
2992                                 info.update({
2993                                     'series': mobj.group(1),
2994                                     'season_number': int(mobj.group(2)),
2995                                     'episode_number': int(mobj.group(3)),
2996                                 })
2997                     for tlb in (try_get(
2998                             vpir,
2999                             lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3000                             list) or []):
3001                         tbr = tlb.get('toggleButtonRenderer') or {}
3002                         for getter, regex in [(
3003                                 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3004                                 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3005                                     lambda x: x['accessibility'],
3006                                     lambda x: x['accessibilityData']['accessibilityData'],
3007                                 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3008                             label = (try_get(tbr, getter, dict) or {}).get('label')
3009                             if label:
3010                                 mobj = re.match(regex, label)
3011                                 if mobj:
3012                                     info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3013                                     break
3014                     sbr_tooltip = try_get(
3015                         vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3016                     if sbr_tooltip:
3017                         like_count, dislike_count = sbr_tooltip.split(' / ')
3018                         info.update({
3019                             'like_count': str_to_int(like_count),
3020                             'dislike_count': str_to_int(dislike_count),
3021                         })
3022                 vsir = content.get('videoSecondaryInfoRenderer')
3023                 if vsir:
3024                     info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
3025                     rows = try_get(
3026                         vsir,
3027                         lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3028                         list) or []
3029                     multiple_songs = False
3030                     for row in rows:
3031                         if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3032                             multiple_songs = True
3033                             break
3034                     for row in rows:
3035                         mrr = row.get('metadataRowRenderer') or {}
3036                         mrr_title = mrr.get('title')
3037                         if not mrr_title:
3038                             continue
3039                         mrr_title = self._get_text(mrr, 'title')
3040                         mrr_contents_text = self._get_text(mrr, ('contents', 0))
3041                         if mrr_title == 'License':
3042                             info['license'] = mrr_contents_text
3043                         elif not multiple_songs:
3044                             if mrr_title == 'Album':
3045                                 info['album'] = mrr_contents_text
3046                             elif mrr_title == 'Artist':
3047                                 info['artist'] = mrr_contents_text
3048                             elif mrr_title == 'Song':
3049                                 info['track'] = mrr_contents_text
3050
3051         fallbacks = {
3052             'channel': 'uploader',
3053             'channel_id': 'uploader_id',
3054             'channel_url': 'uploader_url',
3055         }
3056         for to, frm in fallbacks.items():
3057             if not info.get(to):
3058                 info[to] = info.get(frm)
3059
3060         for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3061             v = info.get(s_k)
3062             if v:
3063                 info[d_k] = v
3064
3065         is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3066         is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3067         is_membersonly = None
3068         is_premium = None
3069         if initial_data and is_private is not None:
3070             is_membersonly = False
3071             is_premium = False
3072             contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3073             badge_labels = set()
3074             for content in contents:
3075                 if not isinstance(content, dict):
3076                     continue
3077                 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3078             for badge_label in badge_labels:
3079                 if badge_label.lower() == 'members only':
3080                     is_membersonly = True
3081                 elif badge_label.lower() == 'premium':
3082                     is_premium = True
3083                 elif badge_label.lower() == 'unlisted':
3084                     is_unlisted = True
3085
3086         info['availability'] = self._availability(
3087             is_private=is_private,
3088             needs_premium=is_premium,
3089             needs_subscription=is_membersonly,
3090             needs_auth=info['age_limit'] >= 18,
3091             is_unlisted=None if is_private is None else is_unlisted)
3092
3093         info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
3094
3095         self.mark_watched(video_id, player_responses)
3096
3097         return info
3098
3099
3100 class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
3101
3102     def _extract_channel_id(self, webpage):
3103         channel_id = self._html_search_meta(
3104             'channelId', webpage, 'channel id', default=None)
3105         if channel_id:
3106             return channel_id
3107         channel_url = self._html_search_meta(
3108             ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3109              'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3110              'twitter:app:url:googleplay'), webpage, 'channel url')
3111         return self._search_regex(
3112             r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3113             channel_url, 'channel id')
3114
3115     @staticmethod
3116     def _extract_basic_item_renderer(item):
3117         # Modified from _extract_grid_item_renderer
3118         known_basic_renderers = (
3119             'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3120         )
3121         for key, renderer in item.items():
3122             if not isinstance(renderer, dict):
3123                 continue
3124             elif key in known_basic_renderers:
3125                 return renderer
3126             elif key.startswith('grid') and key.endswith('Renderer'):
3127                 return renderer
3128
3129     def _grid_entries(self, grid_renderer):
3130         for item in grid_renderer['items']:
3131             if not isinstance(item, dict):
3132                 continue
3133             renderer = self._extract_basic_item_renderer(item)
3134             if not isinstance(renderer, dict):
3135                 continue
3136             title = self._get_text(renderer, 'title')
3137
3138             # playlist
3139             playlist_id = renderer.get('playlistId')
3140             if playlist_id:
3141                 yield self.url_result(
3142                     'https://www.youtube.com/playlist?list=%s' % playlist_id,
3143                     ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3144                     video_title=title)
3145                 continue
3146             # video
3147             video_id = renderer.get('videoId')
3148             if video_id:
3149                 yield self._extract_video(renderer)
3150                 continue
3151             # channel
3152             channel_id = renderer.get('channelId')
3153             if channel_id:
3154                 yield self.url_result(
3155                     'https://www.youtube.com/channel/%s' % channel_id,
3156                     ie=YoutubeTabIE.ie_key(), video_title=title)
3157                 continue
3158             # generic endpoint URL support
3159             ep_url = urljoin('https://www.youtube.com/', try_get(
3160                 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3161                 compat_str))
3162             if ep_url:
3163                 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3164                     if ie.suitable(ep_url):
3165                         yield self.url_result(
3166                             ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3167                         break
3168
3169     def _shelf_entries_from_content(self, shelf_renderer):
3170         content = shelf_renderer.get('content')
3171         if not isinstance(content, dict):
3172             return
3173         renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3174         if renderer:
3175             # TODO: add support for nested playlists so each shelf is processed
3176             # as separate playlist
3177             # TODO: this includes only first N items
3178             for entry in self._grid_entries(renderer):
3179                 yield entry
3180         renderer = content.get('horizontalListRenderer')
3181         if renderer:
3182             # TODO
3183             pass
3184
3185     def _shelf_entries(self, shelf_renderer, skip_channels=False):
3186         ep = try_get(
3187             shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3188             compat_str)
3189         shelf_url = urljoin('https://www.youtube.com', ep)
3190         if shelf_url:
3191             # Skipping links to another channels, note that checking for
3192             # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3193             # will not work
3194             if skip_channels and '/channels?' in shelf_url:
3195                 return
3196             title = self._get_text(shelf_renderer, 'title')
3197             yield self.url_result(shelf_url, video_title=title)
3198         # Shelf may not contain shelf URL, fallback to extraction from content
3199         for entry in self._shelf_entries_from_content(shelf_renderer):
3200             yield entry
3201
3202     def _playlist_entries(self, video_list_renderer):
3203         for content in video_list_renderer['contents']:
3204             if not isinstance(content, dict):
3205                 continue
3206             renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3207             if not isinstance(renderer, dict):
3208                 continue
3209             video_id = renderer.get('videoId')
3210             if not video_id:
3211                 continue
3212             yield self._extract_video(renderer)
3213
3214     def _rich_entries(self, rich_grid_renderer):
3215         renderer = try_get(
3216             rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3217         video_id = renderer.get('videoId')
3218         if not video_id:
3219             return
3220         yield self._extract_video(renderer)
3221
3222     def _video_entry(self, video_renderer):
3223         video_id = video_renderer.get('videoId')
3224         if video_id:
3225             return self._extract_video(video_renderer)
3226
3227     def _post_thread_entries(self, post_thread_renderer):
3228         post_renderer = try_get(
3229             post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3230         if not post_renderer:
3231             return
3232         # video attachment
3233         video_renderer = try_get(
3234             post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3235         video_id = video_renderer.get('videoId')
3236         if video_id:
3237             entry = self._extract_video(video_renderer)
3238             if entry:
3239                 yield entry
3240         # playlist attachment
3241         playlist_id = try_get(
3242             post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3243         if playlist_id:
3244             yield self.url_result(
3245                 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3246                 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3247         # inline video links
3248         runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3249         for run in runs:
3250             if not isinstance(run, dict):
3251                 continue
3252             ep_url = try_get(
3253                 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3254             if not ep_url:
3255                 continue
3256             if not YoutubeIE.suitable(ep_url):
3257                 continue
3258             ep_video_id = YoutubeIE._match_id(ep_url)
3259             if video_id == ep_video_id:
3260                 continue
3261             yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3262
3263     def _post_thread_continuation_entries(self, post_thread_continuation):
3264         contents = post_thread_continuation.get('contents')
3265         if not isinstance(contents, list):
3266             return
3267         for content in contents:
3268             renderer = content.get('backstagePostThreadRenderer')
3269             if not isinstance(renderer, dict):
3270                 continue
3271             for entry in self._post_thread_entries(renderer):
3272                 yield entry
3273
3274     r''' # unused
3275     def _rich_grid_entries(self, contents):
3276         for content in contents:
3277             video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3278             if video_renderer:
3279                 entry = self._video_entry(video_renderer)
3280                 if entry:
3281                     yield entry
3282     '''
    def _extract_entries(self, parent_renderer, continuation_list):
        """Yield the entries found in ``parent_renderer['contents']``.

        Two kinds of content are handled: ``itemSectionRenderer`` items, whose
        own contents are dispatched to a handler chosen by renderer key, and
        ``richItemRenderer`` items, which go through ``_rich_entries``.

        @param parent_renderer    dict whose 'contents' list is walked
        @param continuation_list  single-element list used as an output
                                  parameter; its only item is set to the
                                  continuation extracted along the way
        """
        # continuation_list is modified in-place with continuation_list = [continuation_token]
        continuation_list[:] = [None]
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                # Not an item section: only rich items are supported here
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    # For rich items the continuation is looked up on the parent
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                # Maps a renderer key to the callable producing its entries
                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                    'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
                    'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
                }
                # Only the first known renderer key of each item is processed
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        # Handlers may produce falsy entries; drop them
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            # Fall back to the item section's own continuation
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        # Last resort: the continuation of the parent renderer
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)
3326
    def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
        """Yield all entries of *tab*, following continuations page by page.

        The entries embedded in the tab's ``sectionListRenderer`` or
        ``richGridRenderer`` are yielded first. Further pages are then
        requested for as long as a continuation is available and the response
        has a recognised shape.

        @param tab             tab renderer dict; its 'content' is read
        @param item_id         ID used in the per-page request labels
        @param ytcfg           passed on to the header generation and requests
        @param account_syncid  passed on to the header generation
        @param visitor_data    initial visitor data; replaced by the value from
                               each response when one is present
        """
        # Single-element list through which _extract_entries reports the continuation
        continuation_list = [None]
        # The lambda looks up continuation_list when called, so it also sees
        # the fresh lists the name is rebound to further down
        extract_entries = lambda x: self._extract_entries(x, continuation_list)
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]

        for page_num in itertools.count(1):
            if not continuation:
                break
            headers = self.generate_api_headers(
                ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=continuation, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
            # See: https://github.com/ytdl-org/youtube-dl/issues/28702
            visitor_data = self._extract_visitor_data(response) or visitor_data

            # First response shape: 'continuationContents' keyed by continuation type
            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                # Only extract_entries fills continuation_list; for the other
                # handlers the continuation is read from the renderer itself
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # Second response shape: a list of continuation items. The key of
            # the first item selects the handler and the key under which the
            # whole list is passed to it
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'gridChannelRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                # Wrap the items so that they look like a regular renderer
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            # Unrecognised response shape: stop paging
            break
3403
3404     @staticmethod
3405     def _extract_selected_tab(tabs):
3406         for tab in tabs:
3407             renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3408             if renderer.get('selected') is True:
3409                 return renderer
3410         else:
3411             raise ExtractorError('Unable to find selected tab')
3412
3413     @classmethod
3414     def _extract_uploader(cls, data):
3415         uploader = {}
3416         renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3417         owner = try_get(
3418             renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3419         if owner:
3420             uploader['uploader'] = owner.get('text')
3421             uploader['uploader_id'] = try_get(
3422                 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3423             uploader['uploader_url'] = urljoin(
3424                 'https://www.youtube.com/',
3425                 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3426         return {k: v for k, v in uploader.items() if v is not None}
3427
3428     def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
3429         playlist_id = title = description = channel_url = channel_name = channel_id = None
3430         thumbnails_list = []
3431         tags = []
3432
3433         selected_tab = self._extract_selected_tab(tabs)
3434         renderer = try_get(
3435             data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
3436         if renderer:
3437             channel_name = renderer.get('title')
3438             channel_url = renderer.get('channelUrl')
3439             channel_id = renderer.get('externalId')
3440         else:
3441             renderer = try_get(
3442                 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
3443
3444         if renderer:
3445             title = renderer.get('title')
3446             description = renderer.get('description', '')
3447             playlist_id = channel_id
3448             tags = renderer.get('keywords', '').split()
3449             thumbnails_list = (
3450                 try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
3451                 or try_get(
3452                     self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
3453                     lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
3454                     list)
3455                 or [])
3456
3457         thumbnails = []
3458         for t in thumbnails_list:
3459             if not isinstance(t, dict):
3460                 continue
3461             thumbnail_url = url_or_none(t.get('url'))
3462             if not thumbnail_url:
3463                 continue
3464             thumbnails.append({
3465                 'url': thumbnail_url,
3466                 'width': int_or_none(t.get('width')),
3467                 'height': int_or_none(t.get('height')),
3468             })
3469         if playlist_id is None:
3470             playlist_id = item_id
3471         if title is None:
3472             title = (
3473                 try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
3474                 or playlist_id)
3475         title += format_field(selected_tab, 'title', ' - %s')
3476         title += format_field(selected_tab, 'expandedText', ' - %s')
3477         metadata = {
3478             'playlist_id': playlist_id,
3479             'playlist_title': title,
3480             'playlist_description': description,
3481             'uploader': channel_name,
3482             'uploader_id': channel_id,
3483             'uploader_url': channel_url,
3484             'thumbnails': thumbnails,
3485             'tags': tags,
3486         }
3487         availability = self._extract_availability(data)
3488         if availability:
3489             metadata['availability'] = availability
3490         if not channel_id:
3491             metadata.update(self._extract_uploader(data))
3492         metadata.update({
3493             'channel': metadata['uploader'],
3494             'channel_id': metadata['uploader_id'],
3495             'channel_url': metadata['uploader_url']})
3496         return self.playlist_result(
3497             self._entries(
3498                 selected_tab, playlist_id, ytcfg,
3499                 self._extract_account_syncid(ytcfg, data),
3500                 self._extract_visitor_data(data, ytcfg)),
3501             **metadata)
3502
3503     def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
3504         first_id = last_id = response = None
3505         for page_num in itertools.count(1):
3506             videos = list(self._playlist_entries(playlist))
3507             if not videos:
3508                 return
3509             start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
3510             if start >= len(videos):
3511                 return
3512             for video in videos[start:]:
3513                 if video['id'] == first_id:
3514                     self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
3515                     return
3516                 yield video
3517             first_id = first_id or videos[0]['id']
3518             last_id = videos[-1]['id']
3519             watch_endpoint = try_get(
3520                 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
3521             headers = self.generate_api_headers(
3522                 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
3523                 visitor_data=self._extract_visitor_data(response, data, ytcfg))
3524             query = {
3525                 'playlistId': playlist_id,
3526                 'videoId': watch_endpoint.get('videoId') or last_id,
3527                 'index': watch_endpoint.get('index') or len(videos),
3528                 'params': watch_endpoint.get('params') or 'OAE%3D'
3529             }
3530             response = self._extract_response(
3531                 item_id='%s page %d' % (playlist_id, page_num),
3532                 query=query, ep='next', headers=headers, ytcfg=ytcfg,
3533                 check_get_keys='contents'
3534             )
3535             playlist = try_get(
3536                 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3537
3538     def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
3539         title = playlist.get('title') or try_get(
3540             data, lambda x: x['titleText']['simpleText'], compat_str)
3541         playlist_id = playlist.get('playlistId') or item_id
3542
3543         # Delegating everything except mix playlists to regular tab-based playlist URL
3544         playlist_url = urljoin(url, try_get(
3545             playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3546             compat_str))
3547         if playlist_url and playlist_url != url:
3548             return self.url_result(
3549                 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3550                 video_title=title)
3551
3552         return self.playlist_result(
3553             self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
3554             playlist_id=playlist_id, playlist_title=title)
3555
3556     def _extract_availability(self, data):
3557         """
3558         Gets the availability of a given playlist/tab.
3559         Note: Unless YouTube tells us explicitly, we do not assume it is public
3560         @param data: response
3561         """
3562         is_private = is_unlisted = None
3563         renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
3564         badge_labels = self._extract_badges(renderer)
3565
3566         # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
3567         privacy_dropdown_entries = try_get(
3568             renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
3569         for renderer_dict in privacy_dropdown_entries:
3570             is_selected = try_get(
3571                 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
3572             if not is_selected:
3573                 continue
3574             label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
3575             if label:
3576                 badge_labels.add(label.lower())
3577                 break
3578
3579         for badge_label in badge_labels:
3580             if badge_label == 'unlisted':
3581                 is_unlisted = True
3582             elif badge_label == 'private':
3583                 is_private = True
3584             elif badge_label == 'public':
3585                 is_unlisted = is_private = False
3586         return self._availability(is_private, False, False, False, is_unlisted)
3587
3588     @staticmethod
3589     def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
3590         sidebar_renderer = try_get(
3591             data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
3592         for item in sidebar_renderer:
3593             renderer = try_get(item, lambda x: x[info_renderer], expected_type)
3594             if renderer:
3595                 return renderer
3596
3597     def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
3598         """
3599         Get playlist with unavailable videos if the 'show unavailable videos' button exists.
3600         """
3601         browse_id = params = None
3602         renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
3603         if not renderer:
3604             return
3605         menu_renderer = try_get(
3606             renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
3607         for menu_item in menu_renderer:
3608             if not isinstance(menu_item, dict):
3609                 continue
3610             nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
3611             text = try_get(
3612                 nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
3613             if not text or text.lower() != 'show unavailable videos':
3614                 continue
3615             browse_endpoint = try_get(
3616                 nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
3617             browse_id = browse_endpoint.get('browseId')
3618             params = browse_endpoint.get('params')
3619             break
3620
3621         headers = self.generate_api_headers(
3622             ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
3623             visitor_data=self._extract_visitor_data(data, ytcfg))
3624         query = {
3625             'params': params or 'wgYCCAA=',
3626             'browseId': browse_id or 'VL%s' % item_id
3627         }
3628         return self._extract_response(
3629             item_id=item_id, headers=headers, query=query,
3630             check_get_keys='contents', fatal=False, ytcfg=ytcfg,
3631             note='Downloading API JSON with unavailable videos')
3632
    def _extract_webpage(self, url, item_id, fatal=True):
        """Download the webpage at *url* and extract its initial data, with retries.

        The download is repeated, up to the 'extractor_retries' param, after a
        network error (other than an HTTP 403 or 429) and when the initial
        data has neither 'contents' nor 'currentVideoEndpoint'.

        @param url      URL of the page to download
        @param item_id  ID used for the download and extraction messages
        @param fatal    if true, failures are raised; otherwise they are only
                        reported as warnings
        @returns        (webpage, data); either may be None when the download
                        failed and *fatal* is false
        """
        retries = self.get_param('extractor_retries', 3)
        # Starts at -1 so that the first attempt is number 0 (not a retry)
        count = -1
        webpage = data = last_error = None
        while count < retries:
            count += 1
            # Sometimes youtube returns a webpage with incomplete ytInitialData
            # See: https://github.com/yt-dlp/yt-dlp/issues/116
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                webpage = self._download_webpage(
                    url, item_id,
                    note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
                data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
            except ExtractorError as e:
                if isinstance(e.cause, network_exceptions):
                    # HTTP 403 and 429 are not retried
                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                        last_error = error_to_compat_str(e.cause or e.msg)
                        if count < retries:
                            continue
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                break
            else:
                # Alerts in the data are not retried: raise or warn, then stop
                try:
                    self._extract_and_report_alerts(data)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    break

                # Data with one of these keys is considered complete
                if dict_get(data, ('contents', 'currentVideoEndpoint')):
                    break

                last_error = 'Incomplete yt initial data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    self.report_warning(last_error)
                    break

        return webpage, data
3678
3679     def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
3680         data = None
3681         if 'webpage' not in self._configuration_arg('skip'):
3682             webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
3683             ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
3684         if not data:
3685             if not ytcfg and self.is_authenticated:
3686                 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
3687                 if 'authcheck' not in self._configuration_arg('skip') and fatal:
3688                     raise ExtractorError(
3689                         msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
3690                               ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
3691                         expected=True)
3692                 self.report_warning(msg, only_once=True)
3693             data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
3694         return data, ytcfg
3695
3696     def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
3697         headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
3698         resolve_response = self._extract_response(
3699             item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
3700             ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)
3701         endpoints = {'browseEndpoint': 'browse', 'watchEndpoint': 'next'}
3702         for ep_key, ep in endpoints.items():
3703             params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
3704             if params:
3705                 return self._extract_response(
3706                     item_id=item_id, query=params, ep=ep, headers=headers,
3707                     ytcfg=ytcfg, fatal=fatal, default_client=default_client,
3708                     check_get_keys=('contents', 'currentVideoEndpoint'))
3709         err_note = 'Failed to resolve url (does the playlist exist?)'
3710         if fatal:
3711             raise ExtractorError(err_note, expected=True)
3712         self.report_warning(err_note, item_id)
3713
3714     @staticmethod
3715     def _smuggle_data(entries, data):
3716         for entry in entries:
3717             if data:
3718                 entry['url'] = smuggle_url(entry['url'], data)
3719             yield entry
3720
3721     _SEARCH_PARAMS = None
3722
3723     def _search_results(self, query, params=NO_DEFAULT):
3724         data = {'query': query}
3725         if params is NO_DEFAULT:
3726             params = self._SEARCH_PARAMS
3727         if params:
3728             data['params'] = params
3729         continuation_list = [None]
3730         for page_num in itertools.count(1):
3731             data.update(continuation_list[0] or {})
3732             search = self._extract_response(
3733                 item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
3734                 check_get_keys=('contents', 'onResponseReceivedCommands'))
3735             slr_contents = try_get(
3736                 search,
3737                 (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
3738                  lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
3739                 list)
3740             yield from self._extract_entries({'contents': slr_contents}, continuation_list)
3741             if not continuation_list[0]:
3742                 break
3743
3744
3745 class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
3746     IE_DESC = 'YouTube Tabs'
3747     _VALID_URL = r'''(?x:
3748         https?://
3749             (?:\w+\.)?
3750             (?:
3751                 youtube(?:kids)?\.com|
3752                 %(invidious)s
3753             )/
3754             (?:
3755                 (?P<channel_type>channel|c|user|browse)/|
3756                 (?P<not_channel>
3757                     feed/|hashtag/|
3758                     (?:playlist|watch)\?.*?\blist=
3759                 )|
3760                 (?!(?:%(reserved_names)s)\b)  # Direct URLs
3761             )
3762             (?P<id>[^/?\#&]+)
3763     )''' % {
3764         'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
3765         'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
3766     }
3767     IE_NAME = 'youtube:tab'
3768
3769     _TESTS = [{
3770         'note': 'playlists, multipage',
3771         'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3772         'playlist_mincount': 94,
3773         'info_dict': {
3774             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3775             'title': 'Игорь Клейнер - Playlists',
3776             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3777             'uploader': 'Игорь Клейнер',
3778             'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3779         },
3780     }, {
3781         'note': 'playlists, multipage, different order',
3782         'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3783         'playlist_mincount': 94,
3784         'info_dict': {
3785             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3786             'title': 'Игорь Клейнер - Playlists',
3787             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3788             'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3789             'uploader': 'Игорь Клейнер',
3790         },
3791     }, {
3792         'note': 'playlists, series',
3793         'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3794         'playlist_mincount': 5,
3795         'info_dict': {
3796             'id': 'UCYO_jab_esuFRV4b17AJtAw',
3797             'title': '3Blue1Brown - Playlists',
3798             'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3799             'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3800             'uploader': '3Blue1Brown',
3801         },
3802     }, {
3803         'note': 'playlists, singlepage',
3804         'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3805         'playlist_mincount': 4,
3806         'info_dict': {
3807             'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3808             'title': 'ThirstForScience - Playlists',
3809             'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3810             'uploader': 'ThirstForScience',
3811             'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3812         }
3813     }, {
3814         'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3815         'only_matching': True,
3816     }, {
3817         'note': 'basic, single video playlist',
3818         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3819         'info_dict': {
3820             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3821             'uploader': 'Sergey M.',
3822             'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3823             'title': 'youtube-dl public playlist',
3824         },
3825         'playlist_count': 1,
3826     }, {
3827         'note': 'empty playlist',
3828         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3829         'info_dict': {
3830             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3831             'uploader': 'Sergey M.',
3832             'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3833             'title': 'youtube-dl empty playlist',
3834         },
3835         'playlist_count': 0,
3836     }, {
3837         'note': 'Home tab',
3838         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3839         'info_dict': {
3840             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3841             'title': 'lex will - Home',
3842             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3843             'uploader': 'lex will',
3844             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3845         },
3846         'playlist_mincount': 2,
3847     }, {
3848         'note': 'Videos tab',
3849         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3850         'info_dict': {
3851             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3852             'title': 'lex will - Videos',
3853             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3854             'uploader': 'lex will',
3855             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3856         },
3857         'playlist_mincount': 975,
3858     }, {
3859         'note': 'Videos tab, sorted by popular',
3860         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3861         'info_dict': {
3862             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3863             'title': 'lex will - Videos',
3864             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3865             'uploader': 'lex will',
3866             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3867         },
3868         'playlist_mincount': 199,
3869     }, {
3870         'note': 'Playlists tab',
3871         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3872         'info_dict': {
3873             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3874             'title': 'lex will - Playlists',
3875             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3876             'uploader': 'lex will',
3877             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3878         },
3879         'playlist_mincount': 17,
3880     }, {
3881         'note': 'Community tab',
3882         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3883         'info_dict': {
3884             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3885             'title': 'lex will - Community',
3886             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3887             'uploader': 'lex will',
3888             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3889         },
3890         'playlist_mincount': 18,
3891     }, {
3892         'note': 'Channels tab',
3893         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3894         'info_dict': {
3895             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3896             'title': 'lex will - Channels',
3897             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3898             'uploader': 'lex will',
3899             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3900         },
3901         'playlist_mincount': 12,
3902     }, {
3903         'note': 'Search tab',
3904         'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3905         'playlist_mincount': 40,
3906         'info_dict': {
3907             'id': 'UCYO_jab_esuFRV4b17AJtAw',
3908             'title': '3Blue1Brown - Search - linear algebra',
3909             'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3910             'uploader': '3Blue1Brown',
3911             'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3912         },
3913     }, {
3914         'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3915         'only_matching': True,
3916     }, {
3917         'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3918         'only_matching': True,
3919     }, {
3920         'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3921         'only_matching': True,
3922     }, {
3923         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3924         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3925         'info_dict': {
3926             'title': '29C3: Not my department',
3927             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3928             'uploader': 'Christiaan008',
3929             'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3930             'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3931         },
3932         'playlist_count': 96,
3933     }, {
3934         'note': 'Large playlist',
3935         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3936         'info_dict': {
3937             'title': 'Uploads from Cauchemar',
3938             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3939             'uploader': 'Cauchemar',
3940             'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3941         },
3942         'playlist_mincount': 1123,
3943     }, {
3944         'note': 'even larger playlist, 8832 videos',
3945         'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3946         'only_matching': True,
3947     }, {
3948         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3949         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3950         'info_dict': {
3951             'title': 'Uploads from Interstellar Movie',
3952             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3953             'uploader': 'Interstellar Movie',
3954             'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3955         },
3956         'playlist_mincount': 21,
3957     }, {
3958         'note': 'Playlist with "show unavailable videos" button',
3959         'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3960         'info_dict': {
3961             'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3962             'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3963             'uploader': 'Phim Siêu Nhân Nhật Bản',
3964             'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3965         },
3966         'playlist_mincount': 200,
3967     }, {
3968         'note': 'Playlist with unavailable videos in page 7',
3969         'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3970         'info_dict': {
3971             'title': 'Uploads from BlankTV',
3972             'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3973             'uploader': 'BlankTV',
3974             'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3975         },
3976         'playlist_mincount': 1000,
3977     }, {
3978         'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3979         'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3980         'info_dict': {
3981             'title': 'Data Analysis with Dr Mike Pound',
3982             'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3983             'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3984             'uploader': 'Computerphile',
3985             'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3986         },
3987         'playlist_mincount': 11,
3988     }, {
3989         'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3990         'only_matching': True,
3991     }, {
3992         'note': 'Playlist URL that does not actually serve a playlist',
3993         'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3994         'info_dict': {
3995             'id': 'FqZTN594JQw',
3996             'ext': 'webm',
3997             'title': "Smiley's People 01 detective, Adventure Series, Action",
3998             'uploader': 'STREEM',
3999             'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
4000             'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
4001             'upload_date': '20150526',
4002             'license': 'Standard YouTube License',
4003             'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
4004             'categories': ['People & Blogs'],
4005             'tags': list,
4006             'view_count': int,
4007             'like_count': int,
4008             'dislike_count': int,
4009         },
4010         'params': {
4011             'skip_download': True,
4012         },
4013         'skip': 'This video is not available.',
4014         'add_ie': [YoutubeIE.ie_key()],
4015     }, {
4016         'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
4017         'only_matching': True,
4018     }, {
4019         'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
4020         'only_matching': True,
4021     }, {
4022         'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
4023         'info_dict': {
4024             'id': '3yImotZU3tw',  # This will keep changing
4025             'ext': 'mp4',
4026             'title': compat_str,
4027             'uploader': 'Sky News',
4028             'uploader_id': 'skynews',
4029             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
4030             'upload_date': r're:\d{8}',
4031             'description': compat_str,
4032             'categories': ['News & Politics'],
4033             'tags': list,
4034             'like_count': int,
4035             'dislike_count': int,
4036         },
4037         'params': {
4038             'skip_download': True,
4039         },
4040         'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
4041     }, {
4042         'url': 'https://www.youtube.com/user/TheYoungTurks/live',
4043         'info_dict': {
4044             'id': 'a48o2S1cPoo',
4045             'ext': 'mp4',
4046             'title': 'The Young Turks - Live Main Show',
4047             'uploader': 'The Young Turks',
4048             'uploader_id': 'TheYoungTurks',
4049             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
4050             'upload_date': '20150715',
4051             'license': 'Standard YouTube License',
4052             'description': 'md5:438179573adcdff3c97ebb1ee632b891',
4053             'categories': ['News & Politics'],
4054             'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
4055             'like_count': int,
4056             'dislike_count': int,
4057         },
4058         'params': {
4059             'skip_download': True,
4060         },
4061         'only_matching': True,
4062     }, {
4063         'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
4064         'only_matching': True,
4065     }, {
4066         'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
4067         'only_matching': True,
4068     }, {
4069         'note': 'A channel that is not live. Should raise error',
4070         'url': 'https://www.youtube.com/user/numberphile/live',
4071         'only_matching': True,
4072     }, {
4073         'url': 'https://www.youtube.com/feed/trending',
4074         'only_matching': True,
4075     }, {
4076         'url': 'https://www.youtube.com/feed/library',
4077         'only_matching': True,
4078     }, {
4079         'url': 'https://www.youtube.com/feed/history',
4080         'only_matching': True,
4081     }, {
4082         'url': 'https://www.youtube.com/feed/subscriptions',
4083         'only_matching': True,
4084     }, {
4085         'url': 'https://www.youtube.com/feed/watch_later',
4086         'only_matching': True,
4087     }, {
4088         'note': 'Recommended - redirects to home page.',
4089         'url': 'https://www.youtube.com/feed/recommended',
4090         'only_matching': True,
4091     }, {
4092         'note': 'inline playlist with not always working continuations',
4093         'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
4094         'only_matching': True,
4095     }, {
4096         'url': 'https://www.youtube.com/course',
4097         'only_matching': True,
4098     }, {
4099         'url': 'https://www.youtube.com/zsecurity',
4100         'only_matching': True,
4101     }, {
4102         'url': 'http://www.youtube.com/NASAgovVideo/videos',
4103         'only_matching': True,
4104     }, {
4105         'url': 'https://www.youtube.com/TheYoungTurks/live',
4106         'only_matching': True,
4107     }, {
4108         'url': 'https://www.youtube.com/hashtag/cctv9',
4109         'info_dict': {
4110             'id': 'cctv9',
4111             'title': '#cctv9',
4112         },
4113         'playlist_mincount': 350,
4114     }, {
4115         'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
4116         'only_matching': True,
4117     }, {
4118         'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
4119         'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4120         'only_matching': True
4121     }, {
4122         'note': '/browse/ should redirect to /channel/',
4123         'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
4124         'only_matching': True
4125     }, {
4126         'note': 'VLPL, should redirect to playlist?list=PL...',
4127         'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4128         'info_dict': {
4129             'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4130             'uploader': 'NoCopyrightSounds',
4131             'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
4132             'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
4133             'title': 'NCS Releases',
4134         },
4135         'playlist_mincount': 166,
4136     }, {
4137         'note': 'Topic, should redirect to playlist?list=UU...',
4138         'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
4139         'info_dict': {
4140             'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
4141             'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4142             'title': 'Uploads from Royalty Free Music - Topic',
4143             'uploader': 'Royalty Free Music - Topic',
4144         },
4145         'expected_warnings': [
4146             'A channel/user page was given',
4147             'The URL does not have a videos tab',
4148         ],
4149         'playlist_mincount': 101,
4150     }, {
4151         'note': 'Topic without a UU playlist',
4152         'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
4153         'info_dict': {
4154             'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
4155             'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
4156         },
4157         'expected_warnings': [
4158             'A channel/user page was given',
4159             'The URL does not have a videos tab',
4160             'Falling back to channel URL',
4161         ],
4162         'playlist_mincount': 9,
4163     }, {
4164         'note': 'Youtube music Album',
4165         'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
4166         'info_dict': {
4167             'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
4168             'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
4169         },
4170         'playlist_count': 50,
4171     }, {
4172         'note': 'unlisted single video playlist',
4173         'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
4174         'info_dict': {
4175             'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
4176             'uploader': 'colethedj',
4177             'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
4178             'title': 'yt-dlp unlisted playlist test',
4179             'availability': 'unlisted'
4180         },
4181         'playlist_count': 1,
4182     }, {
4183         'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
4184         'url': 'https://www.youtube.com/feed/recommended',
4185         'info_dict': {
4186             'id': 'recommended',
4187             'title': 'recommended',
4188         },
4189         'playlist_mincount': 50,
4190         'params': {
4191             'skip_download': True,
4192             'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4193         },
4194     }, {
4195         'note': 'API Fallback: /videos tab, sorted by oldest first',
4196         'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
4197         'info_dict': {
4198             'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
4199             'title': 'Cody\'sLab - Videos',
4200             'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
4201             'uploader': 'Cody\'sLab',
4202             'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
4203         },
4204         'playlist_mincount': 650,
4205         'params': {
4206             'skip_download': True,
4207             'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4208         },
4209     }, {
4210         'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
4211         'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
4212         'info_dict': {
4213             'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
4214             'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4215             'title': 'Uploads from Royalty Free Music - Topic',
4216             'uploader': 'Royalty Free Music - Topic',
4217         },
4218         'expected_warnings': [
4219             'A channel/user page was given',
4220             'The URL does not have a videos tab',
4221         ],
4222         'playlist_mincount': 101,
4223         'params': {
4224             'skip_download': True,
4225             'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4226         },
4227     }]
4228
4229     @classmethod
4230     def suitable(cls, url):
4231         return False if YoutubeIE.suitable(url) else super(
4232             YoutubeTabIE, cls).suitable(url)
4233
4234     def _real_extract(self, url):
4235         url, smuggled_data = unsmuggle_url(url, {})
4236         if self.is_music_url(url):
4237             smuggled_data['is_music_url'] = True
4238         info_dict = self.__real_extract(url, smuggled_data)
4239         if info_dict.get('entries'):
4240             info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4241         return info_dict
4242
    # Splits a URL into three named parts: 'pre' (the portion matched by _VALID_URL),
    # 'tab' (an optional '/name' segment, only attempted when the channel_type group
    # of _VALID_URL took part in the match) and 'post' (everything that remains)
    _URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$')
4244
    def __real_extract(self, url, smuggled_data):
        """Normalize a tab/playlist/channel URL, fetch its data and dispatch the extraction.

        @param url            URL to extract; its host is replaced by www.youtube.com
        @param smuggled_data  dict of smuggled options; only 'is_music_url' is read here

        Returns one of:
          - a url_result pointing back at YoutubeTabIE (YouTube Music album resolved to its canonical URL)
          - a url_result for YoutubeIE (single video: --no-playlist, or no playlist recognized)
          - the result of _extract_from_tabs or _extract_from_playlist

        Raises ExtractorError when an album cannot be resolved, when a watch URL
        carries neither a video nor a playlist ID, when a /live tab did not
        redirect to a video, or when the page data matches none of the known layouts.
        """
        item_id = self._match_id(url)
        # Rebuild the URL with the host forced to www.youtube.com
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # Match against _URL_RE and replace unmatched (None) groups with ''
            # so that the values can be used as strings without None checks
            mobj = self._URL_RE.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj, redirect_warning = get_mobj(url), None
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
                elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                    mdata = self._extract_tab_endpoint(
                        f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
                    murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                        get_all=False, expected_type=compat_str)
                    if not murl:
                        raise ExtractorError('Failed to resolve album to playlist')
                    # Hand the canonical URL back to this extractor for a fresh extraction
                    return self.url_result(murl, ie=YoutubeTabIE.ie_key())
                elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                    pre = f'https://www.youtube.com/channel/{item_id}'

        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        # Reassemble the (possibly rewritten) URL and re-parse it so that mobj reflects the changes
        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id, playlist_id = [qs.get(key, [None])[0] for key in ('v', 'list')]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither a video nor a playlist ID, youtube redirects to the home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
            url = f'https://www.youtube.com/playlist?list={playlist_id}'
            mobj = get_mobj(url)

        if video_id and playlist_id:
            # The URL names both a video and a playlist; --no-playlist decides which one is used
            if self.get_param('noplaylist'):
                self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
                return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                       ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

        data, ytcfg = self._extract_data(url, item_id)

        tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    # The selected tab is not the requested one. Note that this
                    # replaces any warning set earlier in this method
                    redirect_warning = f'The URL does not have a {mobj["tab"][1:]} tab'
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        pl_id = f'UU{item_id[2:]}'
                        pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                        try:
                            data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True)
                        except ExtractorError:
                            # Keep the data of the original page and only extend the warning
                            redirect_warning += ' and the playlist redirect gave error'
                        else:
                            item_id, url, tab_name = pl_id, pl_url, mobj['tab'][1:]
                            redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                    if tab_name.lower() != mobj['tab'][1:]:
                        redirect_warning += f'. {tab_name} tab is being downloaded instead'

        if redirect_warning:
            self.report_warning(redirect_warning)
        self.write_debug(f'Final URL: {url}')

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            # Keep the current data when the reload helper returns a falsy value
            data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
        self._extract_and_report_alerts(data, only_once=True)
        # `data` may have been replaced above, so look the tabs up again
        tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
        if tabs:
            return self._extract_from_tabs(item_id, ytcfg, data, tabs)

        playlist = traverse_obj(
            data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

        # Neither tabs nor a playlist: fall back to a single video, preferring the
        # ID reported by the page over the one taken from the URL query
        video_id = traverse_obj(
            data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                   ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4356
4357
class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist IDs and youtube/youtubekids/invidious URLs that carry
    # a `list=` query parameter, then delegates the extraction to YoutubeTabIE
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle `url`.

        Rejects anything YoutubeTabIE already accepts and any URL with a
        non-empty `v` query parameter; otherwise the inherited check decides.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): parse_qs is imported locally here even though the module
        # imports it at the top; presumably deliberate so this method keeps
        # working if its source is reused outside this module. Confirm before removing
        from ..utils import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return False if has_video_id else super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite `url` into a www.youtube.com/playlist URL and delegate to YoutubeTabIE.

        The query string of the original URL is carried over; when it has
        none (e.g. a bare playlist ID), `list=<playlist_id>` is used instead.
        For YouTube Music URLs, 'is_music_url' is smuggled into the new URL.
        """
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4443
4444
class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a ``list=`` parameter.

    The link is rewritten to a regular ``/watch`` URL and delegated to
    ``YoutubeTabIE``.
    """
    IE_DESC = 'youtu.be'
    # 11-character video ID followed (anywhere later in the URL) by list=<playlist id>
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rebuild the short link as a ``/watch?v=...&list=...`` URL for ``YoutubeTabIE``."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        # The result is labelled with the playlist ID, not the video ID.
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4483
4484
class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` shorthand into the user's videos page."""
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate ``/user/<id>/videos`` to ``YoutubeTabIE``."""
        user_id = self._match_id(url)
        videos_url = 'https://www.youtube.com/user/%s/videos' % user_id
        return self.url_result(
            videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4498
4499
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Keyword extractor mapping ``:ytfav`` (and spelling variants) to the ``LL`` playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    # Accepts :ytfav, :ytfavs, :ytfavorite(s) and :ytfavourite(s)
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the ``LL`` playlist to ``YoutubeTabIE``; ``url`` itself is not inspected."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
4517
4518
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """YouTube search extractor registered under the ``ytsearch`` search key.

    This class only declares configuration; the search logic comes from its
    base classes, which are defined outside this section.
    """
    IE_DESC = 'YouTube search'
    IE_NAME = 'youtube:search'
    # Search prefix; presumably consumed by SearchInfoExtractor -- confirm there
    _SEARCH_KEY = 'ytsearch'
    # NOTE(review): looks like the same kind of value as the `sp` URL parameter
    # that YoutubeSearchURLIE forwards to `_search_results` -- confirm
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    # No test cases are declared for this extractor
    _TESTS = []
4525
4526
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """YouTube search extractor under the ``ytsearchdate`` key (newest videos first).

    This class only declares configuration; the search logic comes from its
    base classes, which are defined outside this section.
    """
    # Derived from the plain search extractor's name: 'youtube:search:date'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    # Search prefix; presumably consumed by SearchInfoExtractor -- confirm there
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
4532
4533
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extract results from ``youtube.com/results`` URLs.

    The search terms come from the ``search_query`` (or ``q``) parameter and
    the optional ``sp`` parameter is forwarded to ``_search_results``.
    """
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of search results whose ID and title are the query text."""
        params = parse_qs(url)
        # `search_query` takes precedence; `q` is the fallback spelling
        query = (params.get('search_query') or params.get('q'))[0]
        # None when the URL has no `sp` parameter
        sp = params.get('sp', (None,))[0]
        entries = self._search_results(query, sp)
        return self.playlist_result(entries, query, query)
4562
4563
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed extractors.

    A subclass has to provide ``_FEED_NAME``; it is used both for the
    extractor name (``youtube:<feed>``) and for the ``/feed/<feed>`` URL
    that gets delegated to YoutubeTabIE.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        feed = self._FEED_NAME
        return 'youtube:%s' % feed

    def _real_extract(self, url):
        # The incoming URL/keyword is ignored; only the feed name matters
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4580
4581
class YoutubeWatchLaterIE(InfoExtractor):
    """Keyword extractor mapping ``:ytwatchlater`` to the ``WL`` playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the ``WL`` playlist to ``YoutubeTabIE``; ``url`` itself is not inspected."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
4594
4595
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``recommended`` feed.

    The feed URL and extractor name are built by YoutubeFeedsInfoExtractor
    from ``_FEED_NAME``.
    """
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    # Either the bare youtube.com home page (optional "www.", optional trailing
    # slash, then "?", "#" or end of string) or the :ytrec / :ytrecommended keyword
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Overrides the base class value (True)
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4611
4612
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``subscriptions`` feed.

    The feed URL and extractor name are built by YoutubeFeedsInfoExtractor
    from ``_FEED_NAME``.
    """
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    # Accepts :ytsub, :ytsubs, :ytsubscription and :ytsubscriptions
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4624
4625
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``history`` feed.

    The feed URL and extractor name are built by YoutubeFeedsInfoExtractor
    from ``_FEED_NAME``.
    """
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    # Accepts :ythis and :ythistory
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4634
4635
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch YouTube URLs that lack a video ID and explain the likely cause.

    The pattern matches ``/watch`` URLs whose query string is empty or holds
    exactly one of ``feature``, ``annotation_id``, ``x-yt-cl``, ``hl`` or
    ``t`` (so no ``v=``), as well as ``attribution_link?a=...`` URLs with
    nothing after the ``a`` parameter. Extraction always fails with a hint
    about quoting the URL.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ``ExtractorError`` advising to quote the URL."""
        # The example commands name this project's executable (yt-dlp), so the
        # advice can be followed verbatim by the user.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  yt-dlp BaW_jenozKc  .',
            expected=True)
4683
4684
class YoutubeClipIE(InfoExtractor):
    """Placeholder for ``youtube.com/clip/`` URLs: warn, then defer to the ``Generic`` extractor."""
    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Emit the "clips unsupported" warning and pass ``url`` on unchanged."""
        warning = 'YouTube clips are not currently supported. The entire video will be downloaded instead'
        self.report_warning(warning)
        return self.url_result(url, ie='Generic')
4693
4694
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch ``/watch?v=`` URLs that end in a video ID of only 1-10 characters."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ``ExtractorError`` naming the incomplete ID and the URL."""
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)