yt_dlp/extractor/youtube.py

   1 import calendar
   2 import copy
   3 import datetime
   4 import functools
   5 import hashlib
   6 import itertools
   7 import json
   8 import math
   9 import os.path
  10 import random
  11 import re
  12 import sys
  13 import threading
  14 import time
  15 import traceback
  16
  17 from .common import InfoExtractor, SearchInfoExtractor
  18 from ..compat import (
  19     compat_chr,
  20     compat_HTTPError,
  21     compat_parse_qs,
  22     compat_str,
  23     compat_urllib_parse_unquote_plus,
  24     compat_urllib_parse_urlencode,
  25     compat_urllib_parse_urlparse,
  26     compat_urlparse,
  27 )
  28 from ..jsinterp import JSInterpreter
  29 from ..utils import (
  30     NO_DEFAULT,
  31     ExtractorError,
  32     bug_reports_message,
  33     clean_html,
  34     datetime_from_str,
  35     dict_get,
  36     error_to_compat_str,
  37     float_or_none,
  38     format_field,
  39     get_first,
  40     int_or_none,
  41     is_html,
  42     join_nonempty,
  43     js_to_json,
  44     mimetype2ext,
  45     network_exceptions,
  46     orderedSet,
  47     parse_codecs,
  48     parse_count,
  49     parse_duration,
  50     parse_iso8601,
  51     parse_qs,
  52     qualities,
  53     remove_end,
  54     remove_start,
  55     smuggle_url,
  56     str_or_none,
  57     str_to_int,
  58     strftime_or_none,
  59     traverse_obj,
  60     try_get,
  61     unescapeHTML,
  62     unified_strdate,
  63     unified_timestamp,
  64     unsmuggle_url,
  65     update_url_query,
  66     url_or_none,
  67     urljoin,
  68     variadic,
  69 )
  70
# any clients starting with _ cannot be explicitly requested by the user
# Built-in InnerTube client configurations, keyed by client name.
# Every entry carries the request context ('INNERTUBE_CONTEXT') and the numeric
# 'INNERTUBE_CONTEXT_CLIENT_NAME'. 'INNERTUBE_API_KEY', 'INNERTUBE_HOST' and
# 'REQUIRE_JS_PLAYER' are optional here: build_innertube_clients() fills in
# their defaults and also adds a 'priority' to every entry.
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20211221.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20211215.00.01',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20211213.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20211220.02.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.49',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.49',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.57',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.47',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.46',
                'deviceModel': 'iPhone14,3',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.46',
                'deviceModel': 'iPhone14,3',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.57',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.47',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20211221.01.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85
    },
}
 229
 230
 231 def _split_innertube_client(client_name):
 232     variant, *base = client_name.rsplit('.', 1)
 233     if base:
 234         return variant, base[0], variant
 235     base, *variant = client_name.split('_', 1)
 236     return client_name, base, variant[0] if variant else None
 237
 238
 239 def build_innertube_clients():
 240     THIRD_PARTY = {
 241         'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
 242     }
 243     BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
 244     priority = qualities(BASE_CLIENTS[::-1])
 245
 246     for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
 247         ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
 248         ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
 249         ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
 250         ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
 251
 252         _, base_client, variant = _split_innertube_client(client)
 253         ytcfg['priority'] = 10 * priority(base_client)
 254
 255         if not variant:
 256             INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
 257             embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
 258             embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
 259             embedscreen['priority'] -= 3
 260         elif variant == 'embedded':
 261             ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
 262             ytcfg['priority'] -= 2
 263         else:
 264             ytcfg['priority'] -= 3
 265
 266
# Complete INNERTUBE_CLIENTS (defaults, priorities, embedscreen variants) at import time
build_innertube_clients()
 268
 269
 270 class YoutubeBaseInfoExtractor(InfoExtractor):
 271     """Provide base functions for Youtube extractors"""
 272
    # Regex alternation (without an enclosing group) of reserved URL path names.
    # NOTE(review): presumably used to tell these paths apart from channel
    # names in URL patterns - confirm against the patterns that embed it.
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
        r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
        r'browse|oembed|get_video_info|iframe_api|s/player|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    # Matches a playlist id: either a known prefix followed by at least 10 id
    # characters, or one of the fixed ids RDMM, WL, LL and LM
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    # _NETRC_MACHINE = 'youtube'

    # If True it will raise an error if no login info is provided
    # (checked by _check_login_required, which looks for cookie options)
    _LOGIN_REQUIRED = False
 285
    # Regex fragments, one per hostname, for Invidious (and similar front-end)
    # sites; most allow an optional "www." prefix.
    _INVIDIOUS_SITES = (
        # invidious-redirect websites
        r'(?:www\.)?redirect\.invidious\.io',
        r'(?:(?:www|dev)\.)?invidio\.us',
        # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
        r'(?:www\.)?invidious\.pussthecat\.org',
        r'(?:www\.)?invidious\.zee\.li',
        r'(?:www\.)?invidious\.ethibox\.fr',
        r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
        r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
        r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
        # youtube-dl invidious instances list
        r'(?:(?:www|no)\.)?invidiou\.sh',
        r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
        r'(?:www\.)?invidious\.kabi\.tk',
        r'(?:www\.)?invidious\.mastodon\.host',
        r'(?:www\.)?invidious\.zapashcanon\.fr',
        r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
        r'(?:www\.)?invidious\.tinfoil-hat\.net',
        r'(?:www\.)?invidious\.himiko\.cloud',
        r'(?:www\.)?invidious\.reallyancient\.tech',
        r'(?:www\.)?invidious\.tube',
        r'(?:www\.)?invidiou\.site',
        r'(?:www\.)?invidious\.site',
        r'(?:www\.)?invidious\.xyz',
        r'(?:www\.)?invidious\.nixnet\.xyz',
        r'(?:www\.)?invidious\.048596\.xyz',
        r'(?:www\.)?invidious\.drycat\.fr',
        r'(?:www\.)?inv\.skyn3t\.in',
        r'(?:www\.)?tube\.poal\.co',
        r'(?:www\.)?tube\.connect\.cafe',
        r'(?:www\.)?vid\.wxzm\.sx',
        r'(?:www\.)?vid\.mint\.lgbt',
        r'(?:www\.)?vid\.puffyan\.us',
        r'(?:www\.)?yewtu\.be',
        r'(?:www\.)?yt\.elukerio\.org',
        r'(?:www\.)?yt\.lelux\.fi',
        r'(?:www\.)?invidious\.ggc-project\.de',
        r'(?:www\.)?yt\.maisputain\.ovh',
        r'(?:www\.)?ytprivate\.com',
        r'(?:www\.)?invidious\.13ad\.de',
        r'(?:www\.)?invidious\.toot\.koeln',
        r'(?:www\.)?invidious\.fdn\.fr',
        r'(?:www\.)?watch\.nettohikari\.com',
        r'(?:www\.)?invidious\.namazso\.eu',
        r'(?:www\.)?invidious\.silkky\.cloud',
        r'(?:www\.)?invidious\.exonip\.de',
        r'(?:www\.)?invidious\.riverside\.rocks',
        r'(?:www\.)?invidious\.blamefran\.net',
        r'(?:www\.)?invidious\.moomoo\.de',
        r'(?:www\.)?ytb\.trom\.tf',
        r'(?:www\.)?yt\.cyberhost\.uk',
        r'(?:www\.)?kgg2m7yk5aybusll\.onion',
        r'(?:www\.)?qklhadlycap4cnod\.onion',
        r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
        r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
        r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
        r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
        r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
        r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
        r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
        r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
        r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
        r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
    )
 351
 352     def _initialize_consent(self):
 353         cookies = self._get_cookies('https://www.youtube.com/')
 354         if cookies.get('__Secure-3PSID'):
 355             return
 356         consent_id = None
 357         consent = cookies.get('CONSENT')
 358         if consent:
 359             if 'YES' in consent.value:
 360                 return
 361             consent_id = self._search_regex(
 362                 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
 363         if not consent_id:
 364             consent_id = random.randint(100, 999)
 365         self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
 366
 367     def _initialize_pref(self):
 368         cookies = self._get_cookies('https://www.youtube.com/')
 369         pref_cookie = cookies.get('PREF')
 370         pref = {}
 371         if pref_cookie:
 372             try:
 373                 pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
 374             except ValueError:
 375                 self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
 376         pref.update({'hl': 'en', 'tz': 'UTC'})
 377         self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))
 378
 379     def _real_initialize(self):
 380         self._initialize_pref()
 381         self._initialize_consent()
 382         self._check_login_required()
 383
 384     def _check_login_required(self):
 385         if (self._LOGIN_REQUIRED
 386                 and self.get_param('cookiefile') is None
 387                 and self.get_param('cookiesfrombrowser') is None):
 388             self.raise_login_required('Login details are needed to download this content', method='cookies')
 389
    # Matches the `ytInitialData = {...};` or `window["ytInitialData"] = {...};`
    # assignment and captures the (non-greedily matched) object literal
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    # Same, for the `ytInitialPlayerResponse = {...};` assignment
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Text expected right after such an assignment; extract_yt_initial_data()
    # appends it to the data pattern to anchor the end of the capture
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
 393
 394     def _get_default_ytcfg(self, client='web'):
 395         return copy.deepcopy(INNERTUBE_CLIENTS[client])
 396
 397     def _get_innertube_host(self, client='web'):
 398         return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
 399
 400     def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
 401         # try_get but with fallback to default ytcfg client values when present
 402         _func = lambda y: try_get(y, getter, expected_type)
 403         return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
 404
 405     def _extract_client_name(self, ytcfg, default_client='web'):
 406         return self._ytcfg_get_safe(
 407             ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
 408                     lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
 409
 410     def _extract_client_version(self, ytcfg, default_client='web'):
 411         return self._ytcfg_get_safe(
 412             ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
 413                     lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
 414
 415     def _extract_api_key(self, ytcfg=None, default_client='web'):
 416         return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
 417
 418     def _extract_context(self, ytcfg=None, default_client='web'):
 419         context = get_first(
 420             (ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
 421         # Enforce language and tz for extraction
 422         client_context = traverse_obj(context, 'client', expected_type=dict, default={})
 423         client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
 424         return context
 425
    # SAPISID value cached by _generate_sapisidhash_header(): None until the
    # cookies have been checked, False when no usable cookie was found
    _SAPISID = None
 427
 428     def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
 429         time_now = round(time.time())
 430         if self._SAPISID is None:
 431             yt_cookies = self._get_cookies('https://www.youtube.com')
 432             # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
 433             # See: https://github.com/yt-dlp/yt-dlp/issues/393
 434             sapisid_cookie = dict_get(
 435                 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
 436             if sapisid_cookie and sapisid_cookie.value:
 437                 self._SAPISID = sapisid_cookie.value
 438                 self.write_debug('Extracted SAPISID cookie')
 439                 # SAPISID cookie is required if not already present
 440                 if not yt_cookies.get('SAPISID'):
 441                     self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
 442                     self._set_cookie(
 443                         '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
 444             else:
 445                 self._SAPISID = False
 446         if not self._SAPISID:
 447             return None
 448         # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
 449         sapisidhash = hashlib.sha1(
 450             f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
 451         return f'SAPISIDHASH {time_now}_{sapisidhash}'
 452
 453     def _call_api(self, ep, query, video_id, fatal=True, headers=None,
 454                   note='Downloading API JSON', errnote='Unable to download API page',
 455                   context=None, api_key=None, api_hostname=None, default_client='web'):
 456
 457         data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
 458         data.update(query)
 459         real_headers = self.generate_api_headers(default_client=default_client)
 460         real_headers.update({'content-type': 'application/json'})
 461         if headers:
 462             real_headers.update(headers)
 463         return self._download_json(
 464             f'https://{api_hostname or self._get_innertube_host(default_client)}/youtubei/v1/{ep}',
 465             video_id=video_id, fatal=fatal, note=note, errnote=errnote,
 466             data=json.dumps(data).encode('utf8'), headers=real_headers,
 467             query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'})
 468
 469     def extract_yt_initial_data(self, item_id, webpage, fatal=True):
 470         data = self._search_regex(
 471             (fr'{self._YT_INITIAL_DATA_RE}\s*{self._YT_INITIAL_BOUNDARY_RE}',
 472              self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
 473         if data:
 474             return self._parse_json(data, item_id, fatal=fatal)
 475
 476     @staticmethod
 477     def _extract_session_index(*data):
 478         """
 479         Index of current account in account list.
 480         See: https://github.com/yt-dlp/yt-dlp/pull/519
 481         """
 482         for ytcfg in data:
 483             session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
 484             if session_index is not None:
 485                 return session_index
 486
 487     # Deprecated?
 488     def _extract_identity_token(self, ytcfg=None, webpage=None):
 489         if ytcfg:
 490             token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
 491             if token:
 492                 return token
 493         if webpage:
 494             return self._search_regex(
 495                 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
 496                 'identity token', default=None, fatal=False)
 497
 498     @staticmethod
 499     def _extract_account_syncid(*args):
 500         """
 501         Extract syncId required to download private playlists of secondary channels
 502         @params response and/or ytcfg
 503         """
 504         for data in args:
 505             # ytcfg includes channel_syncid if on secondary channel
 506             delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
 507             if delegated_sid:
 508                 return delegated_sid
 509             sync_ids = (try_get(
 510                 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
 511                        lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
 512             if len(sync_ids) >= 2 and sync_ids[1]:
 513                 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
 514                 # and just "user_syncid||" for primary channel. We only want the channel_syncid
 515                 return sync_ids[0]
 516
 517     @staticmethod
 518     def _extract_visitor_data(*args):
 519         """
 520         Extracts visitorData from an API response or ytcfg
 521         Appears to be used to track session state
 522         """
 523         return get_first(
 524             args, [('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))],
 525             expected_type=str)
 526
 527     @property
 528     def is_authenticated(self):
 529         return bool(self._generate_sapisidhash_header())
 530
 531     def extract_ytcfg(self, video_id, webpage):
 532         if not webpage:
 533             return {}
 534         return self._parse_json(
 535             self._search_regex(
 536                 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
 537                 default='{}'), video_id, fatal=False) or {}
 538
 539     def generate_api_headers(
 540             self, *, ytcfg=None, account_syncid=None, session_index=None,
 541             visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
 542
 543         origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
 544         headers = {
 545             'X-YouTube-Client-Name': compat_str(
 546                 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
 547             'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
 548             'Origin': origin,
 549             'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
 550             'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
 551             'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
 552         }
 553         if session_index is None:
 554             session_index = self._extract_session_index(ytcfg)
 555         if account_syncid or session_index is not None:
 556             headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
 557
 558         auth = self._generate_sapisidhash_header(origin)
 559         if auth is not None:
 560             headers['Authorization'] = auth
 561             headers['X-Origin'] = origin
 562         return {h: v for h, v in headers.items() if v is not None}
 563
 564     def _download_ytcfg(self, client, video_id):
 565         url = {
 566             'web': 'https://www.youtube.com',
 567             'web_music': 'https://music.youtube.com',
 568             'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
 569         }.get(client)
 570         if not url:
 571             return {}
 572         webpage = self._download_webpage(
 573             url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')
 574         return self.extract_ytcfg(video_id, webpage) or {}
 575
 576     @staticmethod
 577     def _build_api_continuation_query(continuation, ctp=None):
 578         query = {
 579             'continuation': continuation
 580         }
 581         # TODO: Inconsistency with clickTrackingParams.
 582         # Currently we have a fixed ctp contained within context (from ytcfg)
 583         # and a ctp in root query for continuation.
 584         if ctp:
 585             query['clickTracking'] = {'clickTrackingParams': ctp}
 586         return query
 587
 588     @classmethod
 589     def _extract_next_continuation_data(cls, renderer):
 590         next_continuation = try_get(
 591             renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
 592                        lambda x: x['continuation']['reloadContinuationData']), dict)
 593         if not next_continuation:
 594             return
 595         continuation = next_continuation.get('continuation')
 596         if not continuation:
 597             return
 598         ctp = next_continuation.get('clickTrackingParams')
 599         return cls._build_api_continuation_query(continuation, ctp)
 600
 601     @classmethod
 602     def _extract_continuation_ep_data(cls, continuation_ep: dict):
 603         if isinstance(continuation_ep, dict):
 604             continuation = try_get(
 605                 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
 606             if not continuation:
 607                 return
 608             ctp = continuation_ep.get('clickTrackingParams')
 609             return cls._build_api_continuation_query(continuation, ctp)
 610
 611     @classmethod
 612     def _extract_continuation(cls, renderer):
 613         next_continuation = cls._extract_next_continuation_data(renderer)
 614         if next_continuation:
 615             return next_continuation
 616
 617         contents = []
 618         for key in ('contents', 'items'):
 619             contents.extend(try_get(renderer, lambda x: x[key], list) or [])
 620
 621         for content in contents:
 622             if not isinstance(content, dict):
 623                 continue
 624             continuation_ep = try_get(
 625                 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
 626                           lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
 627                 dict)
 628             continuation = cls._extract_continuation_ep_data(continuation_ep)
 629             if continuation:
 630                 return continuation
 631
 632     @classmethod
 633     def _extract_alerts(cls, data):
 634         for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
 635             if not isinstance(alert_dict, dict):
 636                 continue
 637             for alert in alert_dict.values():
 638                 alert_type = alert.get('type')
 639                 if not alert_type:
 640                     continue
 641                 message = cls._get_text(alert, 'text')
 642                 if message:
 643                     yield alert_type, message
 644
 645     def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
 646         errors = []
 647         warnings = []
 648         for alert_type, alert_message in alerts:
 649             if alert_type.lower() == 'error' and fatal:
 650                 errors.append([alert_type, alert_message])
 651             else:
 652                 warnings.append([alert_type, alert_message])
 653
 654         for alert_type, alert_message in (warnings + errors[:-1]):
 655             self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
 656         if errors:
 657             raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
 658
 659     def _extract_and_report_alerts(self, data, *args, **kwargs):
 660         return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
 661
 662     def _extract_badges(self, renderer: dict):
 663         badges = set()
 664         for badge in try_get(renderer, lambda x: x['badges'], list) or []:
 665             label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
 666             if label:
 667                 badges.add(label.lower())
 668         return badges
 669
    @staticmethod
    def _get_text(data, *path_list, max_runs=None):
        """
        Get the text of the first text object found at one of the given paths.
        A candidate's 'simpleText' is preferred; otherwise the 'text' values
        of its 'runs' (or of the candidate itself, if it is a list) are joined.
        @param path_list  paths into data to try in order; data itself is used if none is given
        @param max_runs   if set, only this many leading runs are joined
        @returns the first non-empty text; implicitly None if there is none
        """
        for path in path_list or [None]:
            if path is None:
                obj = [data]
            else:
                obj = traverse_obj(data, path, default=[])
                # Only a path with `...` or a list/tuple key is treated as yielding
                # several candidates; any other result is a single candidate
                if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                    obj = [obj]
            for item in obj:
                text = try_get(item, lambda x: x['simpleText'], compat_str)
                if text:
                    return text
                runs = try_get(item, lambda x: x['runs'], list) or []
                # The candidate may itself be the list of runs
                if not runs and isinstance(item, list):
                    runs = item

                # A falsy max_runs (None or 0) keeps all runs
                runs = runs[:min(len(runs), max_runs or len(runs))]
                text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
                if text:
                    return text
 691
 692     def _get_count(self, data, *path_list):
 693         count_text = self._get_text(data, *path_list) or ''
 694         count = parse_count(count_text)
 695         if count is None:
 696             count = str_to_int(
 697                 self._search_regex(r'^([\d,]+)', re.sub(r'\s', '', count_text), 'count', default=None))
 698         return count
 699
 700     @staticmethod
 701     def _extract_thumbnails(data, *path_list):
 702         """
 703         Extract thumbnails from thumbnails dict
 704         @param path_list: path list to level that contains 'thumbnails' key
 705         """
 706         thumbnails = []
 707         for path in path_list or [()]:
 708             for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[]):
 709                 thumbnail_url = url_or_none(thumbnail.get('url'))
 710                 if not thumbnail_url:
 711                     continue
 712                 # Sometimes youtube gives a wrong thumbnail URL. See:
 713                 # https://github.com/yt-dlp/yt-dlp/issues/233
 714                 # https://github.com/ytdl-org/youtube-dl/issues/28023
 715                 if 'maxresdefault' in thumbnail_url:
 716                     thumbnail_url = thumbnail_url.split('?')[0]
 717                 thumbnails.append({
 718                     'url': thumbnail_url,
 719                     'height': int_or_none(thumbnail.get('height')),
 720                     'width': int_or_none(thumbnail.get('width')),
 721                 })
 722         return thumbnails
 723
 724     @staticmethod
 725     def extract_relative_time(relative_time_text):
 726         """
 727         Extracts a relative time from string and converts to dt object
 728         e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'
 729         """
 730         mobj = re.search(r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
 731         if mobj:
 732             start = mobj.group('start')
 733             if start:
 734                 return datetime_from_str(start)
 735             try:
 736                 return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
 737             except ValueError:
 738                 return None
 739
 740     def _extract_time_text(self, renderer, *path_list):
 741         """@returns (timestamp, time_text)"""
 742         text = self._get_text(renderer, *path_list) or ''
 743         dt = self.extract_relative_time(text)
 744         timestamp = None
 745         if isinstance(dt, datetime.datetime):
 746             timestamp = calendar.timegm(dt.timetuple())
 747
 748         if timestamp is None:
 749             timestamp = (
 750                 unified_timestamp(text) or unified_timestamp(
 751                     self._search_regex(
 752                         (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
 753                         text.lower(), 'time text', default=None)))
 754
 755         if text and timestamp is None:
 756             self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
 757         return timestamp, text
 758
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='web'):
        """
        Call the API endpoint `ep` through _call_api and return the response, retrying on failure

        After the initial attempt, up to `extractor_retries` (default 3) further
        attempts are made when:
          - the request fails with a network exception other than HTTP 403/429
          - the alerts of an otherwise successful response raise an error
            whose message contains 'unknown error'
          - check_get_keys is non-empty and dict_get(response, check_get_keys) is falsy

        @param item_id: passed to _call_api as video_id and used when parsing error bodies
        @param note: download note; ' (retry #N)' is appended on retries
        @param ytcfg: passed to _extract_context/_extract_api_key together with default_client
        @param check_get_keys: keys handed to dict_get to decide whether the response is complete
        @param fatal: if True, errors are raised; otherwise they are reported
                      as warnings and None is returned
        @returns the API response, or None on a non-fatal failure
        """
        response = None
        last_error = None
        # Starts at -1 so that the first attempt runs with count == 0 (i.e. not a retry)
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            # last_error is only set when a previous attempt decided to retry
            if last_error:
                self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
            try:
                # Always fatal here; whether to raise is decided below based on our own `fatal`
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, network_exceptions):
                    if isinstance(e.cause, compat_HTTPError):
                        # Peek at the start of the error body; only non-HTML bodies
                        # are parsed as JSON to look for an error message to report
                        first_bytes = e.cause.read(512)
                        if not is_html(first_bytes):
                            yt_error = try_get(
                                self._parse_json(
                                    self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                                lambda x: x['error']['message'], compat_str)
                            if yt_error:
                                self._report_alerts([('ERROR', yt_error)], fatal=False)
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    # We also want to catch all other network exceptions since errors in later pages can be troublesome
                    # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                    # HTTP 403 and 429 are never retried
                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                        last_error = error_to_compat_str(e.cause or e.msg)
                        if count < retries:
                            continue
                # Reached for non-network errors, HTTP 403/429, or when retries are exhausted
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                try:
                    self._extract_and_report_alerts(response, only_once=True)
                except ExtractorError as e:
                    # YouTube servers may return errors we want to retry on in a 200 OK response
                    # See: https://github.com/yt-dlp/yt-dlp/issues/839
                    # NOTE(review): if this happens on the final attempt, the loop condition fails
                    # after `continue` and the last response is returned without raising - confirm intended
                    if 'unknown error' in e.msg.lower():
                        last_error = e.msg
                        continue
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                # Done if no completeness check was requested or the response passes it
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
 830
 831     @staticmethod
 832     def is_music_url(url):
 833         return re.match(r'https?://music\.youtube\.com/', url) is not None
 834
 835     def _extract_video(self, renderer):
 836         video_id = renderer.get('videoId')
 837         title = self._get_text(renderer, 'title')
 838         description = self._get_text(renderer, 'descriptionSnippet')
 839         duration = parse_duration(self._get_text(
 840             renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
 841         if duration is None:
 842             duration = parse_duration(self._search_regex(
 843                 r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
 844                 traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str),
 845                 video_id, default=None, group='duration'))
 846
 847         view_count = self._get_count(renderer, 'viewCountText')
 848
 849         uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
 850         channel_id = traverse_obj(
 851             renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
 852             expected_type=str, get_all=False)
 853         timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
 854         scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
 855         overlay_style = traverse_obj(
 856             renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
 857             get_all=False, expected_type=str)
 858         badges = self._extract_badges(renderer)
 859         thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
 860         navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
 861             renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
 862             expected_type=str)) or ''
 863         url = f'https://www.youtube.com/watch?v={video_id}'
 864         if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
 865             url = f'https://www.youtube.com/shorts/{video_id}'
 866
 867         return {
 868             '_type': 'url',
 869             'ie_key': YoutubeIE.ie_key(),
 870             'id': video_id,
 871             'url': url,
 872             'title': title,
 873             'description': description,
 874             'duration': duration,
 875             'view_count': view_count,
 876             'uploader': uploader,
 877             'channel_id': channel_id,
 878             'thumbnails': thumbnails,
 879             'upload_date': (strftime_or_none(timestamp, '%Y%m%d')
 880                             if self._configuration_arg('approximate_date', ie_key='youtubetab')
 881                             else None),
 882             'live_status': ('is_upcoming' if scheduled_timestamp is not None
 883                             else 'was_live' if 'streamed' in time_text.lower()
 884                             else 'is_live' if overlay_style is not None and overlay_style == 'LIVE' or 'live now' in badges
 885                             else None),
 886             'release_timestamp': scheduled_timestamp,
 887             'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
 888         }
 889
 890
 891 class YoutubeIE(YoutubeBaseInfoExtractor):
 892     IE_DESC = 'YouTube'
 893     _VALID_URL = r"""(?x)^
 894                      (
 895                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 896                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
 897                             (?:www\.)?deturl\.com/www\.youtube\.com|
 898                             (?:www\.)?pwnyoutube\.com|
 899                             (?:www\.)?hooktube\.com|
 900                             (?:www\.)?yourepeat\.com|
 901                             tube\.majestyc\.net|
 902                             %(invidious)s|
 903                             youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
 904                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 905                          (?:                                                  # the various things that can precede the ID:
 906                              (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream))  # v/ or embed/ or e/ or shorts/
 907                              |(?:                                             # or the v= param in all its forms
 908                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 909                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 910                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 911                                  v=
 912                              )
 913                          ))
 914                          |(?:
 915                             youtu\.be|                                        # just youtu.be/xxxx
 916                             vid\.plus|                                        # or vid.plus/xxxx
 917                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 918                             %(invidious)s
 919                          )/
 920                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 921                          )
 922                      )?                                                       # all until now is optional -> you can pass the naked ID
 923                      (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
 924                      (?(1).+)?                                                # if we found the ID, everything can follow
 925                      (?:\#|$)""" % {
 926         'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
 927     }
 928     _PLAYER_INFO_RE = (
 929         r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
 930         r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
 931         r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
 932     )
 933     _formats = {
 934         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 935         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 936         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 937         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 938         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 939         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 940         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 941         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 942         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 943         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 944         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 945         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 946         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 947         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 948         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 949         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 950         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 951         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 952
 953
 954         # 3D videos
 955         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 956         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 957         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 958         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 959         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
 960         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 961         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 962
 963         # Apple HTTP Live Streaming
 964         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 965         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 966         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 967         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 968         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 969         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 970         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 971         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
 972
 973         # DASH mp4 video
 974         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
 975         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
 976         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 977         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
 978         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
 979         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
 980         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
 981         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 982         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
 983         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 984         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 985         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
 986
 987         # Dash mp4 audio
 988         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
 989         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
 990         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
 991         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 992         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 993         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
 994         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
 995
 996         # Dash webm
 997         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 998         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 999         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1000         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1001         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1002         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1003         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1004         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1005         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1006         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1007         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1008         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1009         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1010         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1011         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1012         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1013         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1014         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1015         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1016         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1017         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1018         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1019
1020         # Dash webm audio
1021         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1022         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1023
1024         # Dash webm audio with opus inside
1025         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1026         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1027         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1028
1029         # RTMP (unnamed)
1030         '_rtmp': {'protocol': 'rtmp'},
1031
1032         # av01 video only formats sometimes served with "unknown" codecs
1033         '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1034         '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1035         '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1036         '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1037         '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1038         '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1039         '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1040         '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1041     }
1042     _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
1043
1044     _GEO_BYPASS = False
1045
1046     IE_NAME = 'youtube'
1047     _TESTS = [
1048         {
1049             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1050             'info_dict': {
1051                 'id': 'BaW_jenozKc',
1052                 'ext': 'mp4',
1053                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1054                 'uploader': 'Philipp Hagemeister',
1055                 'uploader_id': 'phihag',
1056                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1057                 'channel': 'Philipp Hagemeister',
1058                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1059                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1060                 'upload_date': '20121002',
1061                 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1062                 'categories': ['Science & Technology'],
1063                 'tags': ['youtube-dl'],
1064                 'duration': 10,
1065                 'view_count': int,
1066                 'like_count': int,
1067                 'availability': 'public',
1068                 'playable_in_embed': True,
1069                 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1070                 'live_status': 'not_live',
1071                 'age_limit': 0,
1072                 'start_time': 1,
1073                 'end_time': 9,
1074                 'channel_follower_count': int
1075             }
1076         },
1077         {
1078             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1079             'note': 'Embed-only video (#1746)',
1080             'info_dict': {
1081                 'id': 'yZIXLfi8CZQ',
1082                 'ext': 'mp4',
1083                 'upload_date': '20120608',
1084                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1085                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1086                 'uploader': 'SET India',
1087                 'uploader_id': 'setindia',
1088                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1089                 'age_limit': 18,
1090             },
1091             'skip': 'Private video',
1092         },
1093         {
1094             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1095             'note': 'Use the first video ID in the URL',
1096             'info_dict': {
1097                 'id': 'BaW_jenozKc',
1098                 'ext': 'mp4',
1099                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1100                 'uploader': 'Philipp Hagemeister',
1101                 'uploader_id': 'phihag',
1102                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1103                 'channel': 'Philipp Hagemeister',
1104                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1105                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1106                 'upload_date': '20121002',
1107                 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1108                 'categories': ['Science & Technology'],
1109                 'tags': ['youtube-dl'],
1110                 'duration': 10,
1111                 'view_count': int,
1112                 'like_count': int,
1113                 'availability': 'public',
1114                 'playable_in_embed': True,
1115                 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1116                 'live_status': 'not_live',
1117                 'age_limit': 0,
1118                 'channel_follower_count': int
1119             },
1120             'params': {
1121                 'skip_download': True,
1122             },
1123         },
1124         {
1125             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1126             'note': '256k DASH audio (format 141) via DASH manifest',
1127             'info_dict': {
1128                 'id': 'a9LDPn-MO4I',
1129                 'ext': 'm4a',
1130                 'upload_date': '20121002',
1131                 'uploader_id': '8KVIDEO',
1132                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1133                 'description': '',
1134                 'uploader': '8KVIDEO',
1135                 'title': 'UHDTV TEST 8K VIDEO.mp4'
1136             },
1137             'params': {
1138                 'youtube_include_dash_manifest': True,
1139                 'format': '141',
1140             },
1141             'skip': 'format 141 not served anymore',
1142         },
1143         # DASH manifest with encrypted signature
1144         {
1145             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1146             'info_dict': {
1147                 'id': 'IB3lcPjvWLA',
1148                 'ext': 'm4a',
1149                 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1150                 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1151                 'duration': 244,
1152                 'uploader': 'AfrojackVEVO',
1153                 'uploader_id': 'AfrojackVEVO',
1154                 'upload_date': '20131011',
1155                 'abr': 129.495,
1156                 'like_count': int,
1157                 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1158                 'playable_in_embed': True,
1159                 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1160                 'view_count': int,
1161                 'track': 'The Spark',
1162                 'live_status': 'not_live',
1163                 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1164                 'channel': 'Afrojack',
1165                 'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
1166                 'tags': 'count:19',
1167                 'availability': 'public',
1168                 'categories': ['Music'],
1169                 'age_limit': 0,
1170                 'alt_title': 'The Spark',
1171                 'channel_follower_count': int
1172             },
1173             'params': {
1174                 'youtube_include_dash_manifest': True,
1175                 'format': '141/bestaudio[ext=m4a]',
1176             },
1177         },
1178         # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1179         {
1180             'note': 'Embed allowed age-gate video',
1181             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1182             'info_dict': {
1183                 'id': 'HtVdAasjOgU',
1184                 'ext': 'mp4',
1185                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1186                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1187                 'duration': 142,
1188                 'uploader': 'The Witcher',
1189                 'uploader_id': 'WitcherGame',
1190                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1191                 'upload_date': '20140605',
1192                 'age_limit': 18,
1193                 'categories': ['Gaming'],
1194                 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1195                 'availability': 'needs_auth',
1196                 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1197                 'like_count': int,
1198                 'channel': 'The Witcher',
1199                 'live_status': 'not_live',
1200                 'tags': 'count:17',
1201                 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1202                 'playable_in_embed': True,
1203                 'view_count': int,
1204                 'channel_follower_count': int
1205             },
1206         },
1207         {
1208             'note': 'Age-gate video with embed allowed in public site',
1209             'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1210             'info_dict': {
1211                 'id': 'HsUATh_Nc2U',
1212                 'ext': 'mp4',
1213                 'title': 'Godzilla 2 (Official Video)',
1214                 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1215                 'upload_date': '20200408',
1216                 'uploader_id': 'FlyingKitty900',
1217                 'uploader': 'FlyingKitty',
1218                 'age_limit': 18,
1219                 'availability': 'needs_auth',
1220                 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1221                 'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
1222                 'channel': 'FlyingKitty',
1223                 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1224                 'view_count': int,
1225                 'categories': ['Entertainment'],
1226                 'live_status': 'not_live',
1227                 'tags': ['Flyingkitty', 'godzilla 2'],
1228                 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1229                 'like_count': int,
1230                 'duration': 177,
1231                 'playable_in_embed': True,
1232                 'channel_follower_count': int
1233             },
1234         },
1235         {
1236             'note': 'Age-gate video embedable only with clientScreen=EMBED',
1237             'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1238             'info_dict': {
1239                 'id': 'Tq92D6wQ1mg',
1240                 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1241                 'ext': 'mp4',
1242                 'upload_date': '20191228',
1243                 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1244                 'uploader': 'Projekt Melody',
1245                 'description': 'md5:17eccca93a786d51bc67646756894066',
1246                 'age_limit': 18,
1247                 'like_count': int,
1248                 'availability': 'needs_auth',
1249                 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1250                 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1251                 'view_count': int,
1252                 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1253                 'channel': 'Projekt Melody',
1254                 'live_status': 'not_live',
1255                 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1256                 'playable_in_embed': True,
1257                 'categories': ['Entertainment'],
1258                 'duration': 106,
1259                 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1260                 'channel_follower_count': int
1261             },
1262         },
1263         {
1264             'note': 'Non-Agegated non-embeddable video',
1265             'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1266             'info_dict': {
1267                 'id': 'MeJVWBSsPAY',
1268                 'ext': 'mp4',
1269                 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1270                 'uploader': 'Herr Lurik',
1271                 'uploader_id': 'st3in234',
1272                 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1273                 'upload_date': '20130730',
1274                 'track': 'Such mich find mich',
1275                 'age_limit': 0,
1276                 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1277                 'like_count': int,
1278                 'playable_in_embed': False,
1279                 'creator': 'OOMPH!',
1280                 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1281                 'view_count': int,
1282                 'alt_title': 'Such mich find mich',
1283                 'duration': 210,
1284                 'channel': 'Herr Lurik',
1285                 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1286                 'categories': ['Music'],
1287                 'availability': 'public',
1288                 'uploader_url': 'http://www.youtube.com/user/st3in234',
1289                 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1290                 'live_status': 'not_live',
1291                 'artist': 'OOMPH!',
1292                 'channel_follower_count': int
1293             },
1294         },
1295         {
1296             'note': 'Non-bypassable age-gated video',
1297             'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1298             'only_matching': True,
1299         },
1300         # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1301         # YouTube Red ad is not captured for creator
1302         {
1303             'url': '__2ABJjxzNo',
1304             'info_dict': {
1305                 'id': '__2ABJjxzNo',
1306                 'ext': 'mp4',
1307                 'duration': 266,
1308                 'upload_date': '20100430',
1309                 'uploader_id': 'deadmau5',
1310                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1311                 'creator': 'deadmau5',
1312                 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1313                 'uploader': 'deadmau5',
1314                 'title': 'Deadmau5 - Some Chords (HD)',
1315                 'alt_title': 'Some Chords',
1316                 'availability': 'public',
1317                 'tags': 'count:14',
1318                 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1319                 'view_count': int,
1320                 'live_status': 'not_live',
1321                 'channel': 'deadmau5',
1322                 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1323                 'like_count': int,
1324                 'track': 'Some Chords',
1325                 'artist': 'deadmau5',
1326                 'playable_in_embed': True,
1327                 'age_limit': 0,
1328                 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1329                 'categories': ['Music'],
1330                 'album': 'Some Chords',
1331                 'channel_follower_count': int
1332             },
1333             'expected_warnings': [
1334                 'DASH manifest missing',
1335             ]
1336         },
1337         # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1338         {
1339             'url': 'lqQg6PlCWgI',
1340             'info_dict': {
1341                 'id': 'lqQg6PlCWgI',
1342                 'ext': 'mp4',
1343                 'duration': 6085,
1344                 'upload_date': '20150827',
1345                 'uploader_id': 'olympic',
1346                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1347                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1348                 'uploader': 'Olympics',
1349                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
1350                 'like_count': int,
1351                 'release_timestamp': 1343767800,
1352                 'playable_in_embed': True,
1353                 'categories': ['Sports'],
1354                 'release_date': '20120731',
1355                 'channel': 'Olympics',
1356                 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1357                 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1358                 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1359                 'age_limit': 0,
1360                 'availability': 'public',
1361                 'live_status': 'was_live',
1362                 'view_count': int,
1363                 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
1364                 'channel_follower_count': int
1365             },
1366             'params': {
1367                 'skip_download': 'requires avconv',
1368             }
1369         },
1370         # Non-square pixels
1371         {
1372             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1373             'info_dict': {
1374                 'id': '_b-2C3KPAM0',
1375                 'ext': 'mp4',
1376                 'stretched_ratio': 16 / 9.,
1377                 'duration': 85,
1378                 'upload_date': '20110310',
1379                 'uploader_id': 'AllenMeow',
1380                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1381                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1382                 'uploader': '孫ᄋᄅ',
1383                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1384                 'playable_in_embed': True,
1385                 'channel': '孫ᄋᄅ',
1386                 'age_limit': 0,
1387                 'tags': 'count:11',
1388                 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1389                 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1390                 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1391                 'view_count': int,
1392                 'categories': ['People & Blogs'],
1393                 'like_count': int,
1394                 'live_status': 'not_live',
1395                 'availability': 'unlisted',
1396                 'channel_follower_count': int
1397             },
1398         },
1399         # url_encoded_fmt_stream_map is empty string
1400         {
1401             'url': 'qEJwOuvDf7I',
1402             'info_dict': {
1403                 'id': 'qEJwOuvDf7I',
1404                 'ext': 'webm',
1405                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1406                 'description': '',
1407                 'upload_date': '20150404',
1408                 'uploader_id': 'spbelect',
1409                 'uploader': 'Наблюдатели Петербурга',
1410             },
1411             'params': {
1412                 'skip_download': 'requires avconv',
1413             },
1414             'skip': 'This live event has ended.',
1415         },
1416         # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1417         {
1418             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1419             'info_dict': {
1420                 'id': 'FIl7x6_3R5Y',
1421                 'ext': 'webm',
1422                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1423                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1424                 'duration': 220,
1425                 'upload_date': '20150625',
1426                 'uploader_id': 'dorappi2000',
1427                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1428                 'uploader': 'dorappi2000',
1429                 'formats': 'mincount:31',
1430             },
1431             'skip': 'not actual anymore',
1432         },
1433         # DASH manifest with segment_list
1434         {
1435             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1436             'md5': '8ce563a1d667b599d21064e982ab9e31',
1437             'info_dict': {
1438                 'id': 'CsmdDsKjzN8',
1439                 'ext': 'mp4',
1440                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1441                 'uploader': 'Airtek',
1442                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1443                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1444                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1445             },
1446             'params': {
1447                 'youtube_include_dash_manifest': True,
1448                 'format': '135',  # bestvideo
1449             },
1450             'skip': 'This live event has ended.',
1451         },
1452         {
1453             # Multifeed videos (multiple cameras), URL is for Main Camera
1454             'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1455             'info_dict': {
1456                 'id': 'jvGDaLqkpTg',
1457                 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1458                 'description': 'md5:e03b909557865076822aa169218d6a5d',
1459             },
1460             'playlist': [{
1461                 'info_dict': {
1462                     'id': 'jvGDaLqkpTg',
1463                     'ext': 'mp4',
1464                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1465                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1466                     'duration': 10643,
1467                     'upload_date': '20161111',
1468                     'uploader': 'Team PGP',
1469                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1470                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1471                 },
1472             }, {
1473                 'info_dict': {
1474                     'id': '3AKt1R1aDnw',
1475                     'ext': 'mp4',
1476                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1477                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1478                     'duration': 10991,
1479                     'upload_date': '20161111',
1480                     'uploader': 'Team PGP',
1481                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1482                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1483                 },
1484             }, {
1485                 'info_dict': {
1486                     'id': 'RtAMM00gpVc',
1487                     'ext': 'mp4',
1488                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1489                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1490                     'duration': 10995,
1491                     'upload_date': '20161111',
1492                     'uploader': 'Team PGP',
1493                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1494                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1495                 },
1496             }, {
1497                 'info_dict': {
1498                     'id': '6N2fdlP3C5U',
1499                     'ext': 'mp4',
1500                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1501                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1502                     'duration': 10990,
1503                     'upload_date': '20161111',
1504                     'uploader': 'Team PGP',
1505                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1506                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1507                 },
1508             }],
1509             'params': {
1510                 'skip_download': True,
1511             },
1512             'skip': 'Not multifeed anymore',
1513         },
1514         {
1515             # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1516             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1517             'info_dict': {
1518                 'id': 'gVfLd0zydlo',
1519                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1520             },
1521             'playlist_count': 2,
1522             'skip': 'Not multifeed anymore',
1523         },
1524         {
1525             'url': 'https://vid.plus/FlRa-iH7PGw',
1526             'only_matching': True,
1527         },
1528         {
1529             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1530             'only_matching': True,
1531         },
1532         {
1533             # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1534             # Also tests cut-off URL expansion in video description (see
1535             # https://github.com/ytdl-org/youtube-dl/issues/1892,
1536             # https://github.com/ytdl-org/youtube-dl/issues/8164)
1537             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1538             'info_dict': {
1539                 'id': 'lsguqyKfVQg',
1540                 'ext': 'mp4',
1541                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1542                 'alt_title': 'Dark Walk',
1543                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1544                 'duration': 133,
1545                 'upload_date': '20151119',
1546                 'uploader_id': 'IronSoulElf',
1547                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1548                 'uploader': 'IronSoulElf',
1549                 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1550                 'track': 'Dark Walk',
1551                 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1552                 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1553                 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1554                 'categories': ['Film & Animation'],
1555                 'view_count': int,
1556                 'live_status': 'not_live',
1557                 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1558                 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1559                 'tags': 'count:13',
1560                 'availability': 'public',
1561                 'channel': 'IronSoulElf',
1562                 'playable_in_embed': True,
1563                 'like_count': int,
1564                 'age_limit': 0,
1565                 'channel_follower_count': int
1566             },
1567             'params': {
1568                 'skip_download': True,
1569             },
1570         },
1571         {
1572             # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1573             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1574             'only_matching': True,
1575         },
1576         {
1577             # Video with yt:stretch=17:0
1578             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1579             'info_dict': {
1580                 'id': 'Q39EVAstoRM',
1581                 'ext': 'mp4',
1582                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1583                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1584                 'upload_date': '20151107',
1585                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1586                 'uploader': 'CH GAMER DROID',
1587             },
1588             'params': {
1589                 'skip_download': True,
1590             },
1591             'skip': 'This video does not exist.',
1592         },
1593         {
1594             # Video with incomplete 'yt:stretch=16:'
1595             'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1596             'only_matching': True,
1597         },
1598         {
1599             # Video licensed under Creative Commons
1600             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1601             'info_dict': {
1602                 'id': 'M4gD1WSo5mA',
1603                 'ext': 'mp4',
1604                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1605                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1606                 'duration': 721,
1607                 'upload_date': '20150128',
1608                 'uploader_id': 'BerkmanCenter',
1609                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1610                 'uploader': 'The Berkman Klein Center for Internet & Society',
1611                 'license': 'Creative Commons Attribution license (reuse allowed)',
1612                 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1613                 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1614                 'like_count': int,
1615                 'age_limit': 0,
1616                 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1617                 'channel': 'The Berkman Klein Center for Internet & Society',
1618                 'availability': 'public',
1619                 'view_count': int,
1620                 'categories': ['Education'],
1621                 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1622                 'live_status': 'not_live',
1623                 'playable_in_embed': True,
1624                 'channel_follower_count': int
1625             },
1626             'params': {
1627                 'skip_download': True,
1628             },
1629         },
1630         {
1631             # Channel-like uploader_url
1632             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1633             'info_dict': {
1634                 'id': 'eQcmzGIKrzg',
1635                 'ext': 'mp4',
1636                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1637                 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1638                 'duration': 4060,
1639                 'upload_date': '20151120',
1640                 'uploader': 'Bernie Sanders',
1641                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1642                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1643                 'license': 'Creative Commons Attribution license (reuse allowed)',
1644                 'playable_in_embed': True,
1645                 'tags': 'count:12',
1646                 'like_count': int,
1647                 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1648                 'age_limit': 0,
1649                 'availability': 'public',
1650                 'categories': ['News & Politics'],
1651                 'channel': 'Bernie Sanders',
1652                 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1653                 'view_count': int,
1654                 'live_status': 'not_live',
1655                 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1656                 'channel_follower_count': int
1657             },
1658             'params': {
1659                 'skip_download': True,
1660             },
1661         },
1662         {
1663             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1664             'only_matching': True,
1665         },
1666         {
1667             # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1668             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1669             'only_matching': True,
1670         },
1671         {
1672             # Rental video preview
1673             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1674             'info_dict': {
1675                 'id': 'uGpuVWrhIzE',
1676                 'ext': 'mp4',
1677                 'title': 'Piku - Trailer',
1678                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1679                 'upload_date': '20150811',
1680                 'uploader': 'FlixMatrix',
1681                 'uploader_id': 'FlixMatrixKaravan',
1682                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1683                 'license': 'Standard YouTube License',
1684             },
1685             'params': {
1686                 'skip_download': True,
1687             },
1688             'skip': 'This video is not available.',
1689         },
1690         {
1691             # YouTube Red video with episode data
1692             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1693             'info_dict': {
1694                 'id': 'iqKdEhx-dD4',
1695                 'ext': 'mp4',
1696                 'title': 'Isolation - Mind Field (Ep 1)',
1697                 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1698                 'duration': 2085,
1699                 'upload_date': '20170118',
1700                 'uploader': 'Vsauce',
1701                 'uploader_id': 'Vsauce',
1702                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1703                 'series': 'Mind Field',
1704                 'season_number': 1,
1705                 'episode_number': 1,
1706                 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1707                 'tags': 'count:12',
1708                 'view_count': int,
1709                 'availability': 'public',
1710                 'age_limit': 0,
1711                 'channel': 'Vsauce',
1712                 'episode': 'Episode 1',
1713                 'categories': ['Entertainment'],
1714                 'season': 'Season 1',
1715                 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1716                 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1717                 'like_count': int,
1718                 'playable_in_embed': True,
1719                 'live_status': 'not_live',
1720                 'channel_follower_count': int
1721             },
1722             'params': {
1723                 'skip_download': True,
1724             },
1725             'expected_warnings': [
1726                 'Skipping DASH manifest',
1727             ],
1728         },
1729         {
1730             # The following content has been identified by the YouTube community
1731             # as inappropriate or offensive to some audiences.
1732             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1733             'info_dict': {
1734                 'id': '6SJNVb0GnPI',
1735                 'ext': 'mp4',
1736                 'title': 'Race Differences in Intelligence',
1737                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1738                 'duration': 965,
1739                 'upload_date': '20140124',
1740                 'uploader': 'New Century Foundation',
1741                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1742                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1743             },
1744             'params': {
1745                 'skip_download': True,
1746             },
1747             'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1748         },
1749         {
1750             # itag 212
1751             'url': '1t24XAntNCY',
1752             'only_matching': True,
1753         },
1754         {
1755             # geo restricted to JP
1756             'url': 'sJL6WA-aGkQ',
1757             'only_matching': True,
1758         },
1759         {
1760             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1761             'only_matching': True,
1762         },
1763         {
1764             'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1765             'only_matching': True,
1766         },
1767         {
1768             # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1769             'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1770             'only_matching': True,
1771         },
1772         {
1773             # DRM protected
1774             'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1775             'only_matching': True,
1776         },
1777         {
1778             # Video with unsupported adaptive stream type formats
1779             'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1780             'info_dict': {
1781                 'id': 'Z4Vy8R84T1U',
1782                 'ext': 'mp4',
1783                 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1784                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1785                 'duration': 433,
1786                 'upload_date': '20130923',
1787                 'uploader': 'Amelia Putri Harwita',
1788                 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1789                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1790                 'formats': 'maxcount:10',
1791             },
1792             'params': {
1793                 'skip_download': True,
1794                 'youtube_include_dash_manifest': False,
1795             },
1796             'skip': 'not actual anymore',
1797         },
1798         {
1799             # YouTube Music auto-generated description
1800             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1801             'info_dict': {
1802                 'id': 'MgNrAu2pzNs',
1803                 'ext': 'mp4',
1804                 'title': 'Voyeur Girl',
1805                 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1806                 'upload_date': '20190312',
1807                 'uploader': 'Stephen - Topic',
1808                 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1809                 'artist': 'Stephen',
1810                 'track': 'Voyeur Girl',
1811                 'album': 'it\'s too much love to know my dear',
1812                 'release_date': '20190313',
1813                 'release_year': 2019,
1814                 'alt_title': 'Voyeur Girl',
1815                 'view_count': int,
1816                 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1817                 'playable_in_embed': True,
1818                 'like_count': int,
1819                 'categories': ['Music'],
1820                 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1821                 'channel': 'Stephen',
1822                 'availability': 'public',
1823                 'creator': 'Stephen',
1824                 'duration': 169,
1825                 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
1826                 'age_limit': 0,
1827                 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1828                 'tags': 'count:11',
1829                 'live_status': 'not_live',
1830                 'channel_follower_count': int
1831             },
1832             'params': {
1833                 'skip_download': True,
1834             },
1835         },
1836         {
1837             'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1838             'only_matching': True,
1839         },
1840         {
1841             # invalid -> valid video id redirection
1842             'url': 'DJztXj2GPfl',
1843             'info_dict': {
1844                 'id': 'DJztXj2GPfk',
1845                 'ext': 'mp4',
1846                 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1847                 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1848                 'upload_date': '20090125',
1849                 'uploader': 'Prochorowka',
1850                 'uploader_id': 'Prochorowka',
1851                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1852                 'artist': 'Panjabi MC',
1853                 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1854                 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1855             },
1856             'params': {
1857                 'skip_download': True,
1858             },
1859             'skip': 'Video unavailable',
1860         },
1861         {
1862             # empty description results in an empty string
1863             'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1864             'info_dict': {
1865                 'id': 'x41yOUIvK2k',
1866                 'ext': 'mp4',
1867                 'title': 'IMG 3456',
1868                 'description': '',
1869                 'upload_date': '20170613',
1870                 'uploader_id': 'ElevageOrVert',
1871                 'uploader': 'ElevageOrVert',
1872                 'view_count': int,
1873                 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
1874                 'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
1875                 'like_count': int,
1876                 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
1877                 'tags': [],
1878                 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
1879                 'availability': 'public',
1880                 'age_limit': 0,
1881                 'categories': ['Pets & Animals'],
1882                 'duration': 7,
1883                 'playable_in_embed': True,
1884                 'live_status': 'not_live',
1885                 'channel': 'ElevageOrVert',
1886                 'channel_follower_count': int
1887             },
1888             'params': {
1889                 'skip_download': True,
1890             },
1891         },
1892         {
1893             # with '};' inside yt initial data (see [1])
1894             # see [2] for an example with '};' inside ytInitialPlayerResponse
1895             # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1896             # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1897             'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1898             'info_dict': {
1899                 'id': 'CHqg6qOn4no',
1900                 'ext': 'mp4',
1901                 'title': 'Part 77   Sort a list of simple types in c#',
1902                 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1903                 'upload_date': '20130831',
1904                 'uploader_id': 'kudvenkat',
1905                 'uploader': 'kudvenkat',
1906                 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
1907                 'like_count': int,
1908                 'uploader_url': 'http://www.youtube.com/user/kudvenkat',
1909                 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
1910                 'live_status': 'not_live',
1911                 'categories': ['Education'],
1912                 'availability': 'public',
1913                 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
1914                 'tags': 'count:12',
1915                 'playable_in_embed': True,
1916                 'age_limit': 0,
1917                 'view_count': int,
1918                 'duration': 522,
1919                 'channel': 'kudvenkat',
1920                 'channel_follower_count': int
1921             },
1922             'params': {
1923                 'skip_download': True,
1924             },
1925         },
1926         {
1927             # another example of '};' in ytInitialData
1928             'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1929             'only_matching': True,
1930         },
1931         {
1932             'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1933             'only_matching': True,
1934         },
1935         {
1936             # https://github.com/ytdl-org/youtube-dl/pull/28094
1937             'url': 'OtqTfy26tG0',
1938             'info_dict': {
1939                 'id': 'OtqTfy26tG0',
1940                 'ext': 'mp4',
1941                 'title': 'Burn Out',
1942                 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1943                 'upload_date': '20141120',
1944                 'uploader': 'The Cinematic Orchestra - Topic',
1945                 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1946                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1947                 'artist': 'The Cinematic Orchestra',
1948                 'track': 'Burn Out',
1949                 'album': 'Every Day',
1950                 'like_count': int,
1951                 'live_status': 'not_live',
1952                 'alt_title': 'Burn Out',
1953                 'duration': 614,
1954                 'age_limit': 0,
1955                 'view_count': int,
1956                 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1957                 'creator': 'The Cinematic Orchestra',
1958                 'channel': 'The Cinematic Orchestra',
1959                 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
1960                 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1961                 'availability': 'public',
1962                 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
1963                 'categories': ['Music'],
1964                 'playable_in_embed': True,
1965                 'channel_follower_count': int
1966             },
1967             'params': {
1968                 'skip_download': True,
1969             },
1970         },
1971         {
1972             # controversial video, only works with bpctr when authenticated with cookies
1973             'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1974             'only_matching': True,
1975         },
1976         {
1977             # controversial video, requires bpctr/contentCheckOk
1978             'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1979             'info_dict': {
1980                 'id': 'SZJvDhaSDnc',
1981                 'ext': 'mp4',
1982                 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1983                 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1984                 'uploader': 'CBS Mornings',
1985                 'uploader_id': 'CBSThisMorning',
1986                 'upload_date': '20140716',
1987                 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
1988                 'duration': 170,
1989                 'categories': ['News & Politics'],
1990                 'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
1991                 'view_count': int,
1992                 'channel': 'CBS Mornings',
1993                 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
1994                 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
1995                 'age_limit': 18,
1996                 'availability': 'needs_auth',
1997                 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
1998                 'like_count': int,
1999                 'live_status': 'not_live',
2000                 'playable_in_embed': True,
2001                 'channel_follower_count': int
2002             }
2003         },
2004         {
2005             # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2006             'url': 'cBvYw8_A0vQ',
2007             'info_dict': {
2008                 'id': 'cBvYw8_A0vQ',
2009                 'ext': 'mp4',
2010                 'title': '4K Ueno Okachimachi  Street  Scenes  上野御徒町歩き',
2011                 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2012                 'upload_date': '20201120',
2013                 'uploader': 'Walk around Japan',
2014                 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2015                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2016                 'duration': 1456,
2017                 'categories': ['Travel & Events'],
2018                 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2019                 'view_count': int,
2020                 'channel': 'Walk around Japan',
2021                 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2022                 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2023                 'age_limit': 0,
2024                 'availability': 'public',
2025                 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2026                 'live_status': 'not_live',
2027                 'playable_in_embed': True,
2028                 'channel_follower_count': int
2029             },
2030             'params': {
2031                 'skip_download': True,
2032             },
2033         }, {
2034             # Has multiple audio streams
2035             'url': 'WaOKSUlf4TM',
2036             'only_matching': True
2037         }, {
2038             # Requires Premium: has format 141 when requested using YTM url
2039             'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2040             'only_matching': True
2041         }, {
2042             # multiple subtitles with same lang_code
2043             'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2044             'only_matching': True,
2045         }, {
2046             # Force use android client fallback
2047             'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2048             'info_dict': {
2049                 'id': 'YOelRv7fMxY',
2050                 'title': 'DIGGING A SECRET TUNNEL Part 1',
2051                 'ext': '3gp',
2052                 'upload_date': '20210624',
2053                 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2054                 'uploader': 'colinfurze',
2055                 'uploader_id': 'colinfurze',
2056                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
2057                 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2058                 'duration': 596,
2059                 'categories': ['Entertainment'],
2060                 'uploader_url': 'http://www.youtube.com/user/colinfurze',
2061                 'view_count': int,
2062                 'channel': 'colinfurze',
2063                 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2064                 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2065                 'age_limit': 0,
2066                 'availability': 'public',
2067                 'like_count': int,
2068                 'live_status': 'not_live',
2069                 'playable_in_embed': True,
2070                 'channel_follower_count': int
2071             },
2072             'params': {
2073                 'format': '17',  # 3gp format available on android
2074                 'extractor_args': {'youtube': {'player_client': ['android']}},
2075             },
2076         },
2077         {
2078             # Skip download of additional client configs (remix client config in this case)
2079             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2080             'only_matching': True,
2081             'params': {
2082                 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2083             },
2084         }, {
2085             # shorts
2086             'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2087             'only_matching': True,
2088         }, {
2089             'note': 'Storyboards',
2090             'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2091             'info_dict': {
2092                 'id': '5KLPxDtMqe8',
2093                 'ext': 'mhtml',
2094                 'format_id': 'sb0',
2095                 'title': 'Your Brain is Plastic',
2096                 'uploader_id': 'scishow',
2097                 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2098                 'upload_date': '20140324',
2099                 'uploader': 'SciShow',
2100                 'like_count': int,
2101                 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2102                 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2103                 'view_count': int,
2104                 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2105                 'playable_in_embed': True,
2106                 'tags': 'count:12',
2107                 'uploader_url': 'http://www.youtube.com/user/scishow',
2108                 'availability': 'public',
2109                 'channel': 'SciShow',
2110                 'live_status': 'not_live',
2111                 'duration': 248,
2112                 'categories': ['Education'],
2113                 'age_limit': 0,
2114                 'channel_follower_count': int
2115             }, 'params': {'format': 'mhtml', 'skip_download': True}
2116         }, {
2117             # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2118             'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2119             'info_dict': {
2120                 'id': '2NUZ8W2llS4',
2121                 'ext': 'mp4',
2122                 'title': 'The NP that test your phone performance 🙂',
2123                 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2124                 'uploader': 'Leon Nguyen',
2125                 'uploader_id': 'VNSXIII',
2126                 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2127                 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2128                 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2129                 'duration': 21,
2130                 'view_count': int,
2131                 'age_limit': 0,
2132                 'categories': ['Gaming'],
2133                 'tags': 'count:23',
2134                 'playable_in_embed': True,
2135                 'live_status': 'not_live',
2136                 'upload_date': '20220103',
2137                 'like_count': int,
2138                 'availability': 'public',
2139                 'channel': 'Leon Nguyen',
2140                 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2141                 'channel_follower_count': int
2142             }
2143         }, {
2144             # date text is premiered video, ensure upload date in UTC (published 1641172509)
2145             'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2146             'info_dict': {
2147                 'id': 'mzZzzBU6lrM',
2148                 'ext': 'mp4',
2149                 'title': 'I Met GeorgeNotFound In Real Life...',
2150                 'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
2151                 'uploader': 'Quackity',
2152                 'uploader_id': 'QuackityHQ',
2153                 'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
2154                 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2155                 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2156                 'duration': 955,
2157                 'view_count': int,
2158                 'age_limit': 0,
2159                 'categories': ['Entertainment'],
2160                 'tags': 'count:26',
2161                 'playable_in_embed': True,
2162                 'live_status': 'not_live',
2163                 'release_timestamp': 1641172509,
2164                 'release_date': '20220103',
2165                 'upload_date': '20220103',
2166                 'like_count': int,
2167                 'availability': 'public',
2168                 'channel': 'Quackity',
2169                 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2170                 'channel_follower_count': int
2171             }
2172         },
2173         {   # continuous livestream. Microformat upload date should be preferred.
2174             # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2175             'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2176             'info_dict': {
2177                 'id': 'kgx4WGK0oNU',
2178                 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2179                 'ext': 'mp4',
2180                 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2181                 'availability': 'public',
2182                 'age_limit': 0,
2183                 'release_timestamp': 1637975704,
2184                 'upload_date': '20210619',
2185                 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2186                 'live_status': 'is_live',
2187                 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
2188                 'uploader': '阿鲍Abao',
2189                 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2190                 'channel': 'Abao in Tokyo',
2191                 'channel_follower_count': int,
2192                 'release_date': '20211127',
2193                 'tags': 'count:39',
2194                 'categories': ['People & Blogs'],
2195                 'like_count': int,
2196                 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2197                 'view_count': int,
2198                 'playable_in_embed': True,
2199                 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
2200             },
2201             'params': {'skip_download': True}
2202         },
2203     ]
2204
2205     @classmethod
2206     def suitable(cls, url):
2207         from ..utils import parse_qs
2208
2209         qs = parse_qs(url)
2210         if qs.get('list', [None])[0]:
2211             return False
2212         return super().suitable(url)
2213
2214     def __init__(self, *args, **kwargs):
2215         super().__init__(*args, **kwargs)
2216         self._code_cache = {}
2217         self._player_cache = {}
2218
2219     def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
2220         lock = threading.Lock()
2221
2222         is_live = True
2223         start_time = time.time()
2224         formats = [f for f in formats if f.get('is_from_start')]
2225
2226         def refetch_manifest(format_id, delay):
2227             nonlocal formats, start_time, is_live
2228             if time.time() <= start_time + delay:
2229                 return
2230
2231             _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
2232             video_details = traverse_obj(
2233                 prs, (..., 'videoDetails'), expected_type=dict, default=[])
2234             microformats = traverse_obj(
2235                 prs, (..., 'microformat', 'playerMicroformatRenderer'),
2236                 expected_type=dict, default=[])
2237             _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
2238             start_time = time.time()
2239
2240         def mpd_feed(format_id, delay):
2241             """
2242             @returns (manifest_url, manifest_stream_number, is_live) or None
2243             """
2244             with lock:
2245                 refetch_manifest(format_id, delay)
2246
2247             f = next((f for f in formats if f['format_id'] == format_id), None)
2248             if not f:
2249                 if not is_live:
2250                     self.to_screen(f'{video_id}: Video is no longer live')
2251                 else:
2252                     self.report_warning(
2253                         f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
2254                 return None
2255             return f['manifest_url'], f['manifest_stream_number'], is_live
2256
2257         for f in formats:
2258             f['is_live'] = True
2259             f['protocol'] = 'http_dash_segments_generator'
2260             f['fragments'] = functools.partial(
2261                 self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)
2262
2263     def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
2264         FETCH_SPAN, MAX_DURATION = 5, 432000
2265
2266         mpd_url, stream_number, is_live = None, None, True
2267
2268         begin_index = 0
2269         download_start_time = ctx.get('start') or time.time()
2270
2271         lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
2272         if lack_early_segments:
2273             self.report_warning(bug_reports_message(
2274                 'Starting download from the last 120 hours of the live stream since '
2275                 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
2276             lack_early_segments = True
2277
2278         known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
2279         fragments, fragment_base_url = None, None
2280
2281         def _extract_sequence_from_mpd(refresh_sequence, immediate):
2282             nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
2283             # Obtain from MPD's maximum seq value
2284             old_mpd_url = mpd_url
2285             last_error = ctx.pop('last_error', None)
2286             expire_fast = immediate or last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403
2287             mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
2288                                                or (mpd_url, stream_number, False))
2289             if not refresh_sequence:
2290                 if expire_fast and not is_live:
2291                     return False, last_seq
2292                 elif old_mpd_url == mpd_url:
2293                     return True, last_seq
2294             try:
2295                 fmts, _ = self._extract_mpd_formats_and_subtitles(
2296                     mpd_url, None, note=False, errnote=False, fatal=False)
2297             except ExtractorError:
2298                 fmts = None
2299             if not fmts:
2300                 no_fragment_score += 2
2301                 return False, last_seq
2302             fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
2303             fragments = fmt_info['fragments']
2304             fragment_base_url = fmt_info['fragment_base_url']
2305             assert fragment_base_url
2306
2307             _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
2308             return True, _last_seq
2309
2310         while is_live:
2311             fetch_time = time.time()
2312             if no_fragment_score > 30:
2313                 return
2314             if last_segment_url:
2315                 # Obtain from "X-Head-Seqnum" header value from each segment
2316                 try:
2317                     urlh = self._request_webpage(
2318                         last_segment_url, None, note=False, errnote=False, fatal=False)
2319                 except ExtractorError:
2320                     urlh = None
2321                 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
2322                 if last_seq is None:
2323                     no_fragment_score += 2
2324                     last_segment_url = None
2325                     continue
2326             else:
2327                 should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
2328                 no_fragment_score += 2
2329                 if not should_continue:
2330                     continue
2331
2332             if known_idx > last_seq:
2333                 last_segment_url = None
2334                 continue
2335
2336             last_seq += 1
2337
2338             if begin_index < 0 and known_idx < 0:
2339                 # skip from the start when it's negative value
2340                 known_idx = last_seq + begin_index
2341             if lack_early_segments:
2342                 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
2343             try:
2344                 for idx in range(known_idx, last_seq):
2345                     # do not update sequence here or you'll get skipped some part of it
2346                     should_continue, _ = _extract_sequence_from_mpd(False, False)
2347                     if not should_continue:
2348                         known_idx = idx - 1
2349                         raise ExtractorError('breaking out of outer loop')
2350                     last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
2351                     yield {
2352                         'url': last_segment_url,
2353                     }
2354                 if known_idx == last_seq:
2355                     no_fragment_score += 5
2356                 else:
2357                     no_fragment_score = 0
2358                 known_idx = last_seq
2359             except ExtractorError:
2360                 continue
2361
2362             time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2363
2364     def _extract_player_url(self, *ytcfgs, webpage=None):
2365         player_url = traverse_obj(
2366             ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
2367             get_all=False, expected_type=compat_str)
2368         if not player_url:
2369             return
2370         return urljoin('https://www.youtube.com', player_url)
2371
2372     def _download_player_url(self, video_id, fatal=False):
2373         res = self._download_webpage(
2374             'https://www.youtube.com/iframe_api',
2375             note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
2376         if res:
2377             player_version = self._search_regex(
2378                 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
2379             if player_version:
2380                 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2381
2382     def _signature_cache_id(self, example_sig):
2383         """ Return a string representation of a signature """
2384         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
2385
2386     @classmethod
2387     def _extract_player_info(cls, player_url):
2388         for player_re in cls._PLAYER_INFO_RE:
2389             id_m = re.search(player_re, player_url)
2390             if id_m:
2391                 break
2392         else:
2393             raise ExtractorError('Cannot identify player %r' % player_url)
2394         return id_m.group('id')
2395
2396     def _load_player(self, video_id, player_url, fatal=True):
2397         player_id = self._extract_player_info(player_url)
2398         if player_id not in self._code_cache:
2399             code = self._download_webpage(
2400                 player_url, video_id, fatal=fatal,
2401                 note='Downloading player ' + player_id,
2402                 errnote='Download of %s failed' % player_url)
2403             if code:
2404                 self._code_cache[player_id] = code
2405         return self._code_cache.get(player_id)
2406
2407     def _extract_signature_function(self, video_id, player_url, example_sig):
2408         player_id = self._extract_player_info(player_url)
2409
2410         # Read from filesystem cache
2411         func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
2412         assert os.path.basename(func_id) == func_id
2413
2414         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
2415         if cache_spec is not None:
2416             return lambda s: ''.join(s[i] for i in cache_spec)
2417
2418         code = self._load_player(video_id, player_url)
2419         if code:
2420             res = self._parse_sig_js(code)
2421
2422             test_string = ''.join(map(compat_chr, range(len(example_sig))))
2423             cache_res = res(test_string)
2424             cache_spec = [ord(c) for c in cache_res]
2425
2426             self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
2427             return res
2428
2429     def _print_sig_code(self, func, example_sig):
2430         if not self.get_param('youtube_print_sig_code'):
2431             return
2432
2433         def gen_sig_code(idxs):
2434             def _genslice(start, end, step):
2435                 starts = '' if start == 0 else str(start)
2436                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
2437                 steps = '' if step == 1 else (':%d' % step)
2438                 return f's[{starts}{ends}{steps}]'
2439
2440             step = None
2441             # Quelch pyflakes warnings - start will be set when step is set
2442             start = '(Never used)'
2443             for i, prev in zip(idxs[1:], idxs[:-1]):
2444                 if step is not None:
2445                     if i - prev == step:
2446                         continue
2447                     yield _genslice(start, prev, step)
2448                     step = None
2449                     continue
2450                 if i - prev in [-1, 1]:
2451                     step = i - prev
2452                     start = prev
2453                     continue
2454                 else:
2455                     yield 's[%d]' % prev
2456             if step is None:
2457                 yield 's[%d]' % i
2458             else:
2459                 yield _genslice(start, i, step)
2460
2461         test_string = ''.join(map(compat_chr, range(len(example_sig))))
2462         cache_res = func(test_string)
2463         cache_spec = [ord(c) for c in cache_res]
2464         expr_code = ' + '.join(gen_sig_code(cache_spec))
2465         signature_id_tuple = '(%s)' % (
2466             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
2467         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
2468                 '    return %s\n') % (signature_id_tuple, expr_code)
2469         self.to_screen('Extracted signature function:\n' + code)
2470
2471     def _parse_sig_js(self, jscode):
2472         funcname = self._search_regex(
2473             (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2474              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2475              r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
2476              r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
2477              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
2478              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
2479              r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
2480              # Obsolete patterns
2481              r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2482              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
2483              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2484              r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2485              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2486              r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2487              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2488              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
2489             jscode, 'Initial JS player signature function name', group='sig')
2490
2491         jsi = JSInterpreter(jscode)
2492         initial_function = jsi.extract_function(funcname)
2493         return lambda s: initial_function([s])
2494
2495     def _decrypt_signature(self, s, video_id, player_url):
2496         """Turn the encrypted s field into a working signature"""
2497
2498         if player_url is None:
2499             raise ExtractorError('Cannot decrypt signature without player_url')
2500
2501         try:
2502             player_id = (player_url, self._signature_cache_id(s))
2503             if player_id not in self._player_cache:
2504                 func = self._extract_signature_function(
2505                     video_id, player_url, s
2506                 )
2507                 self._player_cache[player_id] = func
2508             func = self._player_cache[player_id]
2509             self._print_sig_code(func, s)
2510             return func(s)
2511         except Exception as e:
2512             raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
2513
2514     def _decrypt_nsig(self, s, video_id, player_url):
2515         """Turn the encrypted n field into a working signature"""
2516         if player_url is None:
2517             raise ExtractorError('Cannot decrypt nsig without player_url')
2518         player_url = urljoin('https://www.youtube.com', player_url)
2519
2520         sig_id = ('nsig_value', s)
2521         if sig_id in self._player_cache:
2522             return self._player_cache[sig_id]
2523
2524         try:
2525             player_id = ('nsig', player_url)
2526             if player_id not in self._player_cache:
2527                 self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
2528             func = self._player_cache[player_id]
2529             self._player_cache[sig_id] = func(s)
2530             self.write_debug(f'Decrypted nsig {s} => {self._player_cache[sig_id]}')
2531             return self._player_cache[sig_id]
2532         except Exception as e:
2533             raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
2534
2535     def _extract_n_function_name(self, jscode):
2536         nfunc, idx = self._search_regex(
2537             r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
2538             jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
2539         if not idx:
2540             return nfunc
2541         return json.loads(js_to_json(self._search_regex(
2542             rf'var {re.escape(nfunc)}\s*=\s*(\[.+?\]);', jscode,
2543             f'Initial JS player n function list ({nfunc}.{idx})')))[int(idx)]
2544
2545     def _extract_n_function(self, video_id, player_url):
2546         player_id = self._extract_player_info(player_url)
2547         func_code = self._downloader.cache.load('youtube-nsig', player_id)
2548
2549         if func_code:
2550             jsi = JSInterpreter(func_code)
2551         else:
2552             jscode = self._load_player(video_id, player_url)
2553             funcname = self._extract_n_function_name(jscode)
2554             jsi = JSInterpreter(jscode)
2555             func_code = jsi.extract_function_code(funcname)
2556             self._downloader.cache.store('youtube-nsig', player_id, func_code)
2557
2558         if self.get_param('youtube_print_sig_code'):
2559             self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
2560
2561         return lambda s: jsi.extract_function_from_code(*func_code)([s])
2562
2563     def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
2564         """
2565         Extract signatureTimestamp (sts)
2566         Required to tell API what sig/player version is in use.
2567         """
2568         sts = None
2569         if isinstance(ytcfg, dict):
2570             sts = int_or_none(ytcfg.get('STS'))
2571
2572         if not sts:
2573             # Attempt to extract from player
2574             if player_url is None:
2575                 error_msg = 'Cannot extract signature timestamp without player_url.'
2576                 if fatal:
2577                     raise ExtractorError(error_msg)
2578                 self.report_warning(error_msg)
2579                 return
2580             code = self._load_player(video_id, player_url, fatal=fatal)
2581             if code:
2582                 sts = int_or_none(self._search_regex(
2583                     r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
2584                     'JS player signature timestamp', group='sts', fatal=fatal))
2585         return sts
2586
2587     def _mark_watched(self, video_id, player_responses):
2588         playback_url = get_first(
2589             player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
2590             expected_type=url_or_none)
2591         if not playback_url:
2592             self.report_warning('Unable to mark watched')
2593             return
2594         parsed_playback_url = compat_urlparse.urlparse(playback_url)
2595         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
2596
2597         # cpn generation algorithm is reverse engineered from base.js.
2598         # In fact it works even with dummy cpn.
2599         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
2600         cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))
2601
2602         qs.update({
2603             'ver': ['2'],
2604             'cpn': [cpn],
2605         })
2606         playback_url = compat_urlparse.urlunparse(
2607             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
2608
2609         self._download_webpage(
2610             playback_url, video_id, 'Marking watched',
2611             'Unable to mark watched', fatal=False)
2612
2613     @staticmethod
2614     def _extract_urls(webpage):
2615         # Embedded YouTube player
2616         entries = [
2617             unescapeHTML(mobj.group('url'))
2618             for mobj in re.finditer(r'''(?x)
2619             (?:
2620                 <iframe[^>]+?src=|
2621                 data-video-url=|
2622                 <embed[^>]+?src=|
2623                 embedSWF\(?:\s*|
2624                 <object[^>]+data=|
2625                 new\s+SWFObject\(
2626             )
2627             (["\'])
2628                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
2629                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
2630             \1''', webpage)]
2631
2632         # lazyYT YouTube embed
2633         entries.extend(list(map(
2634             unescapeHTML,
2635             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
2636
2637         # Wordpress "YouTube Video Importer" plugin
2638         matches = re.findall(r'''(?x)<div[^>]+
2639             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
2640             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
2641         entries.extend(m[-1] for m in matches)
2642
2643         return entries
2644
2645     @staticmethod
2646     def _extract_url(webpage):
2647         urls = YoutubeIE._extract_urls(webpage)
2648         return urls[0] if urls else None
2649
2650     @classmethod
2651     def extract_id(cls, url):
2652         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2653         if mobj is None:
2654             raise ExtractorError('Invalid URL: %s' % url)
2655         return mobj.group('id')
2656
2657     def _extract_chapters_from_json(self, data, duration):
2658         chapter_list = traverse_obj(
2659             data, (
2660                 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2661                 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2662             ), expected_type=list)
2663
2664         return self._extract_chapters(
2665             chapter_list,
2666             chapter_time=lambda chapter: float_or_none(
2667                 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2668             chapter_title=lambda chapter: traverse_obj(
2669                 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2670             duration=duration)
2671
2672     def _extract_chapters_from_engagement_panel(self, data, duration):
2673         content_list = traverse_obj(
2674             data,
2675             ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2676             expected_type=list, default=[])
2677         chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2678         chapter_title = lambda chapter: self._get_text(chapter, 'title')
2679
2680         return next((
2681             filter(None, (
2682                 self._extract_chapters(
2683                     traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2684                     chapter_time, chapter_title, duration)
2685                 for contents in content_list
2686             ))), [])
2687
2688     def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2689         chapters = []
2690         last_chapter = {'start_time': 0}
2691         for idx, chapter in enumerate(chapter_list or []):
2692             title = chapter_title(chapter)
2693             start_time = chapter_time(chapter)
2694             if start_time is None:
2695                 continue
2696             last_chapter['end_time'] = start_time
2697             if start_time < last_chapter['start_time']:
2698                 if idx == 1:
2699                     chapters.pop()
2700                     self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2701                 else:
2702                     self.report_warning(f'Invalid start time for chapter "{title}"')
2703                     continue
2704             last_chapter = {'start_time': start_time, 'title': title}
2705             chapters.append(last_chapter)
2706         last_chapter['end_time'] = duration
2707         return chapters
2708
2709     def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2710         return self._parse_json(self._search_regex(
2711             (fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}',
2712              regex), webpage, name, default='{}'), video_id, fatal=False)
2713
2714     def _extract_comment(self, comment_renderer, parent=None):
2715         comment_id = comment_renderer.get('commentId')
2716         if not comment_id:
2717             return
2718
2719         text = self._get_text(comment_renderer, 'contentText')
2720
2721         # note: timestamp is an estimate calculated from the current time and time_text
2722         timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
2723         author = self._get_text(comment_renderer, 'authorText')
2724         author_id = try_get(comment_renderer,
2725                             lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2726
2727         votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2728                                                        lambda x: x['likeCount']), compat_str)) or 0
2729         author_thumbnail = try_get(comment_renderer,
2730                                    lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2731
2732         author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2733         is_favorited = 'creatorHeart' in (try_get(
2734             comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2735         return {
2736             'id': comment_id,
2737             'text': text,
2738             'timestamp': timestamp,
2739             'time_text': time_text,
2740             'like_count': votes,
2741             'is_favorited': is_favorited,
2742             'author': author,
2743             'author_id': author_id,
2744             'author_thumbnail': author_thumbnail,
2745             'author_is_uploader': author_is_uploader,
2746             'parent': parent or 'root'
2747         }
2748
    def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
        """
        Generator yielding the comment dicts built by _extract_comment.

        Starting from the continuation found in root_continuation_data, pages
        are requested from the 'next' endpoint until no continuation is left.
        For reply threads the generator calls itself recursively, passing the
        id of the parent comment as `parent` and sharing `tracker`.

        @param root_continuation_data  renderer the first continuation is taken from
        @param ytcfg                   passed on to header generation and the API requests
        @param video_id                passed on to warnings and recursive calls
        @param parent                  id of the parent comment when extracting replies,
                                       None for the top-level comment section
        @param tracker                 dict of counters shared across recursive calls;
                                       created here when not given
        """

        # First value of the given extractor argument ([''] is passed as the default)
        get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

        def extract_header(contents):
            # Look for the comments header: record the estimated comment count
            # and return the continuation of the requested sort order (or None)
            _continuation = None
            for content in contents:
                comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
                expected_comment_count = self._get_count(
                    comments_header_renderer, 'countText', 'commentsCount')

                if expected_comment_count:
                    tracker['est_total'] = expected_comment_count
                    self.to_screen(f'Downloading ~{expected_comment_count} comments')
                comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    continue

                sort_text = str_or_none(sort_menu_item.get('title'))
                if not sort_text:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text.lower())
                break
            return _continuation

        def extract_thread(contents):
            # Yield the comments of one page, each followed by its replies.
            # A yielded None tells the consumer below to stop altogether.
            if not parent:
                tracker['current_page_thread'] = 0
            for content in contents:
                if not parent and tracker['total_parent_comments'] >= max_parents:
                    # Limit of top-level comments reached
                    yield
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                # The comment renderer is looked up in the thread renderer
                # first and in the content item itself second
                comment_renderer = get_first(
                    (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                    expected_type=dict, default={})

                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue

                tracker['running_total'] += 1
                tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
                yield comment

                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    tracker['current_page_thread'] += 1
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, ytcfg, video_id,
                        parent=comment.get('id'), tracker=tracker)
                    # Take no more replies than the per-thread limit and what
                    # is left of the overall reply limit
                    yield from itertools.islice(comment_entries_iter, min(
                        max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

        # Keeps track of counts across recursive calls
        if not tracker:
            tracker = dict(
                running_total=0,
                est_total=0,
                current_page_thread=0,
                total_parent_comments=0,
                total_reply_comments=0)

        # TODO: Deprecated
        # YouTube comments have a max depth of 2
        max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
        if max_depth:
            self._downloader.deprecation_warning(
                '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
        if max_depth == 1 and parent:
            return

        # The max_comments argument holds up to four limits; the list is padded
        # with '' so that absent ones fall back to sys.maxsize.
        # max_comments itself is not used in this function
        max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
            lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

        continuation = self._extract_continuation(root_continuation_data)
        message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
        if message and not parent:
            self.report_warning(message, video_id=video_id)

        response = None
        # Only the top-level call has to process the comments header
        is_first_continuation = parent is None

        for page_num in itertools.count(0):
            if not continuation:
                break
            # Visitor data is taken from the previous response (None on the first page)
            headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = '    Downloading comment API JSON reply thread %d %s' % (
                        tracker['current_page_thread'], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    '       ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys='onResponseReceivedEndpoints')

            continuation_contents = traverse_obj(
                response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

            # Reset; the loop ends unless one of the sections provides a new continuation
            continuation = None
            for continuation_section in continuation_contents:
                continuation_items = traverse_obj(
                    continuation_section,
                    (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                    get_all=False, expected_type=list) or []
                if is_first_continuation:
                    # The first section is only used for the header (sort
                    # order and estimated count); no comments are read from it
                    continuation = extract_header(continuation_items)
                    is_first_continuation = False
                    if continuation:
                        break
                    continue

                for entry in extract_thread(continuation_items):
                    if not entry:
                        # None is the stop signal of extract_thread
                        return
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    break
2885
2886     def _get_comments(self, ytcfg, video_id, contents, webpage):
2887         """Entry for comment extraction"""
2888         def _real_comment_extract(contents):
2889             renderer = next((
2890                 item for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
2891                 if item.get('sectionIdentifier') == 'comment-item-section'), None)
2892             yield from self._comment_entries(renderer, ytcfg, video_id)
2893
2894         max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
2895         return itertools.islice(_real_comment_extract(contents), 0, max_comments)
2896
2897     @staticmethod
2898     def _get_checkok_params():
2899         return {'contentCheckOk': True, 'racyCheckOk': True}
2900
2901     @classmethod
2902     def _generate_player_context(cls, sts=None):
2903         context = {
2904             'html5Preference': 'HTML5_PREF_WANTS',
2905         }
2906         if sts is not None:
2907             context['signatureTimestamp'] = sts
2908         return {
2909             'playbackContext': {
2910                 'contentPlaybackContext': context
2911             },
2912             **cls._get_checkok_params()
2913         }
2914
2915     @staticmethod
2916     def _is_agegated(player_response):
2917         if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
2918             return True
2919
2920         reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2921         AGE_GATE_REASONS = (
2922             'confirm your age', 'age-restricted', 'inappropriate',  # reason
2923             'age_verification_required', 'age_check_required',  # status
2924         )
2925         return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
2926
2927     @staticmethod
2928     def _is_unplayable(player_response):
2929         return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
2930
2931     def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
2932
2933         session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2934         syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2935         sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
2936         headers = self.generate_api_headers(
2937             ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
2938
2939         yt_query = {'videoId': video_id}
2940         yt_query.update(self._generate_player_context(sts))
2941         return self._extract_response(
2942             item_id=video_id, ep='player', query=yt_query,
2943             ytcfg=player_ytcfg, headers=headers, fatal=True,
2944             default_client=client,
2945             note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2946         ) or None
2947
2948     def _get_requested_clients(self, url, smuggled_data):
2949         requested_clients = []
2950         default = ['android', 'web']
2951         allowed_clients = sorted(
2952             (client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
2953             key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
2954         for client in self._configuration_arg('player_client'):
2955             if client in allowed_clients:
2956                 requested_clients.append(client)
2957             elif client == 'default':
2958                 requested_clients.extend(default)
2959             elif client == 'all':
2960                 requested_clients.extend(allowed_clients)
2961             else:
2962                 self.report_warning(f'Skipping unsupported client {client}')
2963         if not requested_clients:
2964             requested_clients = default
2965
2966         if smuggled_data.get('is_music_url') or self.is_music_url(url):
2967             requested_clients.extend(
2968                 f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
2969
2970         return orderedSet(requested_clients)
2971
    def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
        """
        Collect the player responses of video_id for the given clients.

        Besides the given clients, further client variants are queued while
        running when a response turns out to be unplayable or age-gated.

        Returns a tuple (list of player responses, player_url). If requests
        failed, the last error is raised when no response was obtained at all
        and reported as a warning otherwise.
        """
        initial_pr = None
        if webpage:
            initial_pr = self._extract_yt_initial_variable(
                webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
                video_id, 'initial player response')

        all_clients = set(clients)
        # Reversed copy: clients are taken with pop() from the end, so they are
        # processed in the given order and the caller's list stays untouched
        clients = clients[::-1]
        prs = []

        def append_client(*client_names):
            """ Append the first client name that exists but not already used """
            for client_name in client_names:
                actual_client = _split_innertube_client(client_name)[0]
                if actual_client in INNERTUBE_CLIENTS:
                    if actual_client not in all_clients:
                        # Appended to the end, i.e. it is processed next
                        clients.append(client_name)
                        all_clients.add(actual_client)
                        return

        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        if initial_pr:
            # Shallow copy, so that initial_pr itself keeps its streamingData
            pr = dict(initial_pr)
            pr['streamingData'] = None
            prs.append(pr)

        last_error = None
        tried_iframe_fallback = False
        player_url = None
        while clients:
            client, base_client, variant = _split_innertube_client(clients.pop())
            # Only the web client shares the ytcfg of the webpage; for others a
            # ytcfg is downloaded unless 'configs' is listed in player_skip
            player_ytcfg = master_ytcfg if client == 'web' else {}
            if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
                player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

            # A player URL found once is reused for the following clients
            player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
            require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
            if 'js' in self._configuration_arg('player_skip'):
                require_js_player = False
                player_url = None

            # The fallback download of the player URL is attempted at most once
            if not player_url and not tried_iframe_fallback and require_js_player:
                player_url = self._download_player_url(video_id)
                tried_iframe_fallback = True

            try:
                # For the web client the initial player response of the webpage
                # is used as is, if available, instead of requesting one
                pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
            except ExtractorError as e:
                # Keep only the most recent error; the previous one is
                # downgraded to a warning
                if last_error:
                    self.report_warning(last_error)
                last_error = e
                continue

            if pr:
                prs.append(pr)

            # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
            if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
                append_client(f'{base_client}_creator')
            elif self._is_agegated(pr):
                if variant == 'tv_embedded':
                    append_client(f'{base_client}_embedded')
                elif not variant:
                    append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

        if last_error:
            if not len(prs):
                raise last_error
            self.report_warning(last_error)
        return prs, player_url
3047
3048     def _extract_formats(self, streaming_data, video_id, player_url, is_live, duration):
3049         itags, stream_ids = {}, []
3050         itag_qualities, res_qualities = {}, {}
3051         q = qualities([
3052             # Normally tiny is the smallest video-only formats. But
3053             # audio-only formats with unknown quality may get tagged as tiny
3054             'tiny',
3055             'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
3056             'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
3057         ])
3058         streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
3059
3060         for fmt in streaming_formats:
3061             if fmt.get('targetDurationSec'):
3062                 continue
3063
3064             itag = str_or_none(fmt.get('itag'))
3065             audio_track = fmt.get('audioTrack') or {}
3066             stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
3067             if stream_id in stream_ids:
3068                 continue
3069
3070             quality = fmt.get('quality')
3071             height = int_or_none(fmt.get('height'))
3072             if quality == 'tiny' or not quality:
3073                 quality = fmt.get('audioQuality', '').lower() or quality
3074             # The 3gp format (17) in android client has a quality of "small",
3075             # but is actually worse than other formats
3076             if itag == '17':
3077                 quality = 'tiny'
3078             if quality:
3079                 if itag:
3080                     itag_qualities[itag] = quality
3081                 if height:
3082                     res_qualities[height] = quality
3083             # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
3084             # (adding `&sq=0` to the URL) and parsing emsg box to determine the
3085             # number of fragment that would subsequently requested with (`&sq=N`)
3086             if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
3087                 continue
3088
3089             fmt_url = fmt.get('url')
3090             if not fmt_url:
3091                 sc = compat_parse_qs(fmt.get('signatureCipher'))
3092                 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
3093                 encrypted_sig = try_get(sc, lambda x: x['s'][0])
3094                 if not (sc and fmt_url and encrypted_sig):
3095                     continue
3096                 if not player_url:
3097                     continue
3098                 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
3099                 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
3100                 fmt_url += '&' + sp + '=' + signature
3101
3102             query = parse_qs(fmt_url)
3103             throttled = False
3104             if query.get('n'):
3105                 try:
3106                     fmt_url = update_url_query(fmt_url, {
3107                         'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
3108                 except ExtractorError as e:
3109                     self.report_warning(
3110                         f'nsig extraction failed: You may experience throttling for some formats\n'
3111                         f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
3112                     throttled = True
3113
3114             if itag:
3115                 itags[itag] = 'https'
3116                 stream_ids.append(stream_id)
3117
3118             tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
3119             language_preference = (
3120                 10 if audio_track.get('audioIsDefault') and 10
3121                 else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
3122                 else -1)
3123             # Some formats may have much smaller duration than others (possibly damaged during encoding)
3124             # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
3125             # Make sure to avoid false positives with small duration differences.
3126             # Eg: __2ABJjxzNo, ySuUZEjARPY
3127             is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
3128             if is_damaged:
3129                 self.report_warning(f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
3130             dct = {
3131                 'asr': int_or_none(fmt.get('audioSampleRate')),
3132                 'filesize': int_or_none(fmt.get('contentLength')),
3133                 'format_id': itag,
3134                 'format_note': join_nonempty(
3135                     '%s%s' % (audio_track.get('displayName') or '',
3136                               ' (default)' if language_preference > 0 else ''),
3137                     fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
3138                     throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
3139                 'source_preference': -10 if throttled else -1,
3140                 'fps': int_or_none(fmt.get('fps')) or None,
3141                 'height': height,
3142                 'quality': q(quality),
3143                 'has_drm': bool(fmt.get('drmFamilies')),
3144                 'tbr': tbr,
3145                 'url': fmt_url,
3146                 'width': int_or_none(fmt.get('width')),
3147                 'language': join_nonempty(audio_track.get('id', '').split('.')[0],
3148                                           'desc' if language_preference < -1 else ''),
3149                 'language_preference': language_preference,
3150                 # Strictly de-prioritize damaged and 3gp formats
3151                 'preference': -10 if is_damaged else -2 if itag == '17' else None,
3152             }
3153             mime_mobj = re.match(
3154                 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
3155             if mime_mobj:
3156                 dct['ext'] = mimetype2ext(mime_mobj.group(1))
3157                 dct.update(parse_codecs(mime_mobj.group(2)))
3158             no_audio = dct.get('acodec') == 'none'
3159             no_video = dct.get('vcodec') == 'none'
3160             if no_audio:
3161                 dct['vbr'] = tbr
3162             if no_video:
3163                 dct['abr'] = tbr
3164             if no_audio or no_video:
3165                 dct['downloader_options'] = {
3166                     # Youtube throttles chunks >~10M
3167                     'http_chunk_size': 10485760,
3168                 }
3169                 if dct.get('ext'):
3170                     dct['container'] = dct['ext'] + '_dash'
3171             yield dct
3172
3173         live_from_start = is_live and self.get_param('live_from_start')
3174         skip_manifests = self._configuration_arg('skip')
3175         if not self.get_param('youtube_include_hls_manifest', True):
3176             skip_manifests.append('hls')
3177         get_dash = 'dash' not in skip_manifests and (
3178             not is_live or live_from_start or self._configuration_arg('include_live_dash'))
3179         get_hls = not live_from_start and 'hls' not in skip_manifests
3180
3181         def process_manifest_format(f, proto, itag):
3182             if itag in itags:
3183                 if itags[itag] == proto or f'{itag}-{proto}' in itags:
3184                     return False
3185                 itag = f'{itag}-{proto}'
3186             if itag:
3187                 f['format_id'] = itag
3188                 itags[itag] = proto
3189
3190             f['quality'] = next((
3191                 q(qdict[val])
3192                 for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
3193                 if val in qdict), -1)
3194             return True
3195
3196         for sd in streaming_data:
3197             hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
3198             if hls_manifest_url:
3199                 for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
3200                     if process_manifest_format(f, 'hls', self._search_regex(
3201                             r'/itag/(\d+)', f['url'], 'itag', default=None)):
3202                         yield f
3203
3204             dash_manifest_url = get_dash and sd.get('dashManifestUrl')
3205             if dash_manifest_url:
3206                 for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
3207                     if process_manifest_format(f, 'dash', f['format_id']):
3208                         f['filesize'] = int_or_none(self._search_regex(
3209                             r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
3210                         if live_from_start:
3211                             f['is_from_start'] = True
3212
3213                         yield f
3214
3215     def _extract_storyboard(self, player_responses, duration):
3216         spec = get_first(
3217             player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
3218         base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
3219         if not base_url:
3220             return
3221         L = len(spec) - 1
3222         for i, args in enumerate(spec):
3223             args = args.split('#')
3224             counts = list(map(int_or_none, args[:5]))
3225             if len(args) != 8 or not all(counts):
3226                 self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
3227                 continue
3228             width, height, frame_count, cols, rows = counts
3229             N, sigh = args[6:]
3230
3231             url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
3232             fragment_count = frame_count / (cols * rows)
3233             fragment_duration = duration / fragment_count
3234             yield {
3235                 'format_id': f'sb{i}',
3236                 'format_note': 'storyboard',
3237                 'ext': 'mhtml',
3238                 'protocol': 'mhtml',
3239                 'acodec': 'none',
3240                 'vcodec': 'none',
3241                 'url': url,
3242                 'width': width,
3243                 'height': height,
3244                 'fragments': [{
3245                     'url': url.replace('$M', str(j)),
3246                     'duration': min(fragment_duration, duration - (j * fragment_duration)),
3247                 } for j in range(math.ceil(fragment_count))],
3248             }
3249
3250     def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
3251         webpage = None
3252         if 'webpage' not in self._configuration_arg('player_skip'):
3253             webpage = self._download_webpage(
3254                 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
3255
3256         master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
3257
3258         player_responses, player_url = self._extract_player_responses(
3259             self._get_requested_clients(url, smuggled_data),
3260             video_id, webpage, master_ytcfg)
3261
3262         return webpage, master_ytcfg, player_responses, player_url
3263
3264     def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
3265         live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
3266         is_live = get_first(video_details, 'isLive')
3267         if is_live is None:
3268             is_live = get_first(live_broadcast_details, 'isLiveNow')
3269
3270         streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
3271         formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live, duration))
3272
3273         return live_broadcast_details, is_live, streaming_data, formats
3274
3275     def _real_extract(self, url):
3276         url, smuggled_data = unsmuggle_url(url, {})
3277         video_id = self._match_id(url)
3278
3279         base_url = self.http_scheme() + '//www.youtube.com/'
3280         webpage_url = base_url + 'watch?v=' + video_id
3281
3282         webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
3283
3284         playability_statuses = traverse_obj(
3285             player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
3286
3287         trailer_video_id = get_first(
3288             playability_statuses,
3289             ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
3290             expected_type=str)
3291         if trailer_video_id:
3292             return self.url_result(
3293                 trailer_video_id, self.ie_key(), trailer_video_id)
3294
3295         search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
3296                        if webpage else (lambda x: None))
3297
3298         video_details = traverse_obj(
3299             player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
3300         microformats = traverse_obj(
3301             player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
3302             expected_type=dict, default=[])
3303         video_title = (
3304             get_first(video_details, 'title')
3305             or self._get_text(microformats, (..., 'title'))
3306             or search_meta(['og:title', 'twitter:title', 'title']))
3307         video_description = get_first(video_details, 'shortDescription')
3308
3309         multifeed_metadata_list = get_first(
3310             player_responses,
3311             ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
3312             expected_type=str)
3313         if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
3314             if self.get_param('noplaylist'):
3315                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
3316             else:
3317                 entries = []
3318                 feed_ids = []
3319                 for feed in multifeed_metadata_list.split(','):
3320                     # Unquote should take place before split on comma (,) since textual
3321                     # fields may contain comma as well (see
3322                     # https://github.com/ytdl-org/youtube-dl/issues/8536)
3323                     feed_data = compat_parse_qs(
3324                         compat_urllib_parse_unquote_plus(feed))
3325
3326                     def feed_entry(name):
3327                         return try_get(
3328                             feed_data, lambda x: x[name][0], compat_str)
3329
3330                     feed_id = feed_entry('id')
3331                     if not feed_id:
3332                         continue
3333                     feed_title = feed_entry('title')
3334                     title = video_title
3335                     if feed_title:
3336                         title += ' (%s)' % feed_title
3337                     entries.append({
3338                         '_type': 'url_transparent',
3339                         'ie_key': 'Youtube',
3340                         'url': smuggle_url(
3341                             '%swatch?v=%s' % (base_url, feed_data['id'][0]),
3342                             {'force_singlefeed': True}),
3343                         'title': title,
3344                     })
3345                     feed_ids.append(feed_id)
3346                 self.to_screen(
3347                     'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
3348                     % (', '.join(feed_ids), video_id))
3349                 return self.playlist_result(
3350                     entries, video_id, video_title, video_description)
3351
3352         duration = int_or_none(
3353             get_first(video_details, 'lengthSeconds')
3354             or get_first(microformats, 'lengthSeconds')
3355             or parse_duration(search_meta('duration'))) or None
3356
3357         live_broadcast_details, is_live, streaming_data, formats = self._list_formats(
3358             video_id, microformats, video_details, player_responses, player_url, duration)
3359
3360         if not formats:
3361             if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
3362                 self.report_drm(video_id)
3363             pemr = get_first(
3364                 playability_statuses,
3365                 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
3366             reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
3367             subreason = clean_html(self._get_text(pemr, 'subreason') or '')
3368             if subreason:
3369                 if subreason == 'The uploader has not made this video available in your country.':
3370                     countries = get_first(microformats, 'availableCountries')
3371                     if not countries:
3372                         regions_allowed = search_meta('regionsAllowed')
3373                         countries = regions_allowed.split(',') if regions_allowed else None
3374                     self.raise_geo_restricted(subreason, countries, metadata_available=True)
3375                 reason += f'. {subreason}'
3376             if reason:
3377                 self.raise_no_formats(reason, expected=True)
3378
3379         keywords = get_first(video_details, 'keywords', expected_type=list) or []
3380         if not keywords and webpage:
3381             keywords = [
3382                 unescapeHTML(m.group('content'))
3383                 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
3384         for keyword in keywords:
3385             if keyword.startswith('yt:stretch='):
3386                 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
3387                 if mobj:
3388                     # NB: float is intentional for forcing float division
3389                     w, h = (float(v) for v in mobj.groups())
3390                     if w > 0 and h > 0:
3391                         ratio = w / h
3392                         for f in formats:
3393                             if f.get('vcodec') != 'none':
3394                                 f['stretched_ratio'] = ratio
3395                         break
3396         thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
3397         thumbnail_url = search_meta(['og:image', 'twitter:image'])
3398         if thumbnail_url:
3399             thumbnails.append({
3400                 'url': thumbnail_url,
3401             })
3402         original_thumbnails = thumbnails.copy()
3403
3404         # The best resolution thumbnails sometimes does not appear in the webpage
3405         # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
3406         # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
3407         thumbnail_names = [
3408             'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
3409             'hqdefault', 'hq1', 'hq2', 'hq3', '0',
3410             'mqdefault', 'mq1', 'mq2', 'mq3',
3411             'default', '1', '2', '3'
3412         ]
3413         n_thumbnail_names = len(thumbnail_names)
3414         thumbnails.extend({
3415             'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
3416                 video_id=video_id, name=name, ext=ext,
3417                 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
3418         } for name in thumbnail_names for ext in ('webp', 'jpg'))
3419         for thumb in thumbnails:
3420             i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
3421             thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
3422         self._remove_duplicate_formats(thumbnails)
3423         self._downloader._sort_thumbnails(original_thumbnails)
3424
3425         category = get_first(microformats, 'category') or search_meta('genre')
3426         channel_id = str_or_none(
3427             get_first(video_details, 'channelId')
3428             or get_first(microformats, 'externalChannelId')
3429             or search_meta('channelId'))
3430         owner_profile_url = get_first(microformats, 'ownerProfileUrl')
3431
3432         live_content = get_first(video_details, 'isLiveContent')
3433         is_upcoming = get_first(video_details, 'isUpcoming')
3434         if is_live is None:
3435             if is_upcoming or live_content is False:
3436                 is_live = False
3437         if is_upcoming is None and (live_content or is_live):
3438             is_upcoming = False
3439         live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
3440         live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
3441         if not duration and live_end_time and live_start_time:
3442             duration = live_end_time - live_start_time
3443
3444         if is_live and self.get_param('live_from_start'):
3445             self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)
3446
3447         formats.extend(self._extract_storyboard(player_responses, duration))
3448
3449         # Source is given priority since formats that throttle are given lower source_preference
3450         # When throttling issue is fully fixed, remove this
3451         self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
3452
3453         info = {
3454             'id': video_id,
3455             'title': video_title,
3456             'formats': formats,
3457             'thumbnails': thumbnails,
3458             # The best thumbnail that we are sure exists. Prevents unnecessary
3459             # URL checking if user don't care about getting the best possible thumbnail
3460             'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
3461             'description': video_description,
3462             'uploader': get_first(video_details, 'author'),
3463             'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
3464             'uploader_url': owner_profile_url,
3465             'channel_id': channel_id,
3466             'channel_url': format_field(channel_id, template='https://www.youtube.com/channel/%s'),
3467             'duration': duration,
3468             'view_count': int_or_none(
3469                 get_first((video_details, microformats), (..., 'viewCount'))
3470                 or search_meta('interactionCount')),
3471             'average_rating': float_or_none(get_first(video_details, 'averageRating')),
3472             'age_limit': 18 if (
3473                 get_first(microformats, 'isFamilySafe') is False
3474                 or search_meta('isFamilyFriendly') == 'false'
3475                 or search_meta('og:restrictions:age') == '18+') else 0,
3476             'webpage_url': webpage_url,
3477             'categories': [category] if category else None,
3478             'tags': keywords,
3479             'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
3480             'is_live': is_live,
3481             'was_live': (False if is_live or is_upcoming or live_content is False
3482                          else None if is_live is None or is_upcoming is None
3483                          else live_content),
3484             'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
3485             'release_timestamp': live_start_time,
3486         }
3487
3488         pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
3489         if pctr:
3490             def get_lang_code(track):
3491                 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
3492                         or track.get('languageCode'))
3493
3494             # Converted into dicts to remove duplicates
3495             captions = {
3496                 get_lang_code(sub): sub
3497                 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
3498             translation_languages = {
3499                 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
3500                 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
3501
3502             def process_language(container, base_url, lang_code, sub_name, query):
3503                 lang_subs = container.setdefault(lang_code, [])
3504                 for fmt in self._SUBTITLE_FORMATS:
3505                     query.update({
3506                         'fmt': fmt,
3507                     })
3508                     lang_subs.append({
3509                         'ext': fmt,
3510                         'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
3511                         'name': sub_name,
3512                     })
3513
3514             subtitles, automatic_captions = {}, {}
3515             for lang_code, caption_track in captions.items():
3516                 base_url = caption_track.get('baseUrl')
3517                 orig_lang = parse_qs(base_url).get('lang', [None])[-1]
3518                 if not base_url:
3519                     continue
3520                 lang_name = self._get_text(caption_track, 'name', max_runs=1)
3521                 if caption_track.get('kind') != 'asr':
3522                     if not lang_code:
3523                         continue
3524                     process_language(
3525                         subtitles, base_url, lang_code, lang_name, {})
3526                     if not caption_track.get('isTranslatable'):
3527                         continue
3528                 for trans_code, trans_name in translation_languages.items():
3529                     if not trans_code:
3530                         continue
3531                     orig_trans_code = trans_code
3532                     if caption_track.get('kind') != 'asr':
3533                         if 'translated_subs' in self._configuration_arg('skip'):
3534                             continue
3535                         trans_code += f'-{lang_code}'
3536                         trans_name += format_field(lang_name, template=' from %s')
3537                     # Add an "-orig" label to the original language so that it can be distinguished.
3538                     # The subs are returned without "-orig" as well for compatibility
3539                     if lang_code == f'a-{orig_trans_code}':
3540                         process_language(
3541                             automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
3542                     # Setting tlang=lang returns damaged subtitles.
3543                     process_language(automatic_captions, base_url, trans_code, trans_name,
3544                                      {} if orig_lang == orig_trans_code else {'tlang': trans_code})
3545             info['automatic_captions'] = automatic_captions
3546             info['subtitles'] = subtitles
3547
3548         parsed_url = compat_urllib_parse_urlparse(url)
3549         for component in [parsed_url.fragment, parsed_url.query]:
3550             query = compat_parse_qs(component)
3551             for k, v in query.items():
3552                 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
3553                     d_k += '_time'
3554                     if d_k not in info and k in s_ks:
3555                         info[d_k] = parse_duration(query[k][0])
3556
3557         # Youtube Music Auto-generated description
3558         if video_description:
3559             mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
3560             if mobj:
3561                 release_year = mobj.group('release_year')
3562                 release_date = mobj.group('release_date')
3563                 if release_date:
3564                     release_date = release_date.replace('-', '')
3565                     if not release_year:
3566                         release_year = release_date[:4]
3567                 info.update({
3568                     'album': mobj.group('album'.strip()),
3569                     'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3570                     'track': mobj.group('track').strip(),
3571                     'release_date': release_date,
3572                     'release_year': int_or_none(release_year),
3573                 })
3574
3575         initial_data = None
3576         if webpage:
3577             initial_data = self._extract_yt_initial_variable(
3578                 webpage, self._YT_INITIAL_DATA_RE, video_id,
3579                 'yt initial data')
3580         if not initial_data:
3581             query = {'videoId': video_id}
3582             query.update(self._get_checkok_params())
3583             initial_data = self._extract_response(
3584                 item_id=video_id, ep='next', fatal=False,
3585                 ytcfg=master_ytcfg, query=query,
3586                 headers=self.generate_api_headers(ytcfg=master_ytcfg),
3587                 note='Downloading initial data API JSON')
3588
3589         try:  # This will error if there is no livechat
3590             initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
3591         except (KeyError, IndexError, TypeError):
3592             pass
3593         else:
3594             info.setdefault('subtitles', {})['live_chat'] = [{
3595                 'url': f'https://www.youtube.com/watch?v={video_id}',  # url is needed to set cookies
3596                 'video_id': video_id,
3597                 'ext': 'json',
3598                 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
3599             }]
3600
3601         if initial_data:
3602             info['chapters'] = (
3603                 self._extract_chapters_from_json(initial_data, duration)
3604                 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3605                 or None)
3606
3607         contents = traverse_obj(
3608             initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
3609             expected_type=list, default=[])
3610
3611         vpir = get_first(contents, 'videoPrimaryInfoRenderer')
3612         if vpir:
3613             stl = vpir.get('superTitleLink')
3614             if stl:
3615                 stl = self._get_text(stl)
3616                 if try_get(
3617                         vpir,
3618                         lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3619                     info['location'] = stl
3620                 else:
3621                     mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
3622                     if mobj:
3623                         info.update({
3624                             'series': mobj.group(1),
3625                             'season_number': int(mobj.group(2)),
3626                             'episode_number': int(mobj.group(3)),
3627                         })
3628             for tlb in (try_get(
3629                     vpir,
3630                     lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3631                     list) or []):
3632                 tbr = tlb.get('toggleButtonRenderer') or {}
3633                 for getter, regex in [(
3634                         lambda x: x['defaultText']['accessibility']['accessibilityData'],
3635                         r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3636                             lambda x: x['accessibility'],
3637                             lambda x: x['accessibilityData']['accessibilityData'],
3638                         ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3639                     label = (try_get(tbr, getter, dict) or {}).get('label')
3640                     if label:
3641                         mobj = re.match(regex, label)
3642                         if mobj:
3643                             info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3644                             break
3645             sbr_tooltip = try_get(
3646                 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3647             if sbr_tooltip:
3648                 like_count, dislike_count = sbr_tooltip.split(' / ')
3649                 info.update({
3650                     'like_count': str_to_int(like_count),
3651                     'dislike_count': str_to_int(dislike_count),
3652                 })
3653         vsir = get_first(contents, 'videoSecondaryInfoRenderer')
3654         if vsir:
3655             vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
3656             info.update({
3657                 'channel': self._get_text(vor, 'title'),
3658                 'channel_follower_count': self._get_count(vor, 'subscriberCountText')})
3659
3660             rows = try_get(
3661                 vsir,
3662                 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3663                 list) or []
3664             multiple_songs = False
3665             for row in rows:
3666                 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3667                     multiple_songs = True
3668                     break
3669             for row in rows:
3670                 mrr = row.get('metadataRowRenderer') or {}
3671                 mrr_title = mrr.get('title')
3672                 if not mrr_title:
3673                     continue
3674                 mrr_title = self._get_text(mrr, 'title')
3675                 mrr_contents_text = self._get_text(mrr, ('contents', 0))
3676                 if mrr_title == 'License':
3677                     info['license'] = mrr_contents_text
3678                 elif not multiple_songs:
3679                     if mrr_title == 'Album':
3680                         info['album'] = mrr_contents_text
3681                     elif mrr_title == 'Artist':
3682                         info['artist'] = mrr_contents_text
3683                     elif mrr_title == 'Song':
3684                         info['track'] = mrr_contents_text
3685
3686         fallbacks = {
3687             'channel': 'uploader',
3688             'channel_id': 'uploader_id',
3689             'channel_url': 'uploader_url',
3690         }
3691
3692         # The upload date for scheduled, live and past live streams / premieres in microformats
3693         # may be different from the stream date. Although not in UTC, we will prefer it in this case.
3694         # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
3695         upload_date = (
3696             unified_strdate(get_first(microformats, 'uploadDate'))
3697             or unified_strdate(search_meta('uploadDate')))
3698         if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):
3699             upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d')
3700         info['upload_date'] = upload_date
3701
3702         for to, frm in fallbacks.items():
3703             if not info.get(to):
3704                 info[to] = info.get(frm)
3705
3706         for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3707             v = info.get(s_k)
3708             if v:
3709                 info[d_k] = v
3710
3711         is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3712         is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3713         is_membersonly = None
3714         is_premium = None
3715         if initial_data and is_private is not None:
3716             is_membersonly = False
3717             is_premium = False
3718             contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3719             badge_labels = set()
3720             for content in contents:
3721                 if not isinstance(content, dict):
3722                     continue
3723                 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3724             for badge_label in badge_labels:
3725                 if badge_label.lower() == 'members only':
3726                     is_membersonly = True
3727                 elif badge_label.lower() == 'premium':
3728                     is_premium = True
3729                 elif badge_label.lower() == 'unlisted':
3730                     is_unlisted = True
3731
3732         info['availability'] = self._availability(
3733             is_private=is_private,
3734             needs_premium=is_premium,
3735             needs_subscription=is_membersonly,
3736             needs_auth=info['age_limit'] >= 18,
3737             is_unlisted=None if is_private is None else is_unlisted)
3738
3739         info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
3740
3741         self.mark_watched(video_id, player_responses)
3742
3743         return info
3744
3745
3746 class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
3747
3748     @staticmethod
3749     def passthrough_smuggled_data(func):
3750         def _smuggle(entries, smuggled_data):
3751             for entry in entries:
3752                 # TODO: Convert URL to music.youtube instead.
3753                 # Do we need to passthrough any other smuggled_data?
3754                 entry['url'] = smuggle_url(entry['url'], smuggled_data)
3755                 yield entry
3756
3757         @functools.wraps(func)
3758         def wrapper(self, url):
3759             url, smuggled_data = unsmuggle_url(url, {})
3760             if self.is_music_url(url):
3761                 smuggled_data['is_music_url'] = True
3762             info_dict = func(self, url, smuggled_data)
3763             if smuggled_data and info_dict.get('entries'):
3764                 info_dict['entries'] = _smuggle(info_dict['entries'], smuggled_data)
3765             return info_dict
3766         return wrapper
3767
3768     def _extract_channel_id(self, webpage):
3769         channel_id = self._html_search_meta(
3770             'channelId', webpage, 'channel id', default=None)
3771         if channel_id:
3772             return channel_id
3773         channel_url = self._html_search_meta(
3774             ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3775              'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3776              'twitter:app:url:googleplay'), webpage, 'channel url')
3777         return self._search_regex(
3778             r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3779             channel_url, 'channel id')
3780
3781     @staticmethod
3782     def _extract_basic_item_renderer(item):
3783         # Modified from _extract_grid_item_renderer
3784         known_basic_renderers = (
3785             'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
3786         )
3787         for key, renderer in item.items():
3788             if not isinstance(renderer, dict):
3789                 continue
3790             elif key in known_basic_renderers:
3791                 return renderer
3792             elif key.startswith('grid') and key.endswith('Renderer'):
3793                 return renderer
3794
3795     def _grid_entries(self, grid_renderer):
3796         for item in grid_renderer['items']:
3797             if not isinstance(item, dict):
3798                 continue
3799             renderer = self._extract_basic_item_renderer(item)
3800             if not isinstance(renderer, dict):
3801                 continue
3802             title = self._get_text(renderer, 'title')
3803
3804             # playlist
3805             playlist_id = renderer.get('playlistId')
3806             if playlist_id:
3807                 yield self.url_result(
3808                     'https://www.youtube.com/playlist?list=%s' % playlist_id,
3809                     ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3810                     video_title=title)
3811                 continue
3812             # video
3813             video_id = renderer.get('videoId')
3814             if video_id:
3815                 yield self._extract_video(renderer)
3816                 continue
3817             # channel
3818             channel_id = renderer.get('channelId')
3819             if channel_id:
3820                 yield self.url_result(
3821                     'https://www.youtube.com/channel/%s' % channel_id,
3822                     ie=YoutubeTabIE.ie_key(), video_title=title)
3823                 continue
3824             # generic endpoint URL support
3825             ep_url = urljoin('https://www.youtube.com/', try_get(
3826                 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3827                 compat_str))
3828             if ep_url:
3829                 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3830                     if ie.suitable(ep_url):
3831                         yield self.url_result(
3832                             ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3833                         break
3834
3835     def _music_reponsive_list_entry(self, renderer):
3836         video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
3837         if video_id:
3838             return self.url_result(f'https://music.youtube.com/watch?v={video_id}',
3839                                    ie=YoutubeIE.ie_key(), video_id=video_id)
3840         playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
3841         if playlist_id:
3842             video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
3843             if video_id:
3844                 return self.url_result(f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}',
3845                                        ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3846             return self.url_result(f'https://music.youtube.com/playlist?list={playlist_id}',
3847                                    ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3848         browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
3849         if browse_id:
3850             return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
3851                                    ie=YoutubeTabIE.ie_key(), video_id=browse_id)
3852
3853     def _shelf_entries_from_content(self, shelf_renderer):
3854         content = shelf_renderer.get('content')
3855         if not isinstance(content, dict):
3856             return
3857         renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3858         if renderer:
3859             # TODO: add support for nested playlists so each shelf is processed
3860             # as separate playlist
3861             # TODO: this includes only first N items
3862             yield from self._grid_entries(renderer)
3863         renderer = content.get('horizontalListRenderer')
3864         if renderer:
3865             # TODO
3866             pass
3867
3868     def _shelf_entries(self, shelf_renderer, skip_channels=False):
3869         ep = try_get(
3870             shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3871             compat_str)
3872         shelf_url = urljoin('https://www.youtube.com', ep)
3873         if shelf_url:
3874             # Skipping links to another channels, note that checking for
3875             # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3876             # will not work
3877             if skip_channels and '/channels?' in shelf_url:
3878                 return
3879             title = self._get_text(shelf_renderer, 'title')
3880             yield self.url_result(shelf_url, video_title=title)
3881         # Shelf may not contain shelf URL, fallback to extraction from content
3882         yield from self._shelf_entries_from_content(shelf_renderer)
3883
3884     def _playlist_entries(self, video_list_renderer):
3885         for content in video_list_renderer['contents']:
3886             if not isinstance(content, dict):
3887                 continue
3888             renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3889             if not isinstance(renderer, dict):
3890                 continue
3891             video_id = renderer.get('videoId')
3892             if not video_id:
3893                 continue
3894             yield self._extract_video(renderer)
3895
3896     def _rich_entries(self, rich_grid_renderer):
3897         renderer = try_get(
3898             rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3899         video_id = renderer.get('videoId')
3900         if not video_id:
3901             return
3902         yield self._extract_video(renderer)
3903
3904     def _video_entry(self, video_renderer):
3905         video_id = video_renderer.get('videoId')
3906         if video_id:
3907             return self._extract_video(video_renderer)
3908
3909     def _hashtag_tile_entry(self, hashtag_tile_renderer):
3910         url = urljoin('https://youtube.com', traverse_obj(
3911             hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url')))
3912         if url:
3913             return self.url_result(
3914                 url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
3915
3916     def _post_thread_entries(self, post_thread_renderer):
3917         post_renderer = try_get(
3918             post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3919         if not post_renderer:
3920             return
3921         # video attachment
3922         video_renderer = try_get(
3923             post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3924         video_id = video_renderer.get('videoId')
3925         if video_id:
3926             entry = self._extract_video(video_renderer)
3927             if entry:
3928                 yield entry
3929         # playlist attachment
3930         playlist_id = try_get(
3931             post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3932         if playlist_id:
3933             yield self.url_result(
3934                 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3935                 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3936         # inline video links
3937         runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3938         for run in runs:
3939             if not isinstance(run, dict):
3940                 continue
3941             ep_url = try_get(
3942                 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3943             if not ep_url:
3944                 continue
3945             if not YoutubeIE.suitable(ep_url):
3946                 continue
3947             ep_video_id = YoutubeIE._match_id(ep_url)
3948             if video_id == ep_video_id:
3949                 continue
3950             yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3951
3952     def _post_thread_continuation_entries(self, post_thread_continuation):
3953         contents = post_thread_continuation.get('contents')
3954         if not isinstance(contents, list):
3955             return
3956         for content in contents:
3957             renderer = content.get('backstagePostThreadRenderer')
3958             if not isinstance(renderer, dict):
3959                 continue
3960             yield from self._post_thread_entries(renderer)
3961
3962     r''' # unused
3963     def _rich_grid_entries(self, contents):
3964         for content in contents:
3965             video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3966             if video_renderer:
3967                 entry = self._video_entry(video_renderer)
3968                 if entry:
3969                     yield entry
3970     '''
3971
3972     def _extract_entries(self, parent_renderer, continuation_list):
3973         # continuation_list is modified in-place with continuation_list = [continuation_token]
3974         continuation_list[:] = [None]
3975         contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
3976         for content in contents:
3977             if not isinstance(content, dict):
3978                 continue
3979             is_renderer = traverse_obj(
3980                 content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
3981                 expected_type=dict)
3982             if not is_renderer:
3983                 renderer = content.get('richItemRenderer')
3984                 if renderer:
3985                     for entry in self._rich_entries(renderer):
3986                         yield entry
3987                     continuation_list[0] = self._extract_continuation(parent_renderer)
3988                 continue
3989             isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
3990             for isr_content in isr_contents:
3991                 if not isinstance(isr_content, dict):
3992                     continue
3993
3994                 known_renderers = {
3995                     'playlistVideoListRenderer': self._playlist_entries,
3996                     'gridRenderer': self._grid_entries,
3997                     'reelShelfRenderer': self._grid_entries,
3998                     'shelfRenderer': self._shelf_entries,
3999                     'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
4000                     'backstagePostThreadRenderer': self._post_thread_entries,
4001                     'videoRenderer': lambda x: [self._video_entry(x)],
4002                     'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
4003                     'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
4004                     'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
4005                 }
4006                 for key, renderer in isr_content.items():
4007                     if key not in known_renderers:
4008                         continue
4009                     for entry in known_renderers[key](renderer):
4010                         if entry:
4011                             yield entry
4012                     continuation_list[0] = self._extract_continuation(renderer)
4013                     break
4014
4015             if not continuation_list[0]:
4016                 continuation_list[0] = self._extract_continuation(is_renderer)
4017
4018         if not continuation_list[0]:
4019             continuation_list[0] = self._extract_continuation(parent_renderer)
4020
4021     def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
4022         continuation_list = [None]
4023         extract_entries = lambda x: self._extract_entries(x, continuation_list)
4024         tab_content = try_get(tab, lambda x: x['content'], dict)
4025         if not tab_content:
4026             return
4027         parent_renderer = (
4028             try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
4029             or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
4030         yield from extract_entries(parent_renderer)
4031         continuation = continuation_list[0]
4032
4033         for page_num in itertools.count(1):
4034             if not continuation:
4035                 break
4036             headers = self.generate_api_headers(
4037                 ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
4038             response = self._extract_response(
4039                 item_id=f'{item_id} page {page_num}',
4040                 query=continuation, headers=headers, ytcfg=ytcfg,
4041                 check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
4042
4043             if not response:
4044                 break
4045             # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
4046             # See: https://github.com/ytdl-org/youtube-dl/issues/28702
4047             visitor_data = self._extract_visitor_data(response) or visitor_data
4048
4049             known_continuation_renderers = {
4050                 'playlistVideoListContinuation': self._playlist_entries,
4051                 'gridContinuation': self._grid_entries,
4052                 'itemSectionContinuation': self._post_thread_continuation_entries,
4053                 'sectionListContinuation': extract_entries,  # for feeds
4054             }
4055             continuation_contents = try_get(
4056                 response, lambda x: x['continuationContents'], dict) or {}
4057             continuation_renderer = None
4058             for key, value in continuation_contents.items():
4059                 if key not in known_continuation_renderers:
4060                     continue
4061                 continuation_renderer = value
4062                 continuation_list = [None]
4063                 yield from known_continuation_renderers[key](continuation_renderer)
4064                 continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
4065                 break
4066             if continuation_renderer:
4067                 continue
4068
4069             known_renderers = {
4070                 'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
4071                 'gridPlaylistRenderer': (self._grid_entries, 'items'),
4072                 'gridVideoRenderer': (self._grid_entries, 'items'),
4073                 'gridChannelRenderer': (self._grid_entries, 'items'),
4074                 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
4075                 'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
4076                 'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
4077                 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
4078             }
4079             on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
4080             continuation_items = try_get(
4081                 on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
4082             continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
4083             video_items_renderer = None
4084             for key, value in continuation_item.items():
4085                 if key not in known_renderers:
4086                     continue
4087                 video_items_renderer = {known_renderers[key][1]: continuation_items}
4088                 continuation_list = [None]
4089                 yield from known_renderers[key][0](video_items_renderer)
4090                 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
4091                 break
4092             if video_items_renderer:
4093                 continue
4094             break
4095
4096     @staticmethod
4097     def _extract_selected_tab(tabs, fatal=True):
4098         for tab in tabs:
4099             renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
4100             if renderer.get('selected') is True:
4101                 return renderer
4102         else:
4103             if fatal:
4104                 raise ExtractorError('Unable to find selected tab')
4105
4106     def _extract_uploader(self, data):
4107         uploader = {}
4108         renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
4109         owner = try_get(
4110             renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
4111         if owner:
4112             owner_text = owner.get('text')
4113             uploader['uploader'] = self._search_regex(
4114                 r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text)
4115             uploader['uploader_id'] = try_get(
4116                 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
4117             uploader['uploader_url'] = urljoin(
4118                 'https://www.youtube.com/',
4119                 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
4120         return {k: v for k, v in uploader.items() if v is not None}
4121
4122     def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
4123         playlist_id = title = description = channel_url = channel_name = channel_id = None
4124         tags = []
4125
4126         selected_tab = self._extract_selected_tab(tabs)
4127         primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
4128         renderer = try_get(
4129             data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
4130         if renderer:
4131             channel_name = renderer.get('title')
4132             channel_url = renderer.get('channelUrl')
4133             channel_id = renderer.get('externalId')
4134         else:
4135             renderer = try_get(
4136                 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
4137
4138         if renderer:
4139             title = renderer.get('title')
4140             description = renderer.get('description', '')
4141             playlist_id = channel_id
4142             tags = renderer.get('keywords', '').split()
4143
4144         # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
4145         # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
4146         def _get_uncropped(url):
4147             return url_or_none((url or '').split('=')[0] + '=s0')
4148
4149         avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
4150         if avatar_thumbnails:
4151             uncropped_avatar = _get_uncropped(avatar_thumbnails[0]['url'])
4152             if uncropped_avatar:
4153                 avatar_thumbnails.append({
4154                     'url': uncropped_avatar,
4155                     'id': 'avatar_uncropped',
4156                     'preference': 1
4157                 })
4158
4159         channel_banners = self._extract_thumbnails(
4160             data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
4161         for banner in channel_banners:
4162             banner['preference'] = -10
4163
4164         if channel_banners:
4165             uncropped_banner = _get_uncropped(channel_banners[0]['url'])
4166             if uncropped_banner:
4167                 channel_banners.append({
4168                     'url': uncropped_banner,
4169                     'id': 'banner_uncropped',
4170                     'preference': -5
4171                 })
4172
4173         primary_thumbnails = self._extract_thumbnails(
4174             primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
4175
4176         if playlist_id is None:
4177             playlist_id = item_id
4178
4179         playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
4180         last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
4181         if title is None:
4182             title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
4183         title += format_field(selected_tab, 'title', ' - %s')
4184         title += format_field(selected_tab, 'expandedText', ' - %s')
4185
4186         metadata = {
4187             'playlist_id': playlist_id,
4188             'playlist_title': title,
4189             'playlist_description': description,
4190             'uploader': channel_name,
4191             'uploader_id': channel_id,
4192             'uploader_url': channel_url,
4193             'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
4194             'tags': tags,
4195             'view_count': self._get_count(playlist_stats, 1),
4196             'availability': self._extract_availability(data),
4197             'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
4198             'playlist_count': self._get_count(playlist_stats, 0),
4199             'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
4200         }
4201         if not channel_id:
4202             metadata.update(self._extract_uploader(data))
4203         metadata.update({
4204             'channel': metadata['uploader'],
4205             'channel_id': metadata['uploader_id'],
4206             'channel_url': metadata['uploader_url']})
4207         return self.playlist_result(
4208             self._entries(
4209                 selected_tab, playlist_id, ytcfg,
4210                 self._extract_account_syncid(ytcfg, data),
4211                 self._extract_visitor_data(data, ytcfg)),
4212             **metadata)
4213
4214     def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
4215         first_id = last_id = response = None
4216         for page_num in itertools.count(1):
4217             videos = list(self._playlist_entries(playlist))
4218             if not videos:
4219                 return
4220             start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
4221             if start >= len(videos):
4222                 return
4223             for video in videos[start:]:
4224                 if video['id'] == first_id:
4225                     self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
4226                     return
4227                 yield video
4228             first_id = first_id or videos[0]['id']
4229             last_id = videos[-1]['id']
4230             watch_endpoint = try_get(
4231                 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
4232             headers = self.generate_api_headers(
4233                 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
4234                 visitor_data=self._extract_visitor_data(response, data, ytcfg))
4235             query = {
4236                 'playlistId': playlist_id,
4237                 'videoId': watch_endpoint.get('videoId') or last_id,
4238                 'index': watch_endpoint.get('index') or len(videos),
4239                 'params': watch_endpoint.get('params') or 'OAE%3D'
4240             }
4241             response = self._extract_response(
4242                 item_id='%s page %d' % (playlist_id, page_num),
4243                 query=query, ep='next', headers=headers, ytcfg=ytcfg,
4244                 check_get_keys='contents'
4245             )
4246             playlist = try_get(
4247                 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4248
4249     def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
4250         title = playlist.get('title') or try_get(
4251             data, lambda x: x['titleText']['simpleText'], compat_str)
4252         playlist_id = playlist.get('playlistId') or item_id
4253
4254         # Delegating everything except mix playlists to regular tab-based playlist URL
4255         playlist_url = urljoin(url, try_get(
4256             playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
4257             compat_str))
4258         if playlist_url and playlist_url != url:
4259             return self.url_result(
4260                 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4261                 video_title=title)
4262
4263         return self.playlist_result(
4264             self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
4265             playlist_id=playlist_id, playlist_title=title)
4266
4267     def _extract_availability(self, data):
4268         """
4269         Gets the availability of a given playlist/tab.
4270         Note: Unless YouTube tells us explicitly, we do not assume it is public
4271         @param data: response
4272         """
4273         is_private = is_unlisted = None
4274         renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
4275         badge_labels = self._extract_badges(renderer)
4276
4277         # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
4278         privacy_dropdown_entries = try_get(
4279             renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
4280         for renderer_dict in privacy_dropdown_entries:
4281             is_selected = try_get(
4282                 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
4283             if not is_selected:
4284                 continue
4285             label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
4286             if label:
4287                 badge_labels.add(label.lower())
4288                 break
4289
4290         for badge_label in badge_labels:
4291             if badge_label == 'unlisted':
4292                 is_unlisted = True
4293             elif badge_label == 'private':
4294                 is_private = True
4295             elif badge_label == 'public':
4296                 is_unlisted = is_private = False
4297         return self._availability(is_private, False, False, False, is_unlisted)
4298
4299     @staticmethod
4300     def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
4301         sidebar_renderer = try_get(
4302             data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
4303         for item in sidebar_renderer:
4304             renderer = try_get(item, lambda x: x[info_renderer], expected_type)
4305             if renderer:
4306                 return renderer
4307
4308     def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
4309         """
4310         Get playlist with unavailable videos if the 'show unavailable videos' button exists.
4311         """
4312         browse_id = params = None
4313         renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
4314         if not renderer:
4315             return
4316         menu_renderer = try_get(
4317             renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
4318         for menu_item in menu_renderer:
4319             if not isinstance(menu_item, dict):
4320                 continue
4321             nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
4322             text = try_get(
4323                 nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
4324             if not text or text.lower() != 'show unavailable videos':
4325                 continue
4326             browse_endpoint = try_get(
4327                 nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
4328             browse_id = browse_endpoint.get('browseId')
4329             params = browse_endpoint.get('params')
4330             break
4331
4332         headers = self.generate_api_headers(
4333             ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
4334             visitor_data=self._extract_visitor_data(data, ytcfg))
4335         query = {
4336             'params': params or 'wgYCCAA=',
4337             'browseId': browse_id or 'VL%s' % item_id
4338         }
4339         return self._extract_response(
4340             item_id=item_id, headers=headers, query=query,
4341             check_get_keys='contents', fatal=False, ytcfg=ytcfg,
4342             note='Downloading API JSON with unavailable videos')
4343
4344     @property
4345     def skip_webpage(self):
4346         return 'webpage' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
4347
4348     def _extract_webpage(self, url, item_id, fatal=True):
4349         retries = self.get_param('extractor_retries', 3)
4350         count = -1
4351         webpage = data = last_error = None
4352         while count < retries:
4353             count += 1
4354             # Sometimes youtube returns a webpage with incomplete ytInitialData
4355             # See: https://github.com/yt-dlp/yt-dlp/issues/116
4356             if last_error:
4357                 self.report_warning('%s. Retrying ...' % last_error)
4358             try:
4359                 webpage = self._download_webpage(
4360                     url, item_id,
4361                     note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
4362                 data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
4363             except ExtractorError as e:
4364                 if isinstance(e.cause, network_exceptions):
4365                     if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
4366                         last_error = error_to_compat_str(e.cause or e.msg)
4367                         if count < retries:
4368                             continue
4369                 if fatal:
4370                     raise
4371                 self.report_warning(error_to_compat_str(e))
4372                 break
4373             else:
4374                 try:
4375                     self._extract_and_report_alerts(data)
4376                 except ExtractorError as e:
4377                     if fatal:
4378                         raise
4379                     self.report_warning(error_to_compat_str(e))
4380                     break
4381
4382                 if dict_get(data, ('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')):
4383                     break
4384
4385                 last_error = 'Incomplete yt initial data received'
4386                 if count >= retries:
4387                     if fatal:
4388                         raise ExtractorError(last_error)
4389                     self.report_warning(last_error)
4390                     break
4391
4392         return webpage, data
4393
4394     def _report_playlist_authcheck(self, ytcfg, fatal=True):
4395         """Use if failed to extract ytcfg (and data) from initial webpage"""
4396         if not ytcfg and self.is_authenticated:
4397             msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
4398             if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal:
4399                 raise ExtractorError(
4400                     f'{msg}. If you are not downloading private content, or '
4401                     'your cookies are only for the first account and channel,'
4402                     ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
4403                     expected=True)
4404             self.report_warning(msg, only_once=True)
4405
4406     def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
4407         data = None
4408         if not self.skip_webpage:
4409             webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
4410             ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
4411             # Reject webpage data if redirected to home page without explicitly requesting
4412             selected_tab = self._extract_selected_tab(traverse_obj(
4413                 data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[]), fatal=False) or {}
4414             if (url != 'https://www.youtube.com/feed/recommended'
4415                     and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
4416                     and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', [])):
4417                 msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
4418                 if fatal:
4419                     raise ExtractorError(msg, expected=True)
4420                 self.report_warning(msg, only_once=True)
4421         if not data:
4422             self._report_playlist_authcheck(ytcfg, fatal=fatal)
4423             data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
4424         return data, ytcfg
4425
4426     def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
4427         headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
4428         resolve_response = self._extract_response(
4429             item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
4430             ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)
4431         endpoints = {'browseEndpoint': 'browse', 'watchEndpoint': 'next'}
4432         for ep_key, ep in endpoints.items():
4433             params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
4434             if params:
4435                 return self._extract_response(
4436                     item_id=item_id, query=params, ep=ep, headers=headers,
4437                     ytcfg=ytcfg, fatal=fatal, default_client=default_client,
4438                     check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))
4439         err_note = 'Failed to resolve url (does the playlist exist?)'
4440         if fatal:
4441             raise ExtractorError(err_note, expected=True)
4442         self.report_warning(err_note, item_id)
4443
    # Default `params` value for search requests; `_search_results` uses it
    # when called without an explicit `params` argument (None means no params are sent)
    _SEARCH_PARAMS = None
4445
4446     def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
4447         data = {'query': query}
4448         if params is NO_DEFAULT:
4449             params = self._SEARCH_PARAMS
4450         if params:
4451             data['params'] = params
4452
4453         content_keys = (
4454             ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
4455             ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
4456             # ytmusic search
4457             ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
4458             ('continuationContents', ),
4459         )
4460         display_id = f'query "{query}"'
4461         check_get_keys = tuple({keys[0] for keys in content_keys})
4462         ytcfg = self._download_ytcfg(default_client, display_id) if not self.skip_webpage else {}
4463         self._report_playlist_authcheck(ytcfg, fatal=False)
4464
4465         continuation_list = [None]
4466         search = None
4467         for page_num in itertools.count(1):
4468             data.update(continuation_list[0] or {})
4469             headers = self.generate_api_headers(
4470                 ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
4471             search = self._extract_response(
4472                 item_id=f'{display_id} page {page_num}', ep='search', query=data,
4473                 default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
4474             slr_contents = traverse_obj(search, *content_keys)
4475             yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
4476             if not continuation_list[0]:
4477                 break
4478
4479
4480 class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
4481     IE_DESC = 'YouTube Tabs'
4482     _VALID_URL = r'''(?x:
4483         https?://
4484             (?:\w+\.)?
4485             (?:
4486                 youtube(?:kids)?\.com|
4487                 %(invidious)s
4488             )/
4489             (?:
4490                 (?P<channel_type>channel|c|user|browse)/|
4491                 (?P<not_channel>
4492                     feed/|hashtag/|
4493                     (?:playlist|watch)\?.*?\blist=
4494                 )|
4495                 (?!(?:%(reserved_names)s)\b)  # Direct URLs
4496             )
4497             (?P<id>[^/?\#&]+)
4498     )''' % {
4499         'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
4500         'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
4501     }
4502     IE_NAME = 'youtube:tab'
4503
4504     _TESTS = [{
4505         'note': 'playlists, multipage',
4506         'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
4507         'playlist_mincount': 94,
4508         'info_dict': {
4509             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
4510             'title': 'Igor Kleiner - Playlists',
4511             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
4512             'uploader': 'Igor Kleiner',
4513             'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4514             'channel': 'Igor Kleiner',
4515             'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4516             'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4517             'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4518             'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4519             'channel_follower_count': int
4520         },
4521     }, {
4522         'note': 'playlists, multipage, different order',
4523         'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
4524         'playlist_mincount': 94,
4525         'info_dict': {
4526             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
4527             'title': 'Igor Kleiner - Playlists',
4528             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
4529             'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4530             'uploader': 'Igor Kleiner',
4531             'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4532             'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4533             'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4534             'channel': 'Igor Kleiner',
4535             'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4536             'channel_follower_count': int
4537         },
4538     }, {
4539         'note': 'playlists, series',
4540         'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
4541         'playlist_mincount': 5,
4542         'info_dict': {
4543             'id': 'UCYO_jab_esuFRV4b17AJtAw',
4544             'title': '3Blue1Brown - Playlists',
4545             'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4546             'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4547             'uploader': '3Blue1Brown',
4548             'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4549             'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4550             'channel': '3Blue1Brown',
4551             'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
4552             'tags': ['Mathematics'],
4553             'channel_follower_count': int
4554         },
4555     }, {
4556         'note': 'playlists, singlepage',
4557         'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
4558         'playlist_mincount': 4,
4559         'info_dict': {
4560             'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4561             'title': 'ThirstForScience - Playlists',
4562             'description': 'md5:609399d937ea957b0f53cbffb747a14c',
4563             'uploader': 'ThirstForScience',
4564             'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4565             'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4566             'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4567             'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4568             'tags': 'count:13',
4569             'channel': 'ThirstForScience',
4570             'channel_follower_count': int
4571         }
4572     }, {
4573         'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
4574         'only_matching': True,
4575     }, {
4576         'note': 'basic, single video playlist',
4577         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4578         'info_dict': {
4579             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4580             'uploader': 'Sergey M.',
4581             'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4582             'title': 'youtube-dl public playlist',
4583             'description': '',
4584             'tags': [],
4585             'view_count': int,
4586             'modified_date': '20201130',
4587             'channel': 'Sergey M.',
4588             'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4589             'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4590             'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4591         },
4592         'playlist_count': 1,
4593     }, {
4594         'note': 'empty playlist',
4595         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4596         'info_dict': {
4597             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4598             'uploader': 'Sergey M.',
4599             'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4600             'title': 'youtube-dl empty playlist',
4601             'tags': [],
4602             'channel': 'Sergey M.',
4603             'description': '',
4604             'modified_date': '20160902',
4605             'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4606             'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4607             'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4608         },
4609         'playlist_count': 0,
4610     }, {
4611         'note': 'Home tab',
4612         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
4613         'info_dict': {
4614             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4615             'title': 'lex will - Home',
4616             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4617             'uploader': 'lex will',
4618             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4619             'channel': 'lex will',
4620             'tags': ['bible', 'history', 'prophesy'],
4621             'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4622             'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4623             'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4624             'channel_follower_count': int
4625         },
4626         'playlist_mincount': 2,
4627     }, {
4628         'note': 'Videos tab',
4629         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
4630         'info_dict': {
4631             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4632             'title': 'lex will - Videos',
4633             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4634             'uploader': 'lex will',
4635             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4636             'tags': ['bible', 'history', 'prophesy'],
4637             'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4638             'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4639             'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4640             'channel': 'lex will',
4641             'channel_follower_count': int
4642         },
4643         'playlist_mincount': 975,
4644     }, {
4645         'note': 'Videos tab, sorted by popular',
4646         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
4647         'info_dict': {
4648             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4649             'title': 'lex will - Videos',
4650             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4651             'uploader': 'lex will',
4652             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4653             'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4654             'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4655             'channel': 'lex will',
4656             'tags': ['bible', 'history', 'prophesy'],
4657             'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4658             'channel_follower_count': int
4659         },
4660         'playlist_mincount': 199,
4661     }, {
4662         'note': 'Playlists tab',
4663         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
4664         'info_dict': {
4665             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4666             'title': 'lex will - Playlists',
4667             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4668             'uploader': 'lex will',
4669             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4670             'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4671             'channel': 'lex will',
4672             'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4673             'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4674             'tags': ['bible', 'history', 'prophesy'],
4675             'channel_follower_count': int
4676         },
4677         'playlist_mincount': 17,
4678     }, {
4679         'note': 'Community tab',
4680         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
4681         'info_dict': {
4682             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4683             'title': 'lex will - Community',
4684             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4685             'uploader': 'lex will',
4686             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4687             'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4688             'channel': 'lex will',
4689             'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4690             'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4691             'tags': ['bible', 'history', 'prophesy'],
4692             'channel_follower_count': int
4693         },
4694         'playlist_mincount': 18,
4695     }, {
4696         'note': 'Channels tab',
4697         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
4698         'info_dict': {
4699             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4700             'title': 'lex will - Channels',
4701             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4702             'uploader': 'lex will',
4703             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4704             'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4705             'channel': 'lex will',
4706             'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4707             'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4708             'tags': ['bible', 'history', 'prophesy'],
4709             'channel_follower_count': int
4710         },
4711         'playlist_mincount': 12,
4712     }, {
4713         'note': 'Search tab',
4714         'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
4715         'playlist_mincount': 40,
4716         'info_dict': {
4717             'id': 'UCYO_jab_esuFRV4b17AJtAw',
4718             'title': '3Blue1Brown - Search - linear algebra',
4719             'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4720             'uploader': '3Blue1Brown',
4721             'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4722             'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4723             'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4724             'tags': ['Mathematics'],
4725             'channel': '3Blue1Brown',
4726             'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
4727             'channel_follower_count': int
4728         },
4729     }, {
4730         'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4731         'only_matching': True,
4732     }, {
4733         'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4734         'only_matching': True,
4735     }, {
4736         'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4737         'only_matching': True,
4738     }, {
4739         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
4740         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4741         'info_dict': {
4742             'title': '29C3: Not my department',
4743             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4744             'uploader': 'Christiaan008',
4745             'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4746             'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
4747             'tags': [],
4748             'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
4749             'view_count': int,
4750             'modified_date': '20150605',
4751             'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4752             'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
4753             'channel': 'Christiaan008',
4754         },
4755         'playlist_count': 96,
4756     }, {
4757         'note': 'Large playlist',
4758         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
4759         'info_dict': {
4760             'title': 'Uploads from Cauchemar',
4761             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
4762             'uploader': 'Cauchemar',
4763             'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
4764             'channel_url': 'https://www.youtube.com/c/Cauchemar89',
4765             'tags': [],
4766             'modified_date': r're:\d{8}',
4767             'channel': 'Cauchemar',
4768             'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
4769             'view_count': int,
4770             'description': '',
4771             'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
4772         },
4773         'playlist_mincount': 1123,
4774         'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
4775     }, {
4776         'note': 'even larger playlist, 8832 videos',
4777         'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
4778         'only_matching': True,
4779     }, {
4780         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
4781         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
4782         'info_dict': {
4783             'title': 'Uploads from Interstellar Movie',
4784             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
4785             'uploader': 'Interstellar Movie',
4786             'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4787             'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
4788             'tags': [],
4789             'view_count': int,
4790             'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4791             'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
4792             'channel': 'Interstellar Movie',
4793             'description': '',
4794             'modified_date': r're:\d{8}',
4795         },
4796         'playlist_mincount': 21,
4797     }, {
4798         'note': 'Playlist with "show unavailable videos" button',
4799         'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
4800         'info_dict': {
4801             'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
4802             'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
4803             'uploader': 'Phim Siêu Nhân Nhật Bản',
4804             'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
4805             'view_count': int,
4806             'channel': 'Phim Siêu Nhân Nhật Bản',
4807             'tags': [],
4808             'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
4809             'description': '',
4810             'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
4811             'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
4812             'modified_date': r're:\d{8}',
4813         },
4814         'playlist_mincount': 200,
4815         'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
4816     }, {
4817         'note': 'Playlist with unavailable videos in page 7',
4818         'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
4819         'info_dict': {
4820             'title': 'Uploads from BlankTV',
4821             'id': 'UU8l9frL61Yl5KFOl87nIm2w',
4822             'uploader': 'BlankTV',
4823             'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
4824             'channel': 'BlankTV',
4825             'channel_url': 'https://www.youtube.com/c/blanktv',
4826             'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
4827             'view_count': int,
4828             'tags': [],
4829             'uploader_url': 'https://www.youtube.com/c/blanktv',
4830             'modified_date': r're:\d{8}',
4831             'description': '',
4832         },
4833         'playlist_mincount': 1000,
4834         'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
4835     }, {
4836         'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
4837         'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
4838         'info_dict': {
4839             'title': 'Data Analysis with Dr Mike Pound',
4840             'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
4841             'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
4842             'uploader': 'Computerphile',
4843             'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
4844             'uploader_url': 'https://www.youtube.com/user/Computerphile',
4845             'tags': [],
4846             'view_count': int,
4847             'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
4848             'channel_url': 'https://www.youtube.com/user/Computerphile',
4849             'channel': 'Computerphile',
4850         },
4851         'playlist_mincount': 11,
4852     }, {
4853         'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4854         'only_matching': True,
4855     }, {
4856         'note': 'Playlist URL that does not actually serve a playlist',
4857         'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
4858         'info_dict': {
4859             'id': 'FqZTN594JQw',
4860             'ext': 'webm',
4861             'title': "Smiley's People 01 detective, Adventure Series, Action",
4862             'uploader': 'STREEM',
4863             'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
4864             'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
4865             'upload_date': '20150526',
4866             'license': 'Standard YouTube License',
4867             'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
4868             'categories': ['People & Blogs'],
4869             'tags': list,
4870             'view_count': int,
4871             'like_count': int,
4872         },
4873         'params': {
4874             'skip_download': True,
4875         },
4876         'skip': 'This video is not available.',
4877         'add_ie': [YoutubeIE.ie_key()],
4878     }, {
4879         'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
4880         'only_matching': True,
4881     }, {
4882         'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
4883         'only_matching': True,
4884     }, {
4885         'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
4886         'info_dict': {
4887             'id': 'GgL890LIznQ',  # This will keep changing
4888             'ext': 'mp4',
4889             'title': str,
4890             'uploader': 'Sky News',
4891             'uploader_id': 'skynews',
4892             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
4893             'upload_date': r're:\d{8}',
4894             'description': str,
4895             'categories': ['News & Politics'],
4896             'tags': list,
4897             'like_count': int,
4898             'release_timestamp': 1642502819,
4899             'channel': 'Sky News',
4900             'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
4901             'age_limit': 0,
4902             'view_count': int,
4903             'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',
4904             'playable_in_embed': True,
4905             'release_date': '20220118',
4906             'availability': 'public',
4907             'live_status': 'is_live',
4908             'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
4909             'channel_follower_count': int
4910         },
4911         'params': {
4912             'skip_download': True,
4913         },
4914         'expected_warnings': ['Ignoring subtitle tracks found in '],
4915     }, {
4916         'url': 'https://www.youtube.com/user/TheYoungTurks/live',
4917         'info_dict': {
4918             'id': 'a48o2S1cPoo',
4919             'ext': 'mp4',
4920             'title': 'The Young Turks - Live Main Show',
4921             'uploader': 'The Young Turks',
4922             'uploader_id': 'TheYoungTurks',
4923             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
4924             'upload_date': '20150715',
4925             'license': 'Standard YouTube License',
4926             'description': 'md5:438179573adcdff3c97ebb1ee632b891',
4927             'categories': ['News & Politics'],
4928             'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
4929             'like_count': int,
4930         },
4931         'params': {
4932             'skip_download': True,
4933         },
4934         'only_matching': True,
4935     }, {
4936         'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
4937         'only_matching': True,
4938     }, {
4939         'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
4940         'only_matching': True,
4941     }, {
4942         'note': 'A channel that is not live. Should raise error',
4943         'url': 'https://www.youtube.com/user/numberphile/live',
4944         'only_matching': True,
4945     }, {
4946         'url': 'https://www.youtube.com/feed/trending',
4947         'only_matching': True,
4948     }, {
4949         'url': 'https://www.youtube.com/feed/library',
4950         'only_matching': True,
4951     }, {
4952         'url': 'https://www.youtube.com/feed/history',
4953         'only_matching': True,
4954     }, {
4955         'url': 'https://www.youtube.com/feed/subscriptions',
4956         'only_matching': True,
4957     }, {
4958         'url': 'https://www.youtube.com/feed/watch_later',
4959         'only_matching': True,
4960     }, {
4961         'note': 'Recommended - redirects to home page.',
4962         'url': 'https://www.youtube.com/feed/recommended',
4963         'only_matching': True,
4964     }, {
4965         'note': 'inline playlist with not always working continuations',
4966         'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
4967         'only_matching': True,
4968     }, {
4969         'url': 'https://www.youtube.com/course',
4970         'only_matching': True,
4971     }, {
4972         'url': 'https://www.youtube.com/zsecurity',
4973         'only_matching': True,
4974     }, {
4975         'url': 'http://www.youtube.com/NASAgovVideo/videos',
4976         'only_matching': True,
4977     }, {
4978         'url': 'https://www.youtube.com/TheYoungTurks/live',
4979         'only_matching': True,
4980     }, {
4981         'url': 'https://www.youtube.com/hashtag/cctv9',
4982         'info_dict': {
4983             'id': 'cctv9',
4984             'title': '#cctv9',
4985             'tags': [],
4986         },
4987         'playlist_mincount': 350,
4988     }, {
4989         'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
4990         'only_matching': True,
4991     }, {
4992         'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
4993         'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4994         'only_matching': True
4995     }, {
4996         'note': '/browse/ should redirect to /channel/',
4997         'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
4998         'only_matching': True
4999     }, {
5000         'note': 'VLPL, should redirect to playlist?list=PL...',
5001         'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5002         'info_dict': {
5003             'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5004             'uploader': 'NoCopyrightSounds',
5005             'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
5006             'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5007             'title': 'NCS Releases',
5008             'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5009             'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5010             'modified_date': r're:\d{8}',
5011             'view_count': int,
5012             'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5013             'tags': [],
5014             'channel': 'NoCopyrightSounds',
5015         },
5016         'playlist_mincount': 166,
5017         'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5018     }, {
5019         'note': 'Topic, should redirect to playlist?list=UU...',
5020         'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5021         'info_dict': {
5022             'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5023             'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5024             'title': 'Uploads from Royalty Free Music - Topic',
5025             'uploader': 'Royalty Free Music - Topic',
5026             'tags': [],
5027             'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5028             'channel': 'Royalty Free Music - Topic',
5029             'view_count': int,
5030             'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5031             'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5032             'modified_date': r're:\d{8}',
5033             'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5034             'description': '',
5035         },
5036         'expected_warnings': [
5037             'The URL does not have a videos tab',
5038             r'[Uu]navailable videos (are|will be) hidden',
5039         ],
5040         'playlist_mincount': 101,
5041     }, {
5042         'note': 'Topic without a UU playlist',
5043         'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
5044         'info_dict': {
5045             'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
5046             'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
5047             'tags': [],
5048         },
5049         'expected_warnings': [
5050             'the playlist redirect gave error',
5051         ],
5052         'playlist_mincount': 9,
5053     }, {
5054         'note': 'Youtube music Album',
5055         'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
5056         'info_dict': {
5057             'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
5058             'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
5059             'tags': [],
5060             'view_count': int,
5061             'description': '',
5062             'availability': 'unlisted',
5063             'modified_date': r're:\d{8}',
5064         },
5065         'playlist_count': 50,
5066     }, {
5067         'note': 'unlisted single video playlist',
5068         'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5069         'info_dict': {
5070             'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5071             'uploader': 'colethedj',
5072             'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5073             'title': 'yt-dlp unlisted playlist test',
5074             'availability': 'unlisted',
5075             'tags': [],
5076             'modified_date': '20211208',
5077             'channel': 'colethedj',
5078             'view_count': int,
5079             'description': '',
5080             'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5081             'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5082             'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5083         },
5084         'playlist_count': 1,
5085     }, {
5086         'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
5087         'url': 'https://www.youtube.com/feed/recommended',
5088         'info_dict': {
5089             'id': 'recommended',
5090             'title': 'recommended',
5091             'tags': [],
5092         },
5093         'playlist_mincount': 50,
5094         'params': {
5095             'skip_download': True,
5096             'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5097         },
5098     }, {
5099         'note': 'API Fallback: /videos tab, sorted by oldest first',
5100         'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
5101         'info_dict': {
5102             'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5103             'title': 'Cody\'sLab - Videos',
5104             'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
5105             'uploader': 'Cody\'sLab',
5106             'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5107             'channel': 'Cody\'sLab',
5108             'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5109             'tags': [],
5110             'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5111             'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5112             'channel_follower_count': int
5113         },
5114         'playlist_mincount': 650,
5115         'params': {
5116             'skip_download': True,
5117             'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5118         },
5119     }, {
5120         'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
5121         'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5122         'info_dict': {
5123             'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5124             'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5125             'title': 'Uploads from Royalty Free Music - Topic',
5126             'uploader': 'Royalty Free Music - Topic',
5127             'modified_date': r're:\d{8}',
5128             'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5129             'description': '',
5130             'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5131             'tags': [],
5132             'channel': 'Royalty Free Music - Topic',
5133             'view_count': int,
5134             'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5135         },
5136         'expected_warnings': [
5137             'does not have a videos tab',
5138             r'[Uu]navailable videos (are|will be) hidden',
5139         ],
5140         'playlist_mincount': 101,
5141         'params': {
5142             'skip_download': True,
5143             'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5144         },
5145     }, {
5146         'note': 'non-standard redirect to regional channel',
5147         'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
5148         'only_matching': True
5149     }, {
5150         'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
5151         'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5152         'info_dict': {
5153             'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5154             'modified_date': '20220407',
5155             'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5156             'tags': [],
5157             'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5158             'uploader': 'pukkandan',
5159             'availability': 'unlisted',
5160             'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5161             'channel': 'pukkandan',
5162             'description': 'Test for collaborative playlist',
5163             'title': 'yt-dlp test - collaborative playlist',
5164             'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5165         },
5166         'playlist_mincount': 2
5167     }]
5168
5169     @classmethod
5170     def suitable(cls, url):
5171         return False if YoutubeIE.suitable(url) else super().suitable(url)
5172
    # Splits a URL into `pre` (the part matched by _VALID_URL), an optional `tab`
    # path segment of the form "/word", and `post` (everything that remains).
    # The conditional group `(?(not_channel)|...)` only tries to capture `tab` when
    # the `not_channel` group did not take part in the match.
    # NOTE(review): `not_channel` is presumably a named group inside _VALID_URL,
    # which is defined outside this view - confirm there.
    _URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')
5174
    @YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
    def _real_extract(self, url, smuggled_data):
        """Resolve a channel/tab/playlist style URL to its entries or to a redirect.

        The URL is first forced onto the www.youtube.com host, then split with
        _URL_RE and possibly rewritten (music URLs, channel home -> /videos,
        "watch?list=..." without a video id) before the page data is fetched.

        url: the URL to extract.
        smuggled_data: mapping read with ``.get('is_music_url')``; presumably filled
            in by the passthrough_smuggled_data decorator - confirm in the base class.

        Returns either a ``url_result`` (album -> playlist, --no-playlist video,
        regional redirect, single-video fallback) or whatever
        ``_extract_from_tabs`` / ``_extract_from_playlist`` return.

        Raises ExtractorError when an album cannot be resolved, when the page is
        not recognized, when a /live tab did not redirect to a video, or when an
        explicitly requested tab does not exist.
        """
        item_id = self._match_id(url)
        # Replace whatever host was given (e.g. music.youtube.com) with www.youtube.com
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # Match against _URL_RE and turn unmatched (None) groups into '' so the
            # values can be used directly as strings below
            mobj = self._URL_RE.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj, redirect_warning = get_mobj(url), None
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
                elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                    mdata = self._extract_tab_endpoint(
                        f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
                    murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                        get_all=False, expected_type=compat_str)
                    if not murl:
                        raise ExtractorError('Failed to resolve album to playlist')
                    # Hand the canonical URL back to this same extractor
                    return self.url_result(murl, ie=YoutubeTabIE.ie_key())
                elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                    pre = f'https://www.youtube.com/channel/{item_id}'

        # Remember what the user actually asked for before a default tab is substituted;
        # it decides further down whether a missing tab is fatal or only a warning
        original_tab_name = tab
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        # Rebuild the URL from the (possibly rewritten) parts and re-match it
        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id, playlist_id = (qs.get(key, [None])[0] for key in ('v', 'list'))

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
            url = f'https://www.youtube.com/playlist?list={playlist_id}'
            mobj = get_mobj(url)

        if video_id and playlist_id:
            # Both ids are present: the 'noplaylist' param decides which one wins
            if self.get_param('noplaylist'):
                self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
                return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                       ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

        data, ytcfg = self._extract_data(url, item_id)

        # YouTube may provide a non-standard redirect to the regional channel
        # See: https://github.com/yt-dlp/yt-dlp/issues/2694
        redirect_url = traverse_obj(
            data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
        if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
            # Keep the requested tab and trailing part of the URL on the redirect target
            redirect_url = ''.join((
                urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
            self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
            return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

        tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
        if tabs:
            # Compare the tab that was actually selected with the one requested in the URL
            selected_tab = self._extract_selected_tab(tabs)
            selected_tab_name = selected_tab.get('title', '').lower()
            if selected_tab_name == 'home':
                # The tab titled "Home" corresponds to the "/featured" URL segment
                selected_tab_name = 'featured'
            requested_tab_name = mobj['tab'][1:]  # drop the leading '/'
            if 'no-youtube-channel-redirect' not in compat_opts:
                if requested_tab_name == 'live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if requested_tab_name not in ('', selected_tab_name):
                    redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                    if not original_tab_name:
                        # The tab was substituted above rather than given in the URL,
                        # so a mismatch is downgraded to a warning
                        if item_id[:2] == 'UC':
                            # Topic channels don't have /videos. Use the equivalent playlist instead
                            pl_id = f'UU{item_id[2:]}'
                            pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                            try:
                                data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                            except ExtractorError:
                                # Keep the data already fetched for the channel page
                                redirect_warning += ' and the playlist redirect gave error'
                            else:
                                item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                                redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                        if selected_tab_name and selected_tab_name != requested_tab_name:
                            redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'
                    else:
                        # The tab was explicitly requested in the URL: treat its absence as an error
                        raise ExtractorError(redirect_warning, expected=True)

        if redirect_warning:
            self.to_screen(redirect_warning)
        self.write_debug(f'Final URL: {url}')

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
        self._extract_and_report_alerts(data, only_once=True)
        # `data` may have been replaced above, so look the tabs up again
        tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
        if tabs:
            return self._extract_from_tabs(item_id, ytcfg, data, tabs)

        # No browse tabs: check for a playlist in the watch-next results
        playlist = traverse_obj(
            data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

        # Last resort: a single video, taken from the response or from the `v` query parameter
        video_id = traverse_obj(
            data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                   ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
5304
5305
class YoutubePlaylistIE(InfoExtractor):
    """Matches bare playlist ids and ``list=`` URLs and forwards them to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs YoutubeTabIE accepts or that carry a non-empty ``v`` parameter."""
        if YoutubeTabIE.suitable(url):
            return False
        # A video id in the query string means this is not a pure playlist URL
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return False if has_video_id else super().suitable(url)

    def _real_extract(self, url):
        """Rewrite *url* as a www.youtube.com/playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Carry over the original query string; a bare id has none, so build one
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
5415
5416
class YoutubeYtBeIE(InfoExtractor):
    """Handles youtu.be short links that also carry a ``list=`` playlist id."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
5464
5465
class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Maps ``/embed/live_stream?channel=...`` URLs onto the channel's ``/live`` URL."""

    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the /live URL of the embedded channel."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
5479
5480
class YoutubeYtUserIE(InfoExtractor):
    """Expands the ``ytuser:<name>`` shorthand into the user's /videos URL."""

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /videos URL of the named user."""
        user_id = self._match_id(url)
        videos_url = 'https://www.youtube.com/user/%s/videos' % user_id
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
5495
5496
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Maps the ``:ytfav`` keyword (and its spelling variants) to the ``LL`` playlist."""

    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed ``list=LL`` playlist URL."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
5514
5515
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Lists the entries of the YouTube notification menu (":ytnotif" keyword)."""

    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield one entry per usable notification found in *response*.

        response: a notification-menu API response; the item list is looked up
            either under the popup's first section (first page) or under
            ``appendContinuationItemsAction`` (later pages).
        continuation_list: one-element list used as an out-parameter. Slot 0 is
            reset to None when iteration starts and is then set to the last
            ``continuationItemRenderer`` encountered, if any.
        """
        notification_list = traverse_obj(
            response,
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list) or []
        continuation_list[0] = None
        for item in notification_list:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            continuation = item.get('continuationItemRenderer')
            if continuation:
                continuation_list[0] = continuation

    def _extract_notification_renderer(self, notification):
        """Build a ``url`` entry from a single ``notificationRenderer``.

        Video notifications point at the watch URL (handled by YoutubeIE); other
        notifications are only kept when both a channel id and a post id can be
        found, and then point at the channel's community tab (YoutubeTabIE).

        Returns the entry dict, or None when the notification has neither a video
        id nor a channel id/post id pair.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not channel_id or not post_id:
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        # The channel name is optional: re.escape(None) raises TypeError, so fall back
        # to an empty prefix and still try to pick the title out of the message
        title = self._search_regex(
            rf'{re.escape(channel or "")} [^:]+: (.+)', self._get_text(notification, 'shortMessage'),
            'video title', default=None)
        if title:
            title = title.replace('\xad', '')  # remove soft hyphens
        # The date is derived from the "sent" time text and is only computed when the
        # 'approximate_date' extractor argument of YoutubeTabIE is set
        upload_date = (strftime_or_none(self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')
                       if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key())
                       else None)
        return {
            '_type': 'url',
            'url': url,
            'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries from every page of the notification menu.

        Pages are requested from ``notification/get_notification_menu`` until a
        response leaves no continuation item behind. The ``ctoken`` for the next
        request is read from the continuation stored by
        ``_extract_notification_menu``.
        """
        continuation_list = [None]
        response = None
        for page in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            # Visitor data is taken from the previous response (None on the first page)
            response = self._extract_response(
                item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response)))
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return a playlist named ``notifications`` with the notification entries."""
        display_id = 'notifications'
        # The ytcfg download is skipped when self.skip_webpage is set
        ytcfg = self._download_ytcfg('web', display_id) if not self.skip_webpage else {}
        self._report_playlist_authcheck(ytcfg)
        return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id)
5603
5604
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # Search extractor addressed through the "ytsearch" key
    # (the test below uses "ytsearch5:<query>" and expects 5 entries).
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [
        {
            'url': 'ytsearch5:youtube-dl test video',
            'playlist_count': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
    ]
5618
5619
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # Same as the plain search extractor, but addressed through the
    # "ytsearchdate" key and using date-sorted search params.
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [
        {
            'url': 'ytsearchdate5:youtube-dl test video',
            'playlist_count': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
    ]
5633
5634
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    # Handles youtube.com /results and /search URLs carrying a
    # "search_query" or "q" parameter; an optional "sp" parameter is
    # forwarded to the search as-is.
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
        {
            'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'python',
                'title': 'python',
            },
        },
        {
            'url': 'https://www.youtube.com/results?search_query=%23cats',
            'playlist_mincount': 1,
            'info_dict': {
                'id': '#cats',
                'title': '#cats',
                'entries': [{
                    'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
                    'title': '#cats',
                }],
            },
        },
        {
            'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Build a playlist from the search named in the URL's query string."""
        url_args = parse_qs(url)
        # "search_query" takes precedence; "q" is the fallback spelling
        terms = url_args.get('search_query') or url_args.get('q')
        query = terms[0]
        search_params = url_args.get('sp', (None,))[0]
        results = self._search_results(query, search_params)
        return self.playlist_result(results, query, query)
5673
5674
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    # Handles music.youtube.com/search URLs. A result section can be chosen
    # either with an explicit "sp" parameter or with a URL fragment naming
    # one of the keys of _SECTIONS (e.g. "#songs").
    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections (Eg: #songs)'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music',
            'playlist_count': 16,
            'info_dict': {
                'id': 'royalty free music',
                'title': 'royalty free music',
            },
        },
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
            'playlist_mincount': 30,
            'info_dict': {
                'id': 'royalty free music - songs',
                'title': 'royalty free music - songs',
            },
            'params': {'extract_flat': 'in_playlist'},
        },
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
            'playlist_mincount': 30,
            'info_dict': {
                'id': 'royalty free music - community playlists',
                'title': 'royalty free music - community playlists',
            },
            'params': {'extract_flat': 'in_playlist'},
        },
    ]

    # Section name (lower case) -> "sp" search params value
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Build a playlist for the search, titled "<query> - <section>" when a section applies."""
        url_args = parse_qs(url)
        query = (url_args.get('search_query') or url_args.get('q'))[0]
        sp = url_args.get('sp', (None,))[0]
        if sp:
            # Explicit params: label with the matching section name,
            # falling back to the raw params value when it is not a known one
            label = sp
            for name, token in self._SECTIONS.items():
                if token == sp:
                    label = name
                    break
        else:
            # No params given: treat the URL fragment (if any) as a section name
            fragment = (url.split('#') + [''])[1]
            label = compat_urllib_parse_unquote_plus(fragment).lower()
            sp = self._SECTIONS.get(label)
            if not sp:
                label = None
        title = join_nonempty(query, label, delim=' - ')
        results = self._search_results(query, sp, default_client='web_music')
        return self.playlist_result(results, title, title)
5726
5727
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Shared base for the feed extractors
    Every subclass has to provide a _FEED_NAME attribute.
    """
    _LOGIN_REQUIRED = True

    def _real_initialize(self):
        # Borrow the login check from the YouTube base extractor
        YoutubeBaseInfoExtractor._check_login_required(self)

    @property
    def IE_NAME(self):
        return f'youtube:{self._FEED_NAME}'

    def _real_extract(self, url):
        """Hand off to the tab extractor via the feed page for _FEED_NAME."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
5745
5746
class YoutubeWatchLaterIE(InfoExtractor):
    # Keyword shortcut: ":ytwatchlater" resolves to the "WL" playlist URL
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {
            'url': ':ytwatchlater',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Delegate to the tab extractor with the watch-later playlist URL."""
        playlist_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key())
5759
5760
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Matches the bare youtube.com front page as well as the
    # ":ytrec" / ":ytrecommended" keywords.
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False  # overrides the True set on YoutubeFeedsInfoExtractor
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {
            'url': ':ytrec',
            'only_matching': True,
        },
        {
            'url': ':ytrecommended',
            'only_matching': True,
        },
        {
            'url': 'https://youtube.com',
            'only_matching': True,
        },
    ]
5776
5777
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Keyword-only extractor for the "subscriptions" feed
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {
            'url': ':ytsubs',
            'only_matching': True,
        },
        {
            'url': ':ytsubscriptions',
            'only_matching': True,
        },
    ]
5789
5790
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Keyword-only extractor for the "history" feed
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {
            'url': ':ythistory',
            'only_matching': True,
        },
    ]
5799
5800
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution_link URLs that end right after a single
    # non-video query parameter (or after a bare "watch?"), which is what is
    # left when an unquoted "&" makes the shell cut the URL short.
    # Extraction always fails with an explanatory, expected error.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise: a URL matching this extractor carries no video id.

        Raises ExtractorError (expected=True) telling the user to quote the URL.
        """
        # The example commands name yt-dlp, the program this module belongs to,
        # so that the suggestion can be run as written.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  yt-dlp BaW_jenozKc  .',
            expected=True)
5848
5849
class YoutubeClipIE(InfoExtractor):
    # Placeholder for /clip/ URLs: warns, then passes the URL on to the
    # 'Generic' extractor.
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Warn that clips are unsupported and defer the URL to 'Generic'."""
        warning = 'YouTube clips are not currently supported. The entire video will be downloaded instead'
        self.report_warning(warning)
        return self.url_result(url, 'Generic')
5858
5859
class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose "v" value is only 1-10 characters long and
    # ends the URL; extraction always fails with an expected error.
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise ExtractorError (expected=True) naming the incomplete id."""
        partial_id = self._match_id(url)
        message = f'Incomplete YouTube ID {partial_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)