21 from .common
import InfoExtractor
, SearchInfoExtractor
22 from .openload
import PhantomJSwrapper
23 from ..compat
import functools
24 from ..jsinterp
import JSInterpreter
25 from ..networking
.exceptions
import HTTPError
, network_exceptions
71 STREAMING_DATA_CLIENT_NAME
= '__yt_dlp_client'
72 # any clients starting with _ cannot be explicitly requested by the user
75 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
76 'INNERTUBE_CONTEXT': {
79 'clientVersion': '2.20220801.00.00',
82 'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
85 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
86 'INNERTUBE_CONTEXT': {
88 'clientName': 'WEB_EMBEDDED_PLAYER',
89 'clientVersion': '1.20220731.00.00',
92 'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
95 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
96 'INNERTUBE_HOST': 'music.youtube.com',
97 'INNERTUBE_CONTEXT': {
99 'clientName': 'WEB_REMIX',
100 'clientVersion': '1.20220727.01.00',
103 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
106 'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
107 'INNERTUBE_CONTEXT': {
109 'clientName': 'WEB_CREATOR',
110 'clientVersion': '1.20220726.00.00',
113 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
116 'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
117 'INNERTUBE_CONTEXT': {
119 'clientName': 'ANDROID',
120 'clientVersion': '19.09.37',
121 'androidSdkVersion': 30,
122 'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip',
125 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
126 'REQUIRE_JS_PLAYER': False,
128 'android_embedded': {
129 'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
130 'INNERTUBE_CONTEXT': {
132 'clientName': 'ANDROID_EMBEDDED_PLAYER',
133 'clientVersion': '19.09.37',
134 'androidSdkVersion': 30,
135 'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip',
138 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
139 'REQUIRE_JS_PLAYER': False,
142 'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
143 'INNERTUBE_CONTEXT': {
145 'clientName': 'ANDROID_MUSIC',
146 'clientVersion': '6.42.52',
147 'androidSdkVersion': 30,
148 'userAgent': 'com.google.android.apps.youtube.music/6.42.52 (Linux; U; Android 11) gzip',
151 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
152 'REQUIRE_JS_PLAYER': False,
155 'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
156 'INNERTUBE_CONTEXT': {
158 'clientName': 'ANDROID_CREATOR',
159 'clientVersion': '22.30.100',
160 'androidSdkVersion': 30,
161 'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip',
164 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
165 'REQUIRE_JS_PLAYER': False,
167 # iOS clients have HLS live streams. Setting device model to get 60fps formats.
168 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
170 'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
171 'INNERTUBE_CONTEXT': {
174 'clientVersion': '19.09.3',
175 'deviceModel': 'iPhone14,3',
176 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
179 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
180 'REQUIRE_JS_PLAYER': False,
183 'INNERTUBE_CONTEXT': {
185 'clientName': 'IOS_MESSAGES_EXTENSION',
186 'clientVersion': '19.09.3',
187 'deviceModel': 'iPhone14,3',
188 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
191 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
192 'REQUIRE_JS_PLAYER': False,
195 'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
196 'INNERTUBE_CONTEXT': {
198 'clientName': 'IOS_MUSIC',
199 'clientVersion': '6.33.3',
200 'deviceModel': 'iPhone14,3',
201 'userAgent': 'com.google.ios.youtubemusic/6.33.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
204 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
205 'REQUIRE_JS_PLAYER': False,
208 'INNERTUBE_CONTEXT': {
210 'clientName': 'IOS_CREATOR',
211 'clientVersion': '22.33.101',
212 'deviceModel': 'iPhone14,3',
213 'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)',
216 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
217 'REQUIRE_JS_PLAYER': False,
219 # mweb has 'ultralow' formats
220 # See: https://github.com/yt-dlp/yt-dlp/pull/557
222 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
223 'INNERTUBE_CONTEXT': {
225 'clientName': 'MWEB',
226 'clientVersion': '2.20220801.00.00',
229 'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
231 # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
232 # See: https://github.com/zerodytrash/YouTube-Internal-Clients
234 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
235 'INNERTUBE_CONTEXT': {
237 'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
238 'clientVersion': '2.0',
241 'INNERTUBE_CONTEXT_CLIENT_NAME': 85,
243 # This client has pre-merged video+audio 720p/1080p streams
245 'INNERTUBE_CONTEXT': {
247 'clientName': 'MEDIA_CONNECT_FRONTEND',
248 'clientVersion': '0.1',
251 'INNERTUBE_CONTEXT_CLIENT_NAME': 95,
256 def _split_innertube_client(client_name
):
257 variant
, *base
= client_name
.rsplit('.', 1)
259 return variant
, base
[0], variant
260 base
, *variant
= client_name
.split('_', 1)
261 return client_name
, base
, variant
[0] if variant
else None
def short_client_name(client_name):
    """Build a short upper-case abbreviation for an innertube client name.

    The first element returned by ``_split_innertube_client`` is used, with
    ``embedscreen`` rewritten to ``e_s`` so that it contributes two initials.
    The abbreviation is the first four characters of the leading
    underscore-separated word, joined (via ``join_nonempty``) with the first
    letters of all remaining words.
    """
    full_name = _split_innertube_client(client_name)[0]
    head, *tail = full_name.replace('embedscreen', 'e_s').split('_')
    initials = ''.join(word[0] for word in tail)
    return join_nonempty(head[:4], initials).upper()
269 def build_innertube_clients():
271 'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
273 BASE_CLIENTS
= ('ios', 'android', 'web', 'tv', 'mweb')
274 priority
= qualities(BASE_CLIENTS
[::-1])
276 for client
, ytcfg
in tuple(INNERTUBE_CLIENTS
.items()):
277 ytcfg
.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
278 ytcfg
.setdefault('INNERTUBE_HOST', 'www.youtube.com')
279 ytcfg
.setdefault('REQUIRE_JS_PLAYER', True)
280 ytcfg
['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
282 _
, base_client
, variant
= _split_innertube_client(client
)
283 ytcfg
['priority'] = 10 * priority(base_client
)
286 INNERTUBE_CLIENTS
[f
'{client}_embedscreen'] = embedscreen
= copy
.deepcopy(ytcfg
)
287 embedscreen
['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
288 embedscreen
['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
289 embedscreen
['priority'] -= 3
290 elif variant
== 'embedded':
291 ytcfg
['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
292 ytcfg
['priority'] -= 2
294 ytcfg
['priority'] -= 3
297 build_innertube_clients()
class BadgeType(enum.Enum):
    """Kinds of badge that can be attached to a YouTube item."""

    # Availability of the item
    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()

    # Live status
    LIVE_NOW = enum.auto()

    # Channel / artist verification
    VERIFIED = enum.auto()
310 class YoutubeBaseInfoExtractor(InfoExtractor
):
311 """Provide base functions for Youtube extractors"""
314 r
'channel|c|user|playlist|watch|w|v|embed|e|live|watch_popup|clip|'
315 r
'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
316 r
'browse|oembed|get_video_info|iframe_api|s/player|source|'
317 r
'storefront|oops|index|account|t/terms|about|upload|signin|logout')
319 _PLAYLIST_ID_RE
= r
'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
321 # _NETRC_MACHINE = 'youtube'
323 # If True it will raise an error if no login info is provided
324 _LOGIN_REQUIRED
= False
327 # invidious-redirect websites
328 r
'(?:www\.)?redirect\.invidious\.io',
329 r
'(?:(?:www|dev)\.)?invidio\.us',
330 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
331 r
'(?:www\.)?invidious\.pussthecat\.org',
332 r
'(?:www\.)?invidious\.zee\.li',
333 r
'(?:www\.)?invidious\.ethibox\.fr',
334 r
'(?:www\.)?iv\.ggtyler\.dev',
335 r
'(?:www\.)?inv\.vern\.i2p',
336 r
'(?:www\.)?am74vkcrjp2d5v36lcdqgsj2m6x36tbrkhsruoegwfcizzabnfgf5zyd\.onion',
337 r
'(?:www\.)?inv\.riverside\.rocks',
338 r
'(?:www\.)?invidious\.silur\.me',
339 r
'(?:www\.)?inv\.bp\.projectsegfau\.lt',
340 r
'(?:www\.)?invidious\.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid\.onion',
341 r
'(?:www\.)?invidious\.slipfox\.xyz',
342 r
'(?:www\.)?invidious\.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd\.onion',
343 r
'(?:www\.)?inv\.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad\.onion',
344 r
'(?:www\.)?invidious\.tiekoetter\.com',
345 r
'(?:www\.)?iv\.odysfvr23q5wgt7i456o5t3trw2cw5dgn56vbjfbq2m7xsc5vqbqpcyd\.onion',
346 r
'(?:www\.)?invidious\.nerdvpn\.de',
347 r
'(?:www\.)?invidious\.weblibre\.org',
348 r
'(?:www\.)?inv\.odyssey346\.dev',
349 r
'(?:www\.)?invidious\.dhusch\.de',
350 r
'(?:www\.)?iv\.melmac\.space',
351 r
'(?:www\.)?watch\.thekitty\.zone',
352 r
'(?:www\.)?invidious\.privacydev\.net',
353 r
'(?:www\.)?ng27owmagn5amdm7l5s3rsqxwscl5ynppnis5dqcasogkyxcfqn7psid\.onion',
354 r
'(?:www\.)?invidious\.drivet\.xyz',
355 r
'(?:www\.)?vid\.priv\.au',
356 r
'(?:www\.)?euxxcnhsynwmfidvhjf6uzptsmh4dipkmgdmcmxxuo7tunp3ad2jrwyd\.onion',
357 r
'(?:www\.)?inv\.vern\.cc',
358 r
'(?:www\.)?invidious\.esmailelbob\.xyz',
359 r
'(?:www\.)?invidious\.sethforprivacy\.com',
360 r
'(?:www\.)?yt\.oelrichsgarcia\.de',
361 r
'(?:www\.)?yt\.artemislena\.eu',
362 r
'(?:www\.)?invidious\.flokinet\.to',
363 r
'(?:www\.)?invidious\.baczek\.me',
364 r
'(?:www\.)?y\.com\.sb',
365 r
'(?:www\.)?invidious\.epicsite\.xyz',
366 r
'(?:www\.)?invidious\.lidarshield\.cloud',
367 r
'(?:www\.)?yt\.funami\.tech',
368 r
'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
369 r
'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
370 r
'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
371 # youtube-dl invidious instances list
372 r
'(?:(?:www|no)\.)?invidiou\.sh',
373 r
'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
374 r
'(?:www\.)?invidious\.kabi\.tk',
375 r
'(?:www\.)?invidious\.mastodon\.host',
376 r
'(?:www\.)?invidious\.zapashcanon\.fr',
377 r
'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
378 r
'(?:www\.)?invidious\.tinfoil-hat\.net',
379 r
'(?:www\.)?invidious\.himiko\.cloud',
380 r
'(?:www\.)?invidious\.reallyancient\.tech',
381 r
'(?:www\.)?invidious\.tube',
382 r
'(?:www\.)?invidiou\.site',
383 r
'(?:www\.)?invidious\.site',
384 r
'(?:www\.)?invidious\.xyz',
385 r
'(?:www\.)?invidious\.nixnet\.xyz',
386 r
'(?:www\.)?invidious\.048596\.xyz',
387 r
'(?:www\.)?invidious\.drycat\.fr',
388 r
'(?:www\.)?inv\.skyn3t\.in',
389 r
'(?:www\.)?tube\.poal\.co',
390 r
'(?:www\.)?tube\.connect\.cafe',
391 r
'(?:www\.)?vid\.wxzm\.sx',
392 r
'(?:www\.)?vid\.mint\.lgbt',
393 r
'(?:www\.)?vid\.puffyan\.us',
394 r
'(?:www\.)?yewtu\.be',
395 r
'(?:www\.)?yt\.elukerio\.org',
396 r
'(?:www\.)?yt\.lelux\.fi',
397 r
'(?:www\.)?invidious\.ggc-project\.de',
398 r
'(?:www\.)?yt\.maisputain\.ovh',
399 r
'(?:www\.)?ytprivate\.com',
400 r
'(?:www\.)?invidious\.13ad\.de',
401 r
'(?:www\.)?invidious\.toot\.koeln',
402 r
'(?:www\.)?invidious\.fdn\.fr',
403 r
'(?:www\.)?watch\.nettohikari\.com',
404 r
'(?:www\.)?invidious\.namazso\.eu',
405 r
'(?:www\.)?invidious\.silkky\.cloud',
406 r
'(?:www\.)?invidious\.exonip\.de',
407 r
'(?:www\.)?invidious\.riverside\.rocks',
408 r
'(?:www\.)?invidious\.blamefran\.net',
409 r
'(?:www\.)?invidious\.moomoo\.de',
410 r
'(?:www\.)?ytb\.trom\.tf',
411 r
'(?:www\.)?yt\.cyberhost\.uk',
412 r
'(?:www\.)?kgg2m7yk5aybusll\.onion',
413 r
'(?:www\.)?qklhadlycap4cnod\.onion',
414 r
'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
415 r
'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
416 r
'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
417 r
'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
418 r
'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
419 r
'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
420 r
'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
421 r
'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
422 r
'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
423 r
'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
424 # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
425 r
'(?:www\.)?piped\.kavin\.rocks',
426 r
'(?:www\.)?piped\.tokhmi\.xyz',
427 r
'(?:www\.)?piped\.syncpundit\.io',
428 r
'(?:www\.)?piped\.mha\.fi',
429 r
'(?:www\.)?watch\.whatever\.social',
430 r
'(?:www\.)?piped\.garudalinux\.org',
431 r
'(?:www\.)?piped\.rivo\.lol',
432 r
'(?:www\.)?piped-libre\.kavin\.rocks',
433 r
'(?:www\.)?yt\.jae\.fi',
434 r
'(?:www\.)?piped\.mint\.lgbt',
436 r
'(?:www\.)?piped\.esmailelbob\.xyz',
437 r
'(?:www\.)?piped\.projectsegfau\.lt',
438 r
'(?:www\.)?piped\.privacydev\.net',
439 r
'(?:www\.)?piped\.palveluntarjoaja\.eu',
440 r
'(?:www\.)?piped\.smnz\.de',
441 r
'(?:www\.)?piped\.adminforge\.de',
442 r
'(?:www\.)?watch\.whatevertinfoil\.de',
443 r
'(?:www\.)?piped\.qdi\.fi',
444 r
'(?:(?:www|cf)\.)?piped\.video',
445 r
'(?:www\.)?piped\.aeong\.one',
446 r
'(?:www\.)?piped\.moomoo\.me',
447 r
'(?:www\.)?piped\.chauvet\.pro',
448 r
'(?:www\.)?watch\.leptons\.xyz',
449 r
'(?:www\.)?pd\.vern\.cc',
450 r
'(?:www\.)?piped\.hostux\.net',
451 r
'(?:www\.)?piped\.lunar\.icu',
452 # Hyperpipe instances from https://hyperpipe.codeberg.page/
453 r
'(?:www\.)?hyperpipe\.surge\.sh',
454 r
'(?:www\.)?hyperpipe\.esmailelbob\.xyz',
455 r
'(?:www\.)?listen\.whatever\.social',
456 r
'(?:www\.)?music\.adminforge\.de',
459 # extracted from account/account_menu ep
460 # XXX: These are the supported YouTube UI and API languages,
461 # which is slightly different from languages supported for translation in YouTube studio
462 _SUPPORTED_LANG_CODES
= [
463 'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
464 'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
465 'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
466 'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
467 'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
468 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko',
471 _IGNORED_WARNINGS
= {
472 'Unavailable videos will be hidden during playback',
473 'Unavailable videos are hidden',
476 _YT_HANDLE_RE
= r
'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en
477 _YT_CHANNEL_UCID_RE
= r
'UC[\w-]{22}'
def ucid_or_none(self, ucid):
    """Validate ``ucid`` against ``_YT_CHANNEL_UCID_RE`` anchored with ``^``/``$``.

    Delegates to ``_search_regex`` with ``default=None``, so a value that does
    not match yields None instead of an error.
    """
    anchored_ucid_re = rf'^({self._YT_CHANNEL_UCID_RE})$'
    return self._search_regex(anchored_ucid_re, ucid, 'UC-id', default=None)
def handle_or_none(self, handle):
    """Validate ``handle`` against ``_YT_HANDLE_RE`` anchored with ``^``/``$``.

    Delegates to ``_search_regex`` with ``default=None``, so a value that does
    not match yields None instead of an error.
    """
    anchored_handle_re = rf'^({self._YT_HANDLE_RE})$'
    return self._search_regex(anchored_handle_re, handle, '@-handle', default=None)
def handle_from_url(self, url):
    """Look for an @-handle at the start of the path of ``url``.

    The scheme and ``youtube.com`` host (optionally with ``www.``) are
    optional, so a bare path such as ``/@name`` is accepted as well.
    Delegates to ``_search_regex`` with ``default=None``.
    """
    handle_url_re = rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_HANDLE_RE})'
    return self._search_regex(handle_url_re, url, 'channel handle', default=None)
def ucid_from_url(self, url):
    """Look for a channel id (``UC...``) at the start of the path of ``url``.

    The scheme and ``youtube.com`` host (optionally with ``www.``) are
    optional, so a bare path is accepted as well.
    Delegates to ``_search_regex`` with ``default=None``.
    """
    ucid_url_re = rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_CHANNEL_UCID_RE})'
    return self._search_regex(ucid_url_re, url, 'channel id', default=None)
493 @functools.cached_property
494 def _preferred_lang(self
):
496 Returns a language code supported by YouTube for the user preferred language.
497 Returns None if no preferred language set.
499 preferred_lang
= self
._configuration
_arg
('lang', ie_key
='Youtube', casesense
=True, default
=[''])[0]
500 if not preferred_lang
:
502 if preferred_lang
not in self
._SUPPORTED
_LANG
_CODES
:
503 raise ExtractorError(
504 f
'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
506 elif preferred_lang
!= 'en':
508 f
'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
509 return preferred_lang
511 def _initialize_consent(self
):
512 cookies
= self
._get
_cookies
('https://www.youtube.com/')
513 if cookies
.get('__Secure-3PSID'):
515 socs
= cookies
.get('SOCS')
516 if socs
and not socs
.value
.startswith('CAA'): # not consented
518 self
._set
_cookie
('.youtube.com', 'SOCS', 'CAI', secure
=True) # accept all (required for mixes)
520 def _initialize_pref(self
):
521 cookies
= self
._get
_cookies
('https://www.youtube.com/')
522 pref_cookie
= cookies
.get('PREF')
526 pref
= dict(urllib
.parse
.parse_qsl(pref_cookie
.value
))
528 self
.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
529 pref
.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'}
)
530 self
._set
_cookie
('.youtube.com', name
='PREF', value
=urllib
.parse
.urlencode(pref
))
def _real_initialize(self):
    """Run the extractor's setup steps in a fixed order."""
    # 1. PREF cookie (language / timezone preferences)
    self._initialize_pref()
    # 2. SOCS consent cookie
    self._initialize_consent()
    # 3. Abort early when login is mandatory but no cookies were supplied
    self._check_login_required()
def _check_login_required(self):
    """Call ``raise_login_required`` when login is mandatory and no cookies were passed."""
    # Nothing to enforce when login is optional or cookies are available
    if not self._LOGIN_REQUIRED or self._cookies_passed:
        return
    self.raise_login_required('Login details are needed to download this content', method='cookies')
541 _YT_INITIAL_DATA_RE
= r
'(?:window\s*\[\s*["\']ytInitialData
["\']\s*\]|ytInitialData)\s*='
542 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the ``INNERTUBE_CLIENTS`` entry for ``client``.

    The copy lets callers modify the result without touching the shared table.
    Raises KeyError for an unknown client name.
    """
    builtin_cfg = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin_cfg)
def _get_innertube_host(self, client='web'):
    """Return the ``INNERTUBE_HOST`` configured for ``client`` in ``INNERTUBE_CLIENTS``."""
    client_cfg = INNERTUBE_CLIENTS[client]
    return client_cfg['INNERTUBE_HOST']
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """``try_get`` on ``ytcfg``, falling back to the default config of ``default_client``.

    The fallback is used whenever the value obtained from ``ytcfg`` is falsy,
    and the default config is only built in that case.
    """
    def lookup(cfg):
        return try_get(cfg, getter, expected_type)

    found = lookup(ytcfg)
    if found:
        return found
    return lookup(self._get_default_ytcfg(default_client))
def _extract_client_name(self, ytcfg, default_client='web'):
    """Read the client name (a str) from ``ytcfg`` or the default client config.

    ``INNERTUBE_CLIENT_NAME`` is tried first, then the ``clientName`` stored in
    the innertube context.
    """
    name_getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, name_getters, str, default_client)
def _extract_client_version(self, ytcfg, default_client='web'):
    """Read the client version (a str) from ``ytcfg`` or the default client config.

    ``INNERTUBE_CLIENT_VERSION`` is tried first, then the ``clientVersion``
    stored in the innertube context.
    """
    version_getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, version_getters, str, default_client)
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """Choose the API hostname to send innertube requests to.

    Precedence: the ``innertube_host`` extractor argument, then
    ``req_api_hostname``, then the host configured for ``default_client``
    (``'web'`` when that is not given).
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Read ``INNERTUBE_API_KEY`` (a str) from ``ytcfg`` or the default client config."""
    def read_api_key(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, read_api_key, str, default_client)
572 def _extract_context(self, ytcfg=None, default_client='web'):
574 (ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
575 # Enforce language and tz for extraction
576 client_context = traverse_obj(context, 'client', expected_type=dict, default={})
577 client_context.update({'hl': self._preferred_lang or 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
582 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
583 time_now = round(time.time())
584 if self._SAPISID is None:
585 yt_cookies = self._get_cookies('https://www.youtube.com')
586 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
587 # See: https://github.com/yt-dlp/yt-dlp/issues/393
588 sapisid_cookie = dict_get(
589 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
590 if sapisid_cookie and sapisid_cookie.value:
591 self._SAPISID = sapisid_cookie.value
592 self.write_debug('Extracted SAPISID cookie')
593 # SAPISID cookie is required if not already present
594 if not yt_cookies.get('SAPISID'):
595 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
597 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
599 self._SAPISID = False
600 if not self._SAPISID:
602 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
603 sapisidhash = hashlib.sha1(
604 f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
605 return f'SAPISIDHASH {time_now}_{sapisidhash}'
607 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
608 note='Downloading API JSON', errnote='Unable to download API page',
609 context=None, api_key=None, api_hostname=None, default_client='web'):
611 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
613 real_headers = self.generate_api_headers(default_client=default_client)
614 real_headers.update({'content-type': 'application/json'})
616 real_headers.update(headers)
617 api_key = (self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
618 or api_key or self._extract_api_key(default_client=default_client))
619 return self._download_json(
620 f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}',
621 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
622 data=json.dumps(data).encode('utf8'), headers=real_headers,
623 query={'key': api_key, 'prettyPrint': 'false'})
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search ``webpage`` for the JSON that follows the ``ytInitialData`` assignment.

    The lookup is delegated to ``_search_json`` using ``_YT_INITIAL_DATA_RE``
    as the start pattern; ``fatal`` is passed through unchanged.
    """
    start_pattern = self._YT_INITIAL_DATA_RE
    return self._search_json(start_pattern, webpage, 'yt initial data', item_id, fatal=fatal)
629 def _extract_session_index(*data):
631 Index of current account in account list.
632 See: https://github.com/yt-dlp/yt-dlp/pull/519
635 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
636 if session_index is not None:
640 def _extract_identity_token(self, ytcfg=None, webpage=None):
642 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
646 return self._search_regex(
647 r'\bID_TOKEN["\']\s
*:\s
*["\'](.+?)["\']', webpage,
648 'identity token
', default=None, fatal=False)
651 def _extract_account_syncid(*args):
653 Extract syncId required to download private playlists of secondary channels
654 @params response and/or ytcfg
657 # ytcfg includes channel_syncid if on secondary channel
658 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID
'], str)
662 data, (lambda x: x['responseContext
']['mainAppWebResponseContext
']['datasyncId
'],
663 lambda x: x['DATASYNC_ID
']), str) or '').split('||
')
664 if len(sync_ids) >= 2 and sync_ids[1]:
665 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
666 # and just "user_syncid||" for primary channel. We only want the channel_syncid
670 def _extract_visitor_data(*args):
672 Extracts visitorData from an API response or ytcfg
673 Appears to be used to track session state
676 args, [('VISITOR_DATA
', ('INNERTUBE_CONTEXT
', 'client
', 'visitorData
'), ('responseContext
', 'visitorData
'))],
@functools.cached_property
def is_authenticated(self):
    """Whether ``_generate_sapisidhash_header`` yields a truthy header.

    Evaluated once per instance and cached afterwards.
    """
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
683 def extract_ytcfg(self, video_id, webpage):
686 return self._parse_json(
688 r'ytcfg\
.set\s
*\
(\s
*({.+?}
)\s
*\
)\s
*;', webpage, 'ytcfg
',
689 default='{}'), video_id, fatal=False) or {}
691 def generate_api_headers(
692 self
, *, ytcfg
=None, account_syncid
=None, session_index
=None,
693 visitor_data
=None, identity_token
=None, api_hostname
=None, default_client
='web'):
695 origin
= 'https://' + (self
._select
_api
_hostname
(api_hostname
, default_client
))
697 'X-YouTube-Client-Name': str(
698 self
._ytcfg
_get
_safe
(ytcfg
, lambda x
: x
['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client
=default_client
)),
699 'X-YouTube-Client-Version': self
._extract
_client
_version
(ytcfg
, default_client
),
701 'X-Youtube-Identity-Token': identity_token
or self
._extract
_identity
_token
(ytcfg
),
702 'X-Goog-PageId': account_syncid
or self
._extract
_account
_syncid
(ytcfg
),
703 'X-Goog-Visitor-Id': visitor_data
or self
._extract
_visitor
_data
(ytcfg
),
704 'User-Agent': self
._ytcfg
_get
_safe
(ytcfg
, lambda x
: x
['INNERTUBE_CONTEXT']['client']['userAgent'], default_client
=default_client
),
706 if session_index
is None:
707 session_index
= self
._extract
_session
_index
(ytcfg
)
708 if account_syncid
or session_index
is not None:
709 headers
['X-Goog-AuthUser'] = session_index
if session_index
is not None else 0
711 auth
= self
._generate
_sapisidhash
_header
(origin
)
713 headers
['Authorization'] = auth
714 headers
['X-Origin'] = origin
715 return filter_dict(headers
)
717 def _download_ytcfg(self
, client
, video_id
):
719 'web': 'https://www.youtube.com',
720 'web_music': 'https://music.youtube.com',
721 'web_embedded': f
'https://www.youtube.com/embed/{video_id}?html5=1',
725 webpage
= self
._download
_webpage
(
726 url
, video_id
, fatal
=False, note
=f
'Downloading {client.replace("_", " ").strip()} client config')
727 return self
.extract_ytcfg(video_id
, webpage
) or {}
730 def _build_api_continuation_query(continuation
, ctp
=None):
732 'continuation': continuation
,
734 # TODO: Inconsistency with clickTrackingParams.
735 # Currently we have a fixed ctp contained within context (from ytcfg)
736 # and a ctp in root query for continuation.
738 query
['clickTracking'] = {'clickTrackingParams': ctp}
742 def _extract_next_continuation_data(cls
, renderer
):
743 next_continuation
= try_get(
744 renderer
, (lambda x
: x
['continuations'][0]['nextContinuationData'],
745 lambda x
: x
['continuation']['reloadContinuationData']), dict)
746 if not next_continuation
:
748 continuation
= next_continuation
.get('continuation')
751 ctp
= next_continuation
.get('clickTrackingParams')
752 return cls
._build
_api
_continuation
_query
(continuation
, ctp
)
755 def _extract_continuation_ep_data(cls
, continuation_ep
: dict):
756 if isinstance(continuation_ep
, dict):
757 continuation
= try_get(
758 continuation_ep
, lambda x
: x
['continuationCommand']['token'], str)
761 ctp
= continuation_ep
.get('clickTrackingParams')
762 return cls
._build
_api
_continuation
_query
(continuation
, ctp
)
def _extract_continuation(cls, renderer):
    """Find the continuation query for ``renderer``.

    The result of ``_extract_next_continuation_data`` is preferred. When it is
    falsy, the ``continuationItemRenderer`` entries under ``contents``,
    ``items`` or ``rows`` are searched, and the first endpoint (either
    ``continuationEndpoint`` or ``button.buttonRenderer.command``) accepted by
    ``_extract_continuation_ep_data`` is returned.
    """
    next_data = cls._extract_next_continuation_data(renderer)
    if next_data:
        return next_data

    item_containers = ('contents', 'items', 'rows')
    endpoint_paths = ('continuationEndpoint', ('button', 'buttonRenderer', 'command'))
    return traverse_obj(
        renderer,
        (item_containers, ..., 'continuationItemRenderer', endpoint_paths),
        get_all=False, expected_type=cls._extract_continuation_ep_data)
776 def _extract_alerts(cls
, data
):
777 for alert_dict
in try_get(data
, lambda x
: x
['alerts'], list) or []:
778 if not isinstance(alert_dict
, dict):
780 for alert
in alert_dict
.values():
781 alert_type
= alert
.get('type')
784 message
= cls
._get
_text
(alert
, 'text')
786 yield alert_type
, message
788 def _report_alerts(self
, alerts
, expected
=True, fatal
=True, only_once
=False):
789 errors
, warnings
= [], []
790 for alert_type
, alert_message
in alerts
:
791 if alert_type
.lower() == 'error' and fatal
:
792 errors
.append([alert_type
, alert_message
])
793 elif alert_message
not in self
._IGNORED
_WARNINGS
:
794 warnings
.append([alert_type
, alert_message
])
796 for alert_type
, alert_message
in (warnings
+ errors
[:-1]):
797 self
.report_warning(f
'YouTube said: {alert_type} - {alert_message}', only_once
=only_once
)
799 raise ExtractorError(f
'YouTube said: {errors[-1][1]}', expected
=expected
)
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts found in ``data`` and hand them to ``_report_alerts``.

    Extra positional and keyword arguments are forwarded to ``_report_alerts``.
    """
    found_alerts = self._extract_alerts(data)
    return self._report_alerts(found_alerts, *args, **kwargs)
804 def _extract_badges(self
, badge_list
: list):
806 Extract known BadgeType's from a list of badge renderers.
807 @returns [{'type': BadgeType}]
810 'PRIVACY_UNLISTED': BadgeType
.AVAILABILITY_UNLISTED
,
811 'PRIVACY_PRIVATE': BadgeType
.AVAILABILITY_PRIVATE
,
812 'PRIVACY_PUBLIC': BadgeType
.AVAILABILITY_PUBLIC
,
813 'CHECK_CIRCLE_THICK': BadgeType
.VERIFIED
,
814 'OFFICIAL_ARTIST_BADGE': BadgeType
.VERIFIED
,
815 'CHECK': BadgeType
.VERIFIED
,
819 'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType
.AVAILABILITY_SUBSCRIPTION
,
820 'BADGE_STYLE_TYPE_PREMIUM': BadgeType
.AVAILABILITY_PREMIUM
,
821 'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType
.LIVE_NOW
,
822 'BADGE_STYLE_TYPE_VERIFIED': BadgeType
.VERIFIED
,
823 'BADGE_STYLE_TYPE_VERIFIED_ARTIST': BadgeType
.VERIFIED
,
827 'unlisted': BadgeType
.AVAILABILITY_UNLISTED
,
828 'private': BadgeType
.AVAILABILITY_PRIVATE
,
829 'members only': BadgeType
.AVAILABILITY_SUBSCRIPTION
,
830 'live': BadgeType
.LIVE_NOW
,
831 'premium': BadgeType
.AVAILABILITY_PREMIUM
,
832 'verified': BadgeType
.VERIFIED
,
833 'official artist channel': BadgeType
.VERIFIED
,
837 for badge
in traverse_obj(badge_list
, (..., lambda key
, _
: re
.search(r
'[bB]adgeRenderer$', key
))):
839 icon_type_map
.get(traverse_obj(badge
, ('icon', 'iconType'), expected_type
=str))
840 or badge_style_map
.get(traverse_obj(badge
, 'style'))
843 badges
.append({'type': badge_type}
)
846 # fallback, won't work in some languages
847 label
= traverse_obj(
848 badge
, 'label', ('accessibilityData', 'label'), 'tooltip', 'iconTooltip', get_all
=False, expected_type
=str, default
='')
849 for match
, label_badge_type
in label_map
.items():
850 if match
in label
.lower():
851 badges
.append({'type': label_badge_type}
)
def _has_badge(badges, badge_type):
    """Tell whether any entry of ``badges`` has ``'type'`` equal to ``badge_type``."""
    def is_requested_type(_, badge):
        return badge['type'] == badge_type

    matching = traverse_obj(badges, is_requested_type)
    return bool(matching)
861 def _get_text(data
, *path_list
, max_runs
=None):
862 for path
in path_list
or [None]:
866 obj
= traverse_obj(data
, path
, default
=[])
867 if not any(key
is ... or isinstance(key
, (list, tuple)) for key
in variadic(path
)):
870 text
= try_get(item
, lambda x
: x
['simpleText'], str)
873 runs
= try_get(item
, lambda x
: x
['runs'], list) or []
874 if not runs
and isinstance(item
, list):
877 runs
= runs
[:min(len(runs
), max_runs
or len(runs
))]
878 text
= ''.join(traverse_obj(runs
, (..., 'text'), expected_type
=str))
882 def _get_count(self
, data
, *path_list
):
883 count_text
= self
._get
_text
(data
, *path_list
) or ''
884 count
= parse_count(count_text
)
887 self
._search
_regex
(r
'^([\d,]+)', re
.sub(r
'\s', '', count_text
), 'count', default
=None))
891 def _extract_thumbnails(data
, *path_list
, final_key
='thumbnails'):
893 Extract thumbnails from thumbnails dict
894 @param path_list: path list to level that contains 'thumbnails' key
897 for path
in path_list
or [()]:
898 for thumbnail
in traverse_obj(data
, (*variadic(path
), final_key
, ...)):
899 thumbnail_url
= url_or_none(thumbnail
.get('url'))
900 if not thumbnail_url
:
902 # Sometimes youtube gives a wrong thumbnail URL. See:
903 # https://github.com/yt-dlp/yt-dlp/issues/233
904 # https://github.com/ytdl-org/youtube-dl/issues/28023
905 if 'maxresdefault' in thumbnail_url
:
906 thumbnail_url
= thumbnail_url
.split('?')[0]
908 'url': thumbnail_url
,
909 'height': int_or_none(thumbnail
.get('height')),
910 'width': int_or_none(thumbnail
.get('width')),
915 def extract_relative_time(relative_time_text
):
917 Extracts a relative time from string and converts to dt object
918 e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'
921 # XXX: this could be moved to a general function in utils/_utils.py
922 # The relative time text strings are roughly the same as what
923 # Javascript's Intl.RelativeTimeFormat function generates.
924 # See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat
926 r
'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>sec(?:ond)?|s|min(?:ute)?|h(?:our|r)?|d(?:ay)?|w(?:eek|k)?|mo(?:nth)?|y(?:ear|r)?)s?\s*ago',
929 start
= mobj
.group('start')
931 return datetime_from_str(start
)
933 return datetime_from_str('now-{}{}'.format(mobj
.group('time'), mobj
.group('unit')))
def _parse_time_text(self, text):
    """
    Convert a (possibly relative) time text into a UNIX timestamp

    Relative expressions are tried first via extract_relative_time(); after that
    the whole text, then a date-like fragment of the lowercased text, is passed
    to unified_timestamp().

    @param text: time text, e.g. as taken from a renderer
    @returns: timestamp, or None if text is empty or cannot be parsed
    """
    # NOTE(review): the empty-text guard, `timestamp = None`, the head of the
    # warning call and the final return were restored from context -- confirm
    # against the upstream file.
    if not text:
        return None

    timestamp = None
    dt_ = self.extract_relative_time(text)
    if isinstance(dt_, dt.datetime):
        timestamp = calendar.timegm(dt_.timetuple())

    if timestamp is None:
        timestamp = (
            unified_timestamp(text) or unified_timestamp(
                self._search_regex(
                    (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                    text.lower(), 'time text', default=None)))

    # Only warn when the preferred language is unset or English
    if text and timestamp is None and self._preferred_lang in (None, 'en'):
        self.report_warning(
            f'Cannot parse localized time text "{text}"', only_once=True)
    return timestamp
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` and return its response, retrying on network
    errors, on "unknown error" alerts and on incomplete data

    @param item_id: id passed to _call_api as video_id
    @param query: request query passed to _call_api
    @param note: progress note passed to _call_api
    @param headers: request headers passed to _call_api
    @param ytcfg: config used to derive the request context and API key
    @param check_get_keys: path(s) looked up in the response with traverse_obj;
                           a falsy result counts as incomplete data
    @param ep: API endpoint name
    @param fatal: passed to _error_or_warning for non-retried errors
    @param api_hostname: passed to _call_api
    @param default_client: client used for context/API key and passed to _call_api
    @returns: the response; None once incomplete-data retries are used up without
              raising; otherwise whatever _error_or_warning returns
    """
    # NOTE(review): the `while True`/`try` scaffolding, the retry-advance
    # statements (`<rm>.error = e` / `next(...)` / `continue`), the `yt_error`
    # assignment and the final returns were restored from context -- confirm
    # against the upstream file.
    raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE))
    # Incomplete Data should be a warning by default when retries are exhausted, while other errors should be fatal.
    icd_retries = iter(self.RetryManager(fatal=raise_for_incomplete))
    icd_rm = next(icd_retries)
    main_retries = iter(self.RetryManager())
    main_rm = next(main_retries)
    # Manual retry loop for multiple RetryManagers
    # The proper RetryManager MUST be advanced after an error
    # and its result MUST be checked if the manager is non fatal
    while True:
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            if not isinstance(e.cause, network_exceptions):
                return self._error_or_warning(e, fatal=fatal)
            elif not isinstance(e.cause, HTTPError):
                # Network error without an HTTP response: retry
                main_rm.error = e
                next(main_retries)
                continue

            # Report the error message from a non-HTML error body, if there is one
            first_bytes = e.cause.response.read(512)
            if not is_html(first_bytes):
                yt_error = try_get(
                    self._parse_json(
                        self._webpage_read_content(e.cause.response, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if e.cause.status not in (403, 429):
                main_rm.error = e
                next(main_retries)
                continue
            return self._error_or_warning(e, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube's servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                main_rm.error = e
                next(main_retries)
                continue
            return self._error_or_warning(e, fatal=fatal)
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            icd_rm.error = ExtractorError('Incomplete data received', expected=True)
            should_retry = next(icd_retries, None)
            if not should_retry:
                return None
            continue

        return response
def is_music_url(url):
    """
    Check whether a URL points at music.youtube.com

    The scheme is optional, but the host must be followed by a slash.

    @param url: URL string to test
    @returns: True if url starts with an (optionally http/https prefixed)
              'music.youtube.com/' host, else False
    """
    # NOTE(review): the signature has no self/cls although sibling definitions
    # are methods, so a @staticmethod decorator is presumably expected directly
    # above this def -- confirm.
    return re.match(r'(https?://)?music\.youtube\.com/', url) is not None
def _extract_video(self, renderer):
    """
    Build a 'url' type result for YoutubeIE from a video renderer

    @param renderer: renderer dict; fields that are not found on it are, where
                     possible, taken from the nested reelPlayerHeaderRenderer
    @returns: info dict with '_type': 'url' pointing to the watch (or shorts)
              URL, plus the metadata that could be read from the renderer
    """
    # NOTE(review): the `if not channel_id:` guard, the `live_status = (` head
    # with its `else None)` tail, `return {` and the '_type', 'id', 'url',
    # 'title', 'channel' and 'availability' dict keys were restored from
    # context -- confirm against the upstream file.
    video_id = renderer.get('videoId')

    reel_header_renderer = traverse_obj(renderer, (
        'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
        'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))

    title = self._get_text(renderer, 'title', 'headline') or self._get_text(reel_header_renderer, 'reelTitleText')
    description = self._get_text(renderer, 'descriptionSnippet')

    # Duration: explicit seconds first, then the length texts, then the accessibility label
    duration = int_or_none(renderer.get('lengthSeconds'))
    if duration is None:
        duration = parse_duration(self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str),
            video_id, default=None, group='duration'))

    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    if not channel_id:
        channel_id = traverse_obj(reel_header_renderer, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId'))

    channel_id = self.ucid_or_none(channel_id)

    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(traverse_obj(renderer, 'badges'))
    owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''
    # Use the /shorts/ URL when either the overlay or the navigation URL says it is a short
    url = f'https://www.youtube.com/watch?v={video_id}'
    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'

    time_text = (self._get_text(renderer, 'publishedTimeText', 'videoInfo')
                 or self._get_text(reel_header_renderer, 'timestampText') or '')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))

    live_status = (
        'is_upcoming' if scheduled_timestamp is not None
        else 'was_live' if 'streamed' in time_text.lower()
        else 'is_live' if overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW)
        else None)

    # videoInfo is a string like '50K views • 10 years ago'.
    view_count_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
    view_count = (0 if 'no views' in view_count_text.lower()
                  else self._get_count({'simpleText': view_count_text}))
    # Live and upcoming entries store the count under 'concurrent_view_count' instead
    view_count_field = 'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count'

    channel = (self._get_text(renderer, 'ownerText', 'shortBylineText')
               or self._get_text(reel_header_renderer, 'channelTitleText'))

    channel_handle = traverse_obj(renderer, (
        'shortBylineText', 'runs', ..., 'navigationEndpoint',
        (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl'))),
        expected_type=self.handle_from_url, get_all=False)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'channel_id': channel_id,
        'channel': channel,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'uploader': channel,
        'uploader_id': channel_handle,
        'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
        'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
        # The timestamp is only computed when the 'approximate_date' extractor arg is set
        'timestamp': (self._parse_time_text(time_text)
                      if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE)
                      else None),
        'release_timestamp': scheduled_timestamp,
        'availability': (
            'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            else self._availability(
                is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
                needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
                needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
                is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)),
        view_count_field: view_count,
        'live_status': live_status,
        'channel_is_verified': True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None,
    }
1124 class YoutubeIE(YoutubeBaseInfoExtractor
):
1126 _VALID_URL
= r
'''(?x)^
1128 (?:https?://|//) # http(s):// or protocol-independent URL
1129 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
1130 (?:www\.)?deturl\.com/www\.youtube\.com|
1131 (?:www\.)?pwnyoutube\.com|
1132 (?:www\.)?hooktube\.com|
1133 (?:www\.)?yourepeat\.com|
1134 tube\.majestyc\.net|
1136 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
1137 (?:.*?\#/)? # handle anchor (#/) redirect urls
1138 (?: # the various things that can precede the ID:
1139 (?:(?:v|embed|e|shorts|live)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
1140 |(?: # or the v= param in all its forms
1141 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
1142 (?:\?|\#!?) # the params delimiter ? or # or #!
1143 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
1148 youtu\.be| # just youtu.be/xxxx
1149 vid\.plus| # or vid.plus/xxxx
1150 zwearz\.com/watch| # or zwearz.com/watch/xxxx
1153 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
1155 )? # all until now is optional -> you can pass the naked ID
1156 (?P<id>[0-9A-Za-z_-]{{11}}) # here is it! the YouTube video ID
1157 (?(1).+)? # if we found the ID, everything can follow
1159 invidious
='|'.join(YoutubeBaseInfoExtractor
._INVIDIOUS
_SITES
),
1164 <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
1172 (?P
<url
>(?
:https?
:)?
//(?
:www\
.)?
youtube(?
:-nocookie
)?\
.com
/
1173 (?
:embed|v|p
)/[0-9A
-Za
-z_
-]{11}
.*?
)
1175 # https://wordpress.org/plugins/lazy-load-for-videos/
1177 <a\s
[^
>]*\bhref
="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
1178 \s
[^
>]*\bclass
="[^"]*\blazy
-load
-youtube
''',
1180 _RETURN_TYPE = 'video' # XXX: How to handle multifeed?
1183 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
1184 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
1185 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
1187 _formats = { # NB: Used in YoutubeWebArchiveIE and GoogleDriveIE
1188 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1189 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1190 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
1191 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
1192 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
1193 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1194 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1195 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1196 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
1197 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1198 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1199 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1200 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1201 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1202 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1203 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1204 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1205 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1209 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1210 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1211 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1212 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1213 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1214 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1215 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1217 # Apple HTTP Live Streaming
1218 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1219 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1220 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1221 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1222 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1223 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1224 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1225 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
1228 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1229 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1230 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1231 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1232 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
1233 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
1234 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1235 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1236 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1237 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1238 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1239 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
1242 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1243 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1244 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1245 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1246 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1247 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1248 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
1251 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1252 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1253 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1254 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1255 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1256 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1257 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1258 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1259 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1260 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1261 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1262 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1263 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1264 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1265 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1266 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1267 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1268 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1269 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1270 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1271 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1272 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1275 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1276 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1278 # Dash webm audio with opus inside
1279 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1280 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1281 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1284 '_rtmp': {'protocol': 'rtmp'},
1286 # av01 video only formats sometimes served with "unknown" codecs
1287 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1288 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1289 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1290 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1291 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1292 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1293 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1294 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1296 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
1303 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1305 'id': 'BaW_jenozKc',
1307 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1308 'channel': 'Philipp Hagemeister',
1309 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1310 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1311 'upload_date': '20121002',
1312 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1313 'categories': ['Science & Technology'],
1314 'tags': ['youtube-dl'],
1318 'availability': 'public',
1319 'playable_in_embed': True,
1320 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1321 'live_status': 'not_live',
1325 'comment_count': int,
1326 'channel_follower_count': int,
1327 'uploader': 'Philipp Hagemeister',
1328 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
1329 'uploader_id': '@PhilippHagemeister',
1330 'heatmap': 'count:100',
1331 'timestamp': 1349198244,
1335 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1336 'note': 'Embed-only video (#1746)',
1338 'id': 'yZIXLfi8CZQ',
1340 'upload_date': '20120608',
1341 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1342 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1345 'skip': 'Private video',
1348 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1349 'note': 'Use the first video ID in the URL',
1351 'id': 'BaW_jenozKc',
1353 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1354 'channel': 'Philipp Hagemeister',
1355 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1356 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1357 'upload_date': '20121002',
1358 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1359 'categories': ['Science & Technology'],
1360 'tags': ['youtube-dl'],
1364 'availability': 'public',
1365 'playable_in_embed': True,
1366 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1367 'live_status': 'not_live',
1369 'comment_count': int,
1370 'channel_follower_count': int,
1371 'uploader': 'Philipp Hagemeister',
1372 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
1373 'uploader_id': '@PhilippHagemeister',
1374 'heatmap': 'count:100',
1375 'timestamp': 1349198244,
1378 'skip_download': True,
1382 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1383 'note': '256k DASH audio (format 141) via DASH manifest',
1385 'id': 'a9LDPn-MO4I',
1387 'upload_date': '20121002',
1389 'title': 'UHDTV TEST 8K VIDEO.mp4',
1392 'youtube_include_dash_manifest': True,
1395 'skip': 'format 141 not served anymore',
1397 # DASH manifest with encrypted signature
1399 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1401 'id': 'IB3lcPjvWLA',
1403 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1404 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1406 'upload_date': '20131011',
1409 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1410 'playable_in_embed': True,
1411 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1413 'track': 'The Spark',
1414 'live_status': 'not_live',
1415 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1416 'channel': 'Afrojack',
1418 'availability': 'public',
1419 'categories': ['Music'],
1421 'alt_title': 'The Spark',
1422 'channel_follower_count': int,
1423 'uploader': 'Afrojack',
1424 'uploader_url': 'https://www.youtube.com/@Afrojack',
1425 'uploader_id': '@Afrojack',
1428 'youtube_include_dash_manifest': True,
1429 'format': '141/bestaudio[ext=m4a]',
1432 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1434 'note': 'Embed allowed age-gate video',
1435 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1437 'id': 'HtVdAasjOgU',
1439 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1440 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1442 'upload_date': '20140605',
1444 'categories': ['Gaming'],
1445 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1446 'availability': 'needs_auth',
1447 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1449 'channel': 'The Witcher',
1450 'live_status': 'not_live',
1452 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1453 'playable_in_embed': True,
1455 'channel_follower_count': int,
1456 'uploader': 'The Witcher',
1457 'uploader_url': 'https://www.youtube.com/@thewitcher',
1458 'uploader_id': '@thewitcher',
1459 'comment_count': int,
1460 'channel_is_verified': True,
1461 'heatmap': 'count:100',
1462 'timestamp': 1401991663,
1466 'note': 'Age-gate video with embed allowed in public site',
1467 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1469 'id': 'HsUATh_Nc2U',
1471 'title': 'Godzilla 2 (Official Video)',
1472 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1473 'upload_date': '20200408',
1475 'availability': 'needs_auth',
1476 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1477 'channel': 'FlyingKitty',
1478 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1480 'categories': ['Entertainment'],
1481 'live_status': 'not_live',
1482 'tags': ['Flyingkitty', 'godzilla 2'],
1483 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1486 'playable_in_embed': True,
1487 'channel_follower_count': int,
1488 'uploader': 'FlyingKitty',
1489 'uploader_url': 'https://www.youtube.com/@FlyingKitty900',
1490 'uploader_id': '@FlyingKitty900',
1491 'comment_count': int,
1492 'channel_is_verified': True,
1496 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1497 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1499 'id': 'Tq92D6wQ1mg',
1500 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1502 'upload_date': '20191228',
1503 'description': 'md5:17eccca93a786d51bc67646756894066',
1506 'availability': 'needs_auth',
1507 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1509 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1510 'channel': 'Projekt Melody',
1511 'live_status': 'not_live',
1512 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1513 'playable_in_embed': True,
1514 'categories': ['Entertainment'],
1516 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1517 'comment_count': int,
1518 'channel_follower_count': int,
1519 'uploader': 'Projekt Melody',
1520 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
1521 'uploader_id': '@ProjektMelody',
1522 'timestamp': 1577508724,
1526 'note': 'Non-Agegated non-embeddable video',
1527 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1529 'id': 'MeJVWBSsPAY',
1531 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1532 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1533 'upload_date': '20130730',
1534 'track': 'Such mich find mich',
1536 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1538 'playable_in_embed': False,
1539 'creator': 'OOMPH!',
1540 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1542 'alt_title': 'Such mich find mich',
1544 'channel': 'Herr Lurik',
1545 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1546 'categories': ['Music'],
1547 'availability': 'public',
1548 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1549 'live_status': 'not_live',
1551 'channel_follower_count': int,
1552 'uploader': 'Herr Lurik',
1553 'uploader_url': 'https://www.youtube.com/@HerrLurik',
1554 'uploader_id': '@HerrLurik',
1558 'note': 'Non-bypassable age-gated video',
1559 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1560 'only_matching': True,
1562 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1563 # YouTube Red ad is not captured for creator
1565 'url': '__2ABJjxzNo',
1567 'id': '__2ABJjxzNo',
1570 'upload_date': '20100430',
1571 'creator': 'deadmau5',
1572 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1573 'title': 'Deadmau5 - Some Chords (HD)',
1574 'alt_title': 'Some Chords',
1575 'availability': 'public',
1577 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1579 'live_status': 'not_live',
1580 'channel': 'deadmau5',
1581 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1583 'track': 'Some Chords',
1584 'artist': 'deadmau5',
1585 'playable_in_embed': True,
1587 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1588 'categories': ['Music'],
1589 'album': 'Some Chords',
1590 'channel_follower_count': int,
1591 'uploader': 'deadmau5',
1592 'uploader_url': 'https://www.youtube.com/@deadmau5',
1593 'uploader_id': '@deadmau5',
1595 'expected_warnings': [
1596 'DASH manifest missing',
1599 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1601 'url': 'lqQg6PlCWgI',
1603 'id': 'lqQg6PlCWgI',
1606 'upload_date': '20150827',
1607 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
1608 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1610 'release_timestamp': 1343767800,
1611 'playable_in_embed': True,
1612 'categories': ['Sports'],
1613 'release_date': '20120731',
1614 'channel': 'Olympics',
1615 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1616 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1617 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1619 'availability': 'public',
1620 'live_status': 'was_live',
1622 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
1623 'channel_follower_count': int,
1624 'uploader': 'Olympics',
1625 'uploader_url': 'https://www.youtube.com/@Olympics',
1626 'uploader_id': '@Olympics',
1627 'channel_is_verified': True,
1628 'timestamp': 1440707674,
1631 'skip_download': 'requires avconv',
1636 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1638 'id': '_b-2C3KPAM0',
1640 'stretched_ratio': 16 / 9.,
1642 'upload_date': '20110310',
1643 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1644 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1645 'playable_in_embed': True,
1649 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1650 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1651 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1653 'categories': ['People & Blogs'],
1655 'live_status': 'not_live',
1656 'availability': 'unlisted',
1657 'comment_count': int,
1658 'channel_follower_count': int,
1660 'uploader_url': 'https://www.youtube.com/@AllenMeow',
1661 'uploader_id': '@AllenMeow',
1662 'timestamp': 1299776999,
1665 # url_encoded_fmt_stream_map is empty string
1667 'url': 'qEJwOuvDf7I',
1669 'id': 'qEJwOuvDf7I',
1671 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1673 'upload_date': '20150404',
1676 'skip_download': 'requires avconv',
1678 'skip': 'This live event has ended.',
1680 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1682 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1684 'id': 'FIl7x6_3R5Y',
1686 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1687 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1689 'upload_date': '20150625',
1690 'formats': 'mincount:31',
1692 'skip': 'not actual anymore',
1694 # DASH manifest with segment_list
1696 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1697 'md5': '8ce563a1d667b599d21064e982ab9e31',
1699 'id': 'CsmdDsKjzN8',
1701 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1702 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1703 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1706 'youtube_include_dash_manifest': True,
1707 'format': '135', # bestvideo
1709 'skip': 'This live event has ended.',
1712 # Multifeed videos (multiple cameras), URL can be of any Camera
1713 # TODO: fix multifeed titles
1714 'url': 'https://www.youtube.com/watch?v=zaPI8MvL8pg',
1716 'id': 'zaPI8MvL8pg',
1717 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04',
1718 'description': 'md5:563ccbc698b39298481ca3c571169519',
1722 'id': 'j5yGuxZ8lLU',
1724 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Chris)',
1725 'description': 'md5:563ccbc698b39298481ca3c571169519',
1727 'channel_follower_count': int,
1728 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1729 'availability': 'public',
1730 'playable_in_embed': True,
1731 'upload_date': '20131105',
1732 'categories': ['Gaming'],
1733 'live_status': 'was_live',
1735 'release_timestamp': 1383701910,
1736 'thumbnail': 'https://i.ytimg.com/vi/j5yGuxZ8lLU/maxresdefault.jpg',
1737 'comment_count': int,
1740 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1741 'channel': 'WiiLikeToPlay',
1743 'release_date': '20131106',
1744 'uploader': 'WiiLikeToPlay',
1745 'uploader_id': '@WLTP',
1746 'uploader_url': 'https://www.youtube.com/@WLTP',
1750 'id': 'zaPI8MvL8pg',
1752 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Tyson)',
1753 'availability': 'public',
1754 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1755 'channel': 'WiiLikeToPlay',
1756 'channel_follower_count': int,
1757 'description': 'md5:563ccbc698b39298481ca3c571169519',
1762 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1763 'release_timestamp': 1383701915,
1764 'comment_count': int,
1765 'upload_date': '20131105',
1766 'thumbnail': 'https://i.ytimg.com/vi/zaPI8MvL8pg/maxresdefault.jpg',
1767 'release_date': '20131106',
1768 'playable_in_embed': True,
1769 'live_status': 'was_live',
1770 'categories': ['Gaming'],
1772 'uploader': 'WiiLikeToPlay',
1773 'uploader_id': '@WLTP',
1774 'uploader_url': 'https://www.youtube.com/@WLTP',
1778 'id': 'R7r3vfO7Hao',
1780 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Spencer)',
1781 'thumbnail': 'https://i.ytimg.com/vi/R7r3vfO7Hao/maxresdefault.jpg',
1782 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1784 'availability': 'public',
1785 'playable_in_embed': True,
1786 'upload_date': '20131105',
1787 'description': 'md5:563ccbc698b39298481ca3c571169519',
1788 'channel_follower_count': int,
1790 'release_date': '20131106',
1791 'comment_count': int,
1792 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1793 'channel': 'WiiLikeToPlay',
1794 'categories': ['Gaming'],
1795 'release_timestamp': 1383701914,
1796 'live_status': 'was_live',
1800 'uploader': 'WiiLikeToPlay',
1801 'uploader_id': '@WLTP',
1802 'uploader_url': 'https://www.youtube.com/@WLTP',
1805 'params': {'skip_download': True},
1806 'skip': 'Not multifeed anymore',
1809 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1810 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1812 'id': 'gVfLd0zydlo',
1813 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1815 'playlist_count': 2,
1816 'skip': 'Not multifeed anymore',
1819 'url': 'https://vid.plus/FlRa-iH7PGw',
1820 'only_matching': True,
1823 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1824 'only_matching': True,
1827 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1828 # Also tests cut-off URL expansion in video description (see
1829 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1830 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1831 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1833 'id': 'lsguqyKfVQg',
1835 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1836 'alt_title': 'Dark Walk',
1837 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1839 'upload_date': '20151119',
1840 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1841 'track': 'Dark Walk',
1842 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1843 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1844 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1845 'categories': ['Film & Animation'],
1847 'live_status': 'not_live',
1848 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1849 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1851 'availability': 'public',
1852 'channel': 'IronSoulElf',
1853 'playable_in_embed': True,
1856 'channel_follower_count': int,
1859 'skip_download': True,
1863 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1864 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1865 'only_matching': True,
1868 # Video with yt:stretch=17:0
1869 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1871 'id': 'Q39EVAstoRM',
1873 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1874 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1875 'upload_date': '20151107',
1878 'skip_download': True,
1880 'skip': 'This video does not exist.',
1883 # Video with incomplete 'yt:stretch=16:'
1884 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1885 'only_matching': True,
1888 # Video licensed under Creative Commons
1889 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1891 'id': 'M4gD1WSo5mA',
1893 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1894 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1896 'upload_date': '20150128',
1897 'license': 'Creative Commons Attribution license (reuse allowed)',
1898 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1899 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1902 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1903 'channel': 'The Berkman Klein Center for Internet & Society',
1904 'availability': 'public',
1906 'categories': ['Education'],
1907 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1908 'live_status': 'not_live',
1909 'playable_in_embed': True,
1910 'channel_follower_count': int,
1912 'uploader': 'The Berkman Klein Center for Internet & Society',
1913 'uploader_id': '@BKCHarvard',
1914 'uploader_url': 'https://www.youtube.com/@BKCHarvard',
1915 'timestamp': 1422422076,
1918 'skip_download': True,
1922 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1924 'id': 'eQcmzGIKrzg',
1926 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1927 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1929 'upload_date': '20151120',
1930 'license': 'Creative Commons Attribution license (reuse allowed)',
1931 'playable_in_embed': True,
1934 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1936 'availability': 'public',
1937 'categories': ['News & Politics'],
1938 'channel': 'Bernie Sanders',
1939 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1941 'live_status': 'not_live',
1942 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1943 'comment_count': int,
1944 'channel_follower_count': int,
1946 'uploader': 'Bernie Sanders',
1947 'uploader_url': 'https://www.youtube.com/@BernieSanders',
1948 'uploader_id': '@BernieSanders',
1949 'channel_is_verified': True,
1950 'heatmap': 'count:100',
1951 'timestamp': 1447987198,
1954 'skip_download': True,
1958 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',
1959 'only_matching': True,
1962 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1963 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1964 'only_matching': True,
1967 # Rental video preview
1968 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1970 'id': 'uGpuVWrhIzE',
1972 'title': 'Piku - Trailer',
1973 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1974 'upload_date': '20150811',
1975 'license': 'Standard YouTube License',
1978 'skip_download': True,
1980 'skip': 'This video is not available.',
1983 # YouTube Red video with episode data
1984 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1986 'id': 'iqKdEhx-dD4',
1988 'title': 'Isolation - Mind Field (Ep 1)',
1989 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1991 'upload_date': '20170118',
1992 'series': 'Mind Field',
1994 'episode_number': 1,
1995 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1998 'availability': 'public',
2000 'channel': 'Vsauce',
2001 'episode': 'Episode 1',
2002 'categories': ['Entertainment'],
2003 'season': 'Season 1',
2004 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
2005 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
2007 'playable_in_embed': True,
2008 'live_status': 'not_live',
2009 'channel_follower_count': int,
2010 'uploader': 'Vsauce',
2011 'uploader_url': 'https://www.youtube.com/@Vsauce',
2012 'uploader_id': '@Vsauce',
2013 'comment_count': int,
2014 'channel_is_verified': True,
2015 'timestamp': 1484761047,
2018 'skip_download': True,
2020 'expected_warnings': [
2021 'Skipping DASH manifest',
2025 # The following content has been identified by the YouTube community
2026 # as inappropriate or offensive to some audiences.
2027 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
2029 'id': '6SJNVb0GnPI',
2031 'title': 'Race Differences in Intelligence',
2032 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
2034 'upload_date': '20140124',
2037 'skip_download': True,
2039 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
2043 'url': '1t24XAntNCY',
2044 'only_matching': True,
2047 # geo restricted to JP
2048 'url': 'sJL6WA-aGkQ',
2049 'only_matching': True,
2052 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
2053 'only_matching': True,
2056 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
2057 'only_matching': True,
2060 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
2061 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
2062 'only_matching': True,
2066 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
2067 'only_matching': True,
2070 # Video with unsupported adaptive stream type formats
2071 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
2073 'id': 'Z4Vy8R84T1U',
2075 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
2076 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
2078 'upload_date': '20130923',
2079 'formats': 'maxcount:10',
2082 'skip_download': True,
2083 'youtube_include_dash_manifest': False,
2085 'skip': 'not actual anymore',
2088 # Youtube Music Auto-generated description
2089 # TODO: fix metadata extraction
2090 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2092 'id': 'MgNrAu2pzNs',
2094 'title': 'Voyeur Girl',
2095 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
2096 'upload_date': '20190312',
2097 'artists': ['Stephen'],
2098 'creators': ['Stephen'],
2099 'track': 'Voyeur Girl',
2100 'album': 'it\'s too much love to know my dear',
2101 'release_date': '20190313',
2102 'alt_title': 'Voyeur Girl',
2104 'playable_in_embed': True,
2106 'categories': ['Music'],
2107 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
2108 'channel': 'Stephen', # TODO: should be "Stephen - Topic"
2109 'uploader': 'Stephen',
2110 'availability': 'public',
2112 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
2114 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
2116 'live_status': 'not_live',
2117 'channel_follower_count': int,
2120 'skip_download': True,
2124 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
2125 'only_matching': True,
2128 # invalid -> valid video id redirection
2129 'url': 'DJztXj2GPfl',
2131 'id': 'DJztXj2GPfk',
2133 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
2134 'description': 'md5:bf577a41da97918e94fa9798d9228825',
2135 'upload_date': '20090125',
2136 'artist': 'Panjabi MC',
2137 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
2138 'album': 'Beware of the Boys (Mundian To Bach Ke)',
2141 'skip_download': True,
2143 'skip': 'Video unavailable',
2146 # empty description results in an empty string
2147 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
2149 'id': 'x41yOUIvK2k',
2151 'title': 'IMG 3456',
2153 'upload_date': '20170613',
2155 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
2157 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
2159 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
2160 'availability': 'public',
2162 'categories': ['Pets & Animals'],
2164 'playable_in_embed': True,
2165 'live_status': 'not_live',
2166 'channel': 'l\'Or Vert asbl',
2167 'channel_follower_count': int,
2168 'uploader': 'l\'Or Vert asbl',
2169 'uploader_url': 'https://www.youtube.com/@ElevageOrVert',
2170 'uploader_id': '@ElevageOrVert',
2171 'timestamp': 1497343210,
2174 'skip_download': True,
2178 # with '};' inside yt initial data (see [1])
2179 # see [2] for an example with '};' inside ytInitialPlayerResponse
2180 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
2181 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
2182 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
2184 'id': 'CHqg6qOn4no',
2186 'title': 'Part 77 Sort a list of simple types in c#',
2187 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
2188 'upload_date': '20130831',
2189 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
2191 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
2192 'live_status': 'not_live',
2193 'categories': ['Education'],
2194 'availability': 'public',
2195 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
2197 'playable_in_embed': True,
2201 'channel': 'kudvenkat',
2202 'comment_count': int,
2203 'channel_follower_count': int,
2205 'uploader': 'kudvenkat',
2206 'uploader_url': 'https://www.youtube.com/@Csharp-video-tutorialsBlogspot',
2207 'uploader_id': '@Csharp-video-tutorialsBlogspot',
2208 'channel_is_verified': True,
2209 'heatmap': 'count:100',
2210 'timestamp': 1377976349,
2213 'skip_download': True,
2217 # another example of '};' in ytInitialData
2218 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
2219 'only_matching': True,
2222 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
2223 'only_matching': True,
2226 # https://github.com/ytdl-org/youtube-dl/pull/28094
2227 'url': 'OtqTfy26tG0',
2229 'id': 'OtqTfy26tG0',
2231 'title': 'Burn Out',
2232 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
2233 'upload_date': '20141120',
2234 'artist': 'The Cinematic Orchestra',
2235 'track': 'Burn Out',
2236 'album': 'Every Day',
2238 'live_status': 'not_live',
2239 'alt_title': 'Burn Out',
2243 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2244 'creator': 'The Cinematic Orchestra',
2245 'channel': 'The Cinematic Orchestra',
2246 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
2247 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2248 'availability': 'public',
2249 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
2250 'categories': ['Music'],
2251 'playable_in_embed': True,
2252 'channel_follower_count': int,
2253 'uploader': 'The Cinematic Orchestra',
2254 'comment_count': int,
2257 'skip_download': True,
2261 # controversial video, only works with bpctr when authenticated with cookies
2262 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
2263 'only_matching': True,
2266 # controversial video, requires bpctr/contentCheckOk
2267 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
2269 'id': 'SZJvDhaSDnc',
2271 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2272 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
2273 'upload_date': '20140716',
2274 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2276 'categories': ['News & Politics'],
2278 'channel': 'CBS Mornings',
2279 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2280 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2282 'availability': 'needs_auth',
2283 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2285 'live_status': 'not_live',
2286 'playable_in_embed': True,
2287 'channel_follower_count': int,
2288 'uploader': 'CBS Mornings',
2289 'uploader_url': 'https://www.youtube.com/@CBSMornings',
2290 'uploader_id': '@CBSMornings',
2291 'comment_count': int,
2292 'channel_is_verified': True,
2293 'timestamp': 1405513526,
2297 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2298 'url': 'cBvYw8_A0vQ',
2300 'id': 'cBvYw8_A0vQ',
2302 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2303 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2304 'upload_date': '20201120',
2306 'categories': ['Travel & Events'],
2307 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2309 'channel': 'Walk around Japan',
2310 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2311 'thumbnail': 'https://i.ytimg.com/vi/cBvYw8_A0vQ/hqdefault.jpg',
2313 'availability': 'public',
2314 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2315 'live_status': 'not_live',
2316 'playable_in_embed': True,
2317 'channel_follower_count': int,
2318 'uploader': 'Walk around Japan',
2319 'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124',
2320 'uploader_id': '@walkaroundjapan7124',
2321 'timestamp': 1605884416,
2324 'skip_download': True,
2327 # Has multiple audio streams
2328 'url': 'WaOKSUlf4TM',
2329 'only_matching': True,
2331 # Requires Premium: has format 141 when requested using YTM url
2332 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2333 'only_matching': True,
2335 # multiple subtitles with same lang_code
2336 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2337 'only_matching': True,
2339 # Force use android client fallback
2340 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2342 'id': 'YOelRv7fMxY',
2343 'title': 'DIGGING A SECRET TUNNEL Part 1',
2345 'upload_date': '20210624',
2346 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2347 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
2348 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2350 'categories': ['Entertainment'],
2352 'channel': 'colinfurze',
2353 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2354 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2356 'availability': 'public',
2358 'live_status': 'not_live',
2359 'playable_in_embed': True,
2360 'channel_follower_count': int,
2362 'uploader': 'colinfurze',
2363 'uploader_url': 'https://www.youtube.com/@colinfurze',
2364 'uploader_id': '@colinfurze',
2365 'comment_count': int,
2366 'channel_is_verified': True,
2367 'heatmap': 'count:100',
2370 'format': '17', # 3gp format available on android
2371 'extractor_args': {'youtube': {'player_client': ['android']}},
2373 'skip': 'android client broken',
2376 # Skip download of additional client configs (remix client config in this case)
2377 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2378 'only_matching': True,
2380 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2384 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2385 'only_matching': True,
2387 'note': 'Storyboards',
2388 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2390 'id': '5KLPxDtMqe8',
2393 'title': 'Your Brain is Plastic',
2394 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2395 'upload_date': '20140324',
2397 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2398 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2400 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2401 'playable_in_embed': True,
2403 'availability': 'public',
2404 'channel': 'SciShow',
2405 'live_status': 'not_live',
2407 'categories': ['Education'],
2409 'channel_follower_count': int,
2411 'uploader': 'SciShow',
2412 'uploader_url': 'https://www.youtube.com/@SciShow',
2413 'uploader_id': '@SciShow',
2414 'comment_count': int,
2415 'channel_is_verified': True,
2416 'heatmap': 'count:100',
2417 'timestamp': 1395685455,
2418 }, 'params': {'format': 'mhtml', 'skip_download': True},
2420 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2421 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2423 'id': '2NUZ8W2llS4',
2425 'title': 'The NP that test your phone performance 🙂',
2426 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2427 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2428 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2432 'categories': ['Gaming'],
2434 'playable_in_embed': True,
2435 'live_status': 'not_live',
2436 'upload_date': '20220103',
2438 'availability': 'public',
2439 'channel': 'Leon Nguyen',
2440 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2441 'comment_count': int,
2442 'channel_follower_count': int,
2443 'uploader': 'Leon Nguyen',
2444 'uploader_url': 'https://www.youtube.com/@LeonNguyen',
2445 'uploader_id': '@LeonNguyen',
2446 'heatmap': 'count:100',
2447 'timestamp': 1641170939,
2450 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2451 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2453 'id': 'mzZzzBU6lrM',
2455 'title': 'I Met GeorgeNotFound In Real Life...',
2456 'description': 'md5:978296ec9783a031738b684d4ebf302d',
2457 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2458 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2462 'categories': ['Entertainment'],
2464 'playable_in_embed': True,
2465 'live_status': 'not_live',
2466 'release_timestamp': 1641172509,
2467 'release_date': '20220103',
2468 'upload_date': '20220103',
2470 'availability': 'public',
2471 'channel': 'Quackity',
2472 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2473 'channel_follower_count': int,
2474 'uploader': 'Quackity',
2475 'uploader_id': '@Quackity',
2476 'uploader_url': 'https://www.youtube.com/@Quackity',
2477 'comment_count': int,
2478 'channel_is_verified': True,
2479 'heatmap': 'count:100',
2480 'timestamp': 1641172509,
2483 { # continuous livestream.
2484 # Upload date was 2022-07-12T05:12:29-07:00, while stream start is 2022-07-12T15:59:30+00:00
2485 'url': 'https://www.youtube.com/watch?v=jfKfPfyJRdk',
2487 'id': 'jfKfPfyJRdk',
2489 'channel_id': 'UCSJ4gkVC6NrvII8umztf0Ow',
2491 'uploader': 'Lofi Girl',
2492 'categories': ['Music'],
2493 'concurrent_view_count': int,
2494 'playable_in_embed': True,
2495 'timestamp': 1657627949,
2496 'release_date': '20220712',
2497 'channel_url': 'https://www.youtube.com/channel/UCSJ4gkVC6NrvII8umztf0Ow',
2498 'description': 'md5:13a6f76df898f5674f9127139f3df6f7',
2500 'thumbnail': 'https://i.ytimg.com/vi/jfKfPfyJRdk/maxresdefault.jpg',
2501 'release_timestamp': 1657641570,
2502 'uploader_url': 'https://www.youtube.com/@LofiGirl',
2503 'channel_follower_count': int,
2504 'channel_is_verified': True,
2505 'title': r're:^lofi hip hop radio 📚 - beats to relax/study to',
2507 'live_status': 'is_live',
2509 'channel': 'Lofi Girl',
2510 'availability': 'public',
2511 'upload_date': '20220712',
2512 'uploader_id': '@LofiGirl',
2514 'params': {'skip_download': True},
2516 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2518 'id': 'tjjjtzRLHvA',
2520 'title': 'ハッシュタグ無し };if window.ytcsi',
2521 'upload_date': '20220323',
2523 'availability': 'unlisted',
2524 'channel': 'Lesmiscore',
2525 'thumbnail': r're:^https?://.*\.jpg',
2527 'categories': ['Music'],
2530 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2531 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2532 'live_status': 'not_live',
2533 'playable_in_embed': True,
2534 'channel_follower_count': int,
2537 'uploader_id': '@lesmiscore',
2538 'uploader': 'Lesmiscore',
2539 'uploader_url': 'https://www.youtube.com/@lesmiscore',
2540 'timestamp': 1648005313,
2543 # Prefer primary title+description language metadata by default
2544 # Do not prefer translated description if primary is empty
2545 'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
2547 'id': 'el3E4MbxRqQ',
2549 'title': 'dlp test video 2 - primary sv no desc',
2551 'channel': 'cole-dlp-test-acc',
2554 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2556 'playable_in_embed': True,
2557 'availability': 'unlisted',
2558 'thumbnail': r're:^https?://.*\.jpg',
2561 'live_status': 'not_live',
2562 'upload_date': '20220908',
2563 'categories': ['People & Blogs'],
2564 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2565 'uploader_url': 'https://www.youtube.com/@coletdjnz',
2566 'uploader_id': '@coletdjnz',
2567 'uploader': 'cole-dlp-test-acc',
2568 'timestamp': 1662677394,
2570 'params': {'skip_download': True},
2572 # Extractor argument: prefer translated title+description
2573 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
2575 'id': 'gHKT4uU8Zng',
2577 'channel': 'cole-dlp-test-acc',
2580 'live_status': 'not_live',
2581 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2582 'upload_date': '20220729',
2584 'categories': ['People & Blogs'],
2585 'thumbnail': r're:^https?://.*\.jpg',
2586 'title': 'dlp test video title translated (fr)',
2587 'availability': 'public',
2589 'description': 'dlp test video description translated (fr)',
2590 'playable_in_embed': True,
2591 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2592 'uploader_url': 'https://www.youtube.com/@coletdjnz',
2593 'uploader_id': '@coletdjnz',
2594 'uploader': 'cole-dlp-test-acc',
2595 'timestamp': 1659073275,
2598 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
2599 'expected_warnings': [r'Preferring "fr" translated fields'],
2601 'note': '6 channel audio',
2602 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2603 'only_matching': True,
2605 'note': 'Multiple HLS formats with same itag',
2606 'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',
2608 'id': 'kX3nB4PpJko',
2610 'categories': ['Entertainment'],
2611 'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',
2612 'live_status': 'not_live',
2614 'channel_follower_count': int,
2615 'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',
2616 'title': 'Last To Take Hand Off Jet, Keeps It!',
2617 'channel': 'MrBeast',
2618 'playable_in_embed': True,
2620 'upload_date': '20221112',
2621 'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',
2623 'availability': 'public',
2624 'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',
2627 'uploader': 'MrBeast',
2628 'uploader_url': 'https://www.youtube.com/@MrBeast',
2629 'uploader_id': '@MrBeast',
2630 'comment_count': int,
2631 'channel_is_verified': True,
2632 'heatmap': 'count:100',
2634 'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
2636 'note': 'Audio formats with Dynamic Range Compression',
2637 'url': 'https://www.youtube.com/watch?v=Tq92D6wQ1mg',
2639 'id': 'Tq92D6wQ1mg',
2641 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
2642 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
2643 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
2644 'channel_follower_count': int,
2645 'description': 'md5:17eccca93a786d51bc67646756894066',
2646 'upload_date': '20191228',
2647 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
2648 'playable_in_embed': True,
2650 'categories': ['Entertainment'],
2651 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
2653 'channel': 'Projekt Melody',
2655 'availability': 'needs_auth',
2656 'comment_count': int,
2657 'live_status': 'not_live',
2659 'uploader': 'Projekt Melody',
2660 'uploader_id': '@ProjektMelody',
2661 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
2662 'timestamp': 1577508724,
2664 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},
2667 'url': 'https://www.youtube.com/live/qVv6vCqciTM',
2669 'id': 'qVv6vCqciTM',
2672 'comment_count': int,
2673 'chapters': 'count:13',
2674 'upload_date': '20221223',
2675 'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
2676 'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
2678 'release_date': '20221223',
2679 'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
2680 'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
2682 'playable_in_embed': True,
2684 'availability': 'public',
2685 'channel_follower_count': int,
2686 'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
2687 'categories': ['Entertainment'],
2688 'live_status': 'was_live',
2689 'release_timestamp': 1671793345,
2690 'channel': 'さなちゃんねる',
2691 'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
2692 'uploader': 'さなちゃんねる',
2693 'uploader_url': 'https://www.youtube.com/@sana_natori',
2694 'uploader_id': '@sana_natori',
2695 'channel_is_verified': True,
2696 'heatmap': 'count:100',
2697 'timestamp': 1671798112,
2701 # Fallbacks when webpage and web client is unavailable
2702 'url': 'https://www.youtube.com/watch?v=wSSmNUl9Snw',
2704 'id': 'wSSmNUl9Snw',
2706 # 'categories': ['Science & Technology'],
2708 'chapters': 'count:2',
2709 'channel': 'Scott Manley',
2712 # 'availability': 'public',
2713 'channel_follower_count': int,
2714 'live_status': 'not_live',
2715 'upload_date': '20170831',
2718 'uploader_url': 'https://www.youtube.com/@scottmanley',
2719 'description': 'md5:f4bed7b200404b72a394c2f97b782c02',
2720 'uploader': 'Scott Manley',
2721 'uploader_id': '@scottmanley',
2722 'title': 'The Computer Hack That Saved Apollo 14',
2723 'channel_id': 'UCxzC4EngIsMrPmbm6Nxvb-A',
2724 'thumbnail': r're:^https?://.*\.webp',
2725 'channel_url': 'https://www.youtube.com/channel/UCxzC4EngIsMrPmbm6Nxvb-A',
2726 'playable_in_embed': True,
2727 'comment_count': int,
2728 'channel_is_verified': True,
2729 'heatmap': 'count:100',
2732 'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}},
2738 # YouTube <object> embed
2740 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
2741 'md5': '873c81d308b979f0e23ee7e620b312a3',
2743 'id': 'msN87y-iEx0',
2745 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
2746 'upload_date': '20080526',
2747 'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
2749 'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
2750 'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
2751 'playable_in_embed': True,
2752 'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
2754 'comment_count': int,
2755 'channel': 'Christopher Sykes',
2756 'live_status': 'not_live',
2757 'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
2758 'availability': 'public',
2761 'categories': ['Science & Technology'],
2762 'channel_follower_count': int,
2763 'uploader': 'Christopher Sykes',
2764 'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries',
2765 'uploader_id': '@ChristopherSykesDocumentaries',
2766 'heatmap': 'count:100',
2767 'timestamp': 1211825920,
2770 'skip_download': True,
def suitable(cls, url):
    """Return whether this extractor should handle ``url``.

    A URL whose query string carries a non-empty ``list`` parameter is
    declined (``False``); every other URL is decided by the parent class's
    ``suitable``.
    """
    # NOTE(review): the first parameter is `cls`, so this is presumably wrapped
    # by @classmethod on a line outside this block - confirm.
    from ..utils import parse_qs

    # `qs` has to be bound before it is read; the query string of `url` is
    # the only input available here.
    qs = parse_qs(url)
    # `[None]` is the fallback so that indexing [0] is safe when `list` is absent.
    if qs.get('list', [None])[0]:
        # NOTE(review): presumably such URLs belong to a different extractor - confirm.
        return False
    return super().suitable(url)
def __init__(self, *args, **kwargs):
    """Initialise the extractor and give it two empty per-instance caches.

    Every positional and keyword argument is forwarded unchanged to the
    parent initialiser.
    """
    super().__init__(*args, **kwargs)
    # Both caches start out as empty dicts.
    # NOTE(review): judging by the names they hold player code / player data;
    # confirm against the code that fills them.
    self._code_cache, self._player_cache = {}, {}
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
    """Attach fragment generators to every format flagged ``is_from_start``.

    Only format dicts whose ``is_from_start`` entry is truthy are touched, and
    they are modified in place:

    * ``is_live`` is set to the ``is_live`` argument;
    * while live, ``fragments`` becomes a callable (a partial of
      ``_live_dash_fragments``) and ``protocol`` becomes
      ``'http_dash_segments_generator'``;
    * otherwise ``fragments`` becomes a ``LazyList`` over that generator called
      with an empty context, and the ``is_from_start`` key is removed.

    Nothing is returned.
    """
    # Serialises manifest refreshes requested through `mpd_feed`.
    lock = threading.Lock()
    start_time = time.time()
    formats = [f for f in formats if f.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        """Re-download the player responses once ``delay`` seconds have elapsed.

        Rebinds the enclosing ``formats``, ``is_live`` and ``start_time``;
        does nothing while the previous fetch is younger than ``delay``.
        """
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(prs, (..., 'videoDetails'), expected_type=dict)
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict)
        _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        is_live = live_status == 'is_live'
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        for retry in self.RetryManager(fatal=False):
            # Only one caller at a time may refresh the shared state.
            with lock:
                refetch_manifest(format_id, delay)

            f = next((f for f in formats if f['format_id'] == format_id), None)
            if not f:
                # NOTE(review): setting `retry.error` presumably asks the
                # RetryManager for another attempt - confirm.
                if not is_live:
                    retry.error = f'{video_id}: Video is no longer live'
                else:
                    retry.error = f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}'
                continue
            return f['manifest_url'], f['manifest_stream_number'], is_live
        return None

    for f in formats:
        f['is_live'] = is_live
        # The last bound argument is False while live, else a copy of this
        # format dict (received by `_live_dash_fragments` as `manifestless_orig_fmt`).
        gen = functools.partial(self._live_dash_fragments, video_id, f['format_id'],
                                live_start_time, mpd_feed, not is_live and f.copy())
        if is_live:
            f['fragments'] = gen
            f['protocol'] = 'http_dash_segments_generator'
        else:
            # Not live: wrap the generator, started with an empty context, lazily.
            f['fragments'] = LazyList(gen({}))
            del f['is_from_start']
# Produce fragment descriptors (`url`, `fragment_count`) for one live DASH
# format by repeatedly polling for the newest sequence number.
# NOTE(review): many statements are missing from this listing (the embedded
# numbering skips e.g. 2839, 2841-2842, 2844, 2870-2871, 2929); `begin_index`,
# for instance, is read below but bound on a line not shown. The comments
# describe only the visible lines.
2837 def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
# FETCH_SPAN: minimum seconds between polls (see the sleep on the last line).
# MAX_DURATION: 432000 s = 120 h, the window quoted in the warning below.
2838 FETCH_SPAN, MAX_DURATION = 5, 432000
2840 mpd_url, stream_number, is_live = None, None, True
# A caller-supplied `start` in ctx takes precedence over the current time.
2843 download_start_time = ctx.get('start') or time.time()
# True when the stream began more than MAX_DURATION before the download start;
# a missing live_start_time makes the difference 0, i.e. False.
2845 lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
2846 if lack_early_segments:
2847 self.report_warning(bug_reports_message(
2848 'Starting download from the last 120 hours of the live stream since '
2849 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
# NOTE(review): if this sits inside the `if` above it re-assigns an already-true value - confirm.
2850 lack_early_segments = True
# known_idx: next sequence index to emit. no_fragment_score: penalty counter,
# raised below when no sequence number or no new fragment is obtained.
2852 known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
2853 fragments, fragment_base_url = None, None
# Closure: refresh the manifest via `mpd_feed` and return (should_continue, last_seq).
2855 def _extract_sequence_from_mpd(refresh_sequence, immediate):
# `last_seq` is not in this nonlocal list; the returns below only read the enclosing value.
2856 nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
2857 # Obtain from MPD's maximum seq value
2858 old_mpd_url = mpd_url
# Consumes (removes) any error recorded in ctx under 'last_error'.
2859 last_error = ctx.pop('last_error', None)
# A short refresh delay is requested when the caller asks for it or the last error was an HTTP 403.
2860 expire_fast = immediate or last_error and isinstance(last_error, HTTPError) and last_error.status == 403
# Delay passed to mpd_feed: 5 s when expiring fast, else 18000 s. A falsy
# result keeps the old URL and stream number and marks the stream not live.
2861 mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
2862 or (mpd_url, stream_number, False))
# When the caller does not want a new sequence number, return early with the existing one.
2863 if not refresh_sequence:
2864 if expire_fast and not is_live:
2865 return False, last_seq
2866 elif old_mpd_url == mpd_url:
2867 return True, last_seq
# Post-live manifestless case: reuse the original format dict instead of fetching the MPD.
2868 if manifestless_orig_fmt:
2869 fmt_info = manifestless_orig_fmt
2872 fmts, _ = self._extract_mpd_formats_and_subtitles(
2873 mpd_url, None, note=False, errnote=False, fatal=False)
2874 except ExtractorError:
2877 no_fragment_score += 2
2878 return False, last_seq
# Select the format whose stream number matches; `next` has no default here.
2879 fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
2880 fragments = fmt_info['fragments']
2881 fragment_base_url = fmt_info['fragment_base_url']
2882 assert fragment_base_url
# The newest sequence number is parsed from the `sq/<n>` part of the last fragment's path.
2884 _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
2885 return True, _last_seq
2887 self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
# NOTE(review): the enclosing loop header is not shown; the lines below
# presumably form one polling iteration.
2889 fetch_time = time.time()
# Threshold on the penalty counter; the action taken is on a line not shown.
2890 if no_fragment_score > 30:
2892 if last_segment_url:
2893 # Obtain from "X-Head-Seqnum" header value from each segment
2895 urlh = self._request_webpage(
2896 last_segment_url, None, note=False, errnote=False, fatal=False)
2897 except ExtractorError:
2899 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
# No usable header: penalise and forget the segment URL.
2900 if last_seq is None:
2901 no_fragment_score += 2
2902 last_segment_url = None
# Full refresh through the MPD; a penalty above 15 requests the short delay.
2905 should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
2906 no_fragment_score += 2
2907 if not should_continue:
2910 if known_idx > last_seq:
2911 last_segment_url = None
2916 if begin_index < 0 and known_idx < 0:
2917 # skip from the start when it's negative value
2918 known_idx = last_seq + begin_index
# Clamp the start so that at most MAX_DURATION worth of fragments
# (measured with the last fragment's duration) is requested.
2919 if lack_early_segments:
2920 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
# Emit every sequence index from known_idx up to, but excluding, last_seq.
2922 for idx in range(known_idx, last_seq):
2923 # do not update sequence here or you'll get skipped some part of it
2924 should_continue, _ = _extract_sequence_from_mpd(False, False)
2925 if not should_continue:
# Raised as a control-flow signal; presumably handled by the `except ExtractorError` further down - confirm.
2927 raise ExtractorError('breaking out of outer loop')
2928 last_segment_url = urljoin(fragment_base_url, f'sq/{idx}')
# Keys of the fragment descriptor; the statement opening this dict is not shown.
2930 'url': last_segment_url,
2931 'fragment_count': last_seq,
# No progress since the previous poll is penalised; the reset to 0 is the
# alternative branch (its `else` is not shown).
2933 if known_idx == last_seq:
2934 no_fragment_score += 5
2936 no_fragment_score = 0
2937 known_idx = last_seq
2938 except ExtractorError:
2941 if manifestless_orig_fmt:
2942 # Stop at the first iteration if running for post-live manifestless;
2943 # fragment count no longer increase since it starts
# Sleep for whatever remains of FETCH_SPAN since this iteration's fetch_time (never negative).
2946 time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL advertised by the given ytcfg dicts.

    `traverse_obj` is given two paths in order: a top-level
    ``PLAYER_JS_URL`` and a ``jsUrl`` under ``WEB_PLAYER_CONTEXT_CONFIGS``.

    @param ytcfgs   any number of ytcfg dicts to search
    @param webpage  accepted for interface compatibility; not read here
    @returns        the URL joined onto https://www.youtube.com, or None
                    when no config carries a player URL
    """
    player_url = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    # NOTE(review): this guard was absent from the listing; reconstructed - confirm.
    if not player_url:
        return None
    return urljoin('https://www.youtube.com', player_url)
def _download_player_url(self, video_id, fatal=False):
    """Derive the player JS URL from the public iframe API script.

    @param video_id  id used for logging by the download helper
    @param fatal     forwarded to the download and the regex search
    @returns         the ``base.js`` URL for the player version found in
                     the iframe API script, or None when the download or
                     the version search yields nothing
    """
    res = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    # NOTE(review): both guards were absent from the listing; reconstructed - confirm.
    if not res:
        return None
    # The version is the 8-hex-digit path component after "player/".
    player_version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
    if not player_version:
        return None
    return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2966 def _signature_cache_id(self, example_sig):
2967 """ Return a string representation of a signature """
2968 return '.'.join(str(len(part)) for part in example_sig.split('.'))
2971 def _extract_player_info(cls, player_url):
2972 for player_re in cls._PLAYER_INFO_RE:
2973 id_m = re.search(player_re, player_url)
2977 raise ExtractorError(f'Cannot identify player {player_url!r}')
2978 return id_m.group('id')
def _load_player(self, video_id, player_url, fatal=True):
    """Return the player JS source for ``player_url``, downloading it at most once per player id.

    @param video_id    id used for logging by the download helper
    @param player_url  URL of the player JS
    @param fatal       forwarded to the download helper
    @returns           the cached source, or None when nothing could be downloaded
    """
    player_id = self._extract_player_info(player_url)
    if player_id not in self._code_cache:
        code = self._download_webpage(
            player_url, video_id, fatal=fatal,
            note='Downloading player ' + player_id,
            errnote=f'Download of {player_url} failed')
        # NOTE(review): this guard was absent from the listing; reconstructed so
        # that a failed (falsy) download is not cached and can be retried - confirm.
        if code:
            self._code_cache[player_id] = code
    return self._code_cache.get(player_id)
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Build a callable that reorders a signature the way the player JS does.

    The transformation is stored as a list of source indices
    (``cache_spec``) in the 'youtube-sigfuncs' cache section, keyed by
    player id and signature shape.

    @param video_id     id used for logging while downloading the player
    @param player_url   URL of the player JS
    @param example_sig  a signature whose shape selects the cache entry
    @returns            function mapping a signature string to its decrypted form
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
    # The id is used as a cache key and must not contain path separators.
    assert os.path.basename(func_id) == func_id

    self.write_debug(f'Extracting signature function {func_id}')
    cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None

    # NOTE(review): the two conditionals below were absent from the listing;
    # reconstructed as "only parse the player on a cache miss" - confirm.
    if not cache_spec:
        code = self._load_player(video_id, player_url)
    if code:
        res = self._parse_sig_js(code)
        # Feed a string whose i-th character has code point i, so the
        # output's code points spell out which input index landed where.
        test_string = ''.join(map(chr, range(len(example_sig))))
        cache_spec = [ord(c) for c in res(test_string)]
        self.cache.store('youtube-sigfuncs', func_id, cache_spec)

    return lambda s: ''.join(s[i] for i in cache_spec)
# When the 'youtube_print_sig_code' option is set, print a Python snippet
# equivalent to the extracted signature function `func`.
# NOTE(review): much of gen_sig_code's state handling is missing from this
# listing (embedded numbers 3013-3014, 3021-3022, 3028, 3030-3031, 3033-3036,
# 3038-3040 are absent); the comments describe only the visible lines.
3011 def _print_sig_code(self, func, example_sig):
# Option check; the action taken when the option is unset is on a line not shown.
3012 if not self.get_param('youtube_print_sig_code'):
# Generator turning a list of source indices into slice / index expressions on `s`.
3015 def gen_sig_code(idxs):
# Render one slice expression. `end` is the last index included, so the
# slice bound is `end + step`; a negative bound is left open (':').
3016 def _genslice(start, end, step):
3017 starts = '' if start == 0 else str(start)
3018 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
3019 steps = '' if step == 1 else (':%d' % step)
3020 return f's[{starts}{ends}{steps}]'
3023 # Quelch pyflakes warnings - start will be set when step is set
3024 start = '(Never used)'
# Walk consecutive index pairs (prev, i).
3025 for i, prev in zip(idxs[1:], idxs[:-1]):
# `step` is bound on a line not shown; non-None presumably means a run is in progress.
3026 if step is not None:
3027 if i - prev == step:
3029 yield _genslice(start, prev, step)
# A difference of +1 or -1 is tested here; the branch body is not shown.
3032 if i - prev in [-1, 1]:
# A single index is emitted as a plain subscript.
3037 yield 's[%d]' % prev
3041 yield _genslice(start, i, step)
# Probe string whose i-th character has code point i, so the output's code
# points are the source indices chosen by `func`.
3043 test_string = ''.join(map(chr, range(len(example_sig))))
3044 cache_res = func(test_string)
3045 cache_spec = [ord(c) for c in cache_res]
3046 expr_code = ' + '.join(gen_sig_code(cache_spec))
# Tuple literal of the part lengths of example_sig, used as the guard in the printed snippet.
3047 signature_id_tuple = '({})'.format(', '.join(str(len(p)) for p in example_sig.split('.')))
3048 code = (f'if tuple(len(p) for p in s.split(\'.\')) == {signature_id_tuple}:\n'
3049 f' return {expr_code}\n')
3050 self.to_screen('Extracted signature function:\n' + code)
def _parse_sig_js(self, jscode):
    """Find the signature function in the player JS and return a Python callable for it.

    @param jscode  source of the player JS
    @returns       function taking the encrypted signature string and
                   returning the interpreted function's result
    """
    # The patterns are handed to the search helper as one tuple, in this order.
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        sig_name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    initial_function = interpreter.extract_function(funcname)

    def run_signature(s):
        # The interpreted function takes its arguments as a list.
        return initial_function([s])

    return run_signature
3073 def _cached(self, func, *cache_id):
3074 def inner(*args, **kwargs):
3075 if cache_id not in self._player_cache:
3077 self._player_cache[cache_id] = func(*args, **kwargs)
3078 except ExtractorError as e:
3079 self._player_cache[cache_id] = e
3080 except Exception as e:
3081 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
3083 ret = self._player_cache[cache_id]
3084 if isinstance(ret, Exception):
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature

    @param s           the encrypted signature
    @param video_id    id used for logging while loading the player
    @param player_url  URL of the player JS providing the algorithm
    @returns           the decrypted signature
    """
    # Cached per player URL and signature shape.
    extract_sig = self._cached(
        self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
    func = extract_sig(video_id, player_url, s)
    self._print_sig_code(func, s)
    # NOTE(review): the return was absent from the listing; reconstructed - confirm.
    return func(s)
# Decrypt the `n` parameter value `s` using the player JS at `player_url`,
# with a PhantomJS-based attempt when the built-in interpreter fails.
# NOTE(review): the `try:` headers, the `raise`/`return` statements and the
# PhantomJS call opener are missing from this listing (embedded numbers 3102-3103,
# 3109-3110, 3114, 3117, 3122, 3124, 3127, 3129 are absent); the comments
# describe only the visible lines.
3097 def _decrypt_nsig(self, s, video_id, player_url):
3098 """Turn the encrypted n field into a working signature"""
3099 if player_url is None:
3100 raise ExtractorError('Cannot decrypt nsig without player_url')
# Relative player URLs are resolved against the main site.
3101 player_url = urljoin('https://www.youtube.com', player_url)
3104 jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
# Failures of the code extraction are re-raised with a fixed message and the original as cause.
3105 except ExtractorError as e:
3106 raise ExtractorError('Unable to extract nsig function code', cause=e)
# func_code is indexed as a pair; element 1 (the body, per the unpacking below) is what gets printed.
3107 if self.get_param('youtube_print_sig_code'):
3108 self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
# The compiled function is memoised per player URL through `_cached`.
3111 extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
3112 ret = extract_nsig(jsi, func_code)(s)
# Interpreter failure: the visible lines below set up a PhantomJS wrapper.
3113 except JSInterpreter.Exception as e:
3115 jsi = PhantomJSwrapper(self, timeout=5000)
# The handling when the wrapper cannot be created is on a line not shown.
3116 except ExtractorError:
3118 self.report_warning(
3119 f'Native nsig extraction failed: Trying with PhantomJS\n'
3120 f' n = {s} ; player = {player_url}', video_id)
3121 self.write_debug(e, only_once=True)
3123 args, func_body = func_code
# The JS snippet defines the function inline, calls it with the repr of `s`
# and logs the result; the surrounding call is on a line not shown.
3125 f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));',
3126 video_id=video_id, note='Executing signature code').strip()
3128 self.write_debug(f'Decrypted nsig {s} => {ret}')
def _extract_n_function_name(self, jscode):
    """Return the name of the player's n-parameter function.

    The player references the function either directly or as an element
    of an array variable (``name[idx]``); in the second case the array
    literal is parsed and the indexed element returned.

    @param jscode  source of the player JS
    @returns       the function name
    """
    funcname, idx = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
        jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    # NOTE(review): this early return was absent from the listing; reconstructed
    # (without it `int(idx)` fails whenever the index group is empty) - confirm.
    if not idx:
        return funcname

    return json.loads(js_to_json(self._search_regex(
        rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
        f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
# Return (jsi, player_id, func_code) for the n-parameter function of the
# player at `player_url`, using the 'youtube-nsig' cache section.
# NOTE(review): several conditionals are missing from this listing (embedded
# numbers 3147-3148, 3150, 3152-3153, 3159, 3161, 3164 are absent), and the
# verbose regex literal below is broken across physical lines; it is left
# byte-identical. The comments describe only the visible lines.
3142 def _extract_n_function_code(self, video_id, player_url):
3143 player_id = self._extract_player_info(player_url)
# Cache entries written by versions older than 2022.09.1 are not accepted.
3144 func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
# NOTE(review): on a cache hit `jscode` is bound to the cached func_code
# rather than to player source - confirm that the interpreter tolerates this.
3145 jscode = func_code or self._load_player(video_id, player_url)
3146 jsi = JSInterpreter(jscode)
# Early return; presumably the cache-hit path (its condition is not shown).
3149 return jsi, player_id, func_code
3151 func_name = self._extract_n_function_name(jscode)
# First attempt: cut the function's parameter name and body out with a regex
# (`default=None` makes a miss non-fatal).
3154 func_code = self._search_regex(
3155 rf'''(?xs
){func_name}\s
*=\s
*function\s
*\
((?P
<var
>[\w$
]+)\
)\s
*
3156 # NB: The end of the regex is intentionally kept strict
3157 {{(?P<code>.+?}
}\s
*return\
[\w$
]+.join\
(""\
))}};''',
3158 jscode, 'nsig function', group=('var', 'code'), default=None)
# Normalise the regex result to ([argument names], body).
3160 func_code = ([func_code[0]], func_code[1])
# Alternative path: let the JS interpreter extract the code by function name.
3162 self.write_debug('Extracting nsig function with jsinterp')
3163 func_code = jsi.extract_function_code(func_name)
# Persist the result under the player id for later runs.
3165 self.cache.store('youtube-nsig', player_id, func_code)
3166 return jsi, player_id, func_code
3168 def _extract_n_function_from_code(self, jsi, func_code):
3169 func = jsi.extract_function_from_code(*func_code)
3171 def extract_nsig(s):
3174 except JSInterpreter.Exception:
3176 except Exception as e:
3177 raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
3179 if ret.startswith('enhanced_except_'):
3180 raise JSInterpreter.Exception('Signature function returned an exception')
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    @param video_id    id used for logging while loading the player
    @param player_url  URL of the player JS, used when ytcfg has no STS
    @param ytcfg       optional ytcfg dict; its 'STS' value is preferred
    @param fatal       raise instead of warning when extraction is impossible
    @returns           the timestamp as an int, or None
    """
    # NOTE(review): the initial binding, the `if not sts:` / `if fatal:` /
    # `if code:` conditionals and both returns were absent from the listing;
    # reconstructed - confirm.
    sts = None
    if isinstance(ytcfg, dict):
        sts = int_or_none(ytcfg.get('STS'))

    if not sts:
        # Attempt to extract from player
        if player_url is None:
            error_msg = 'Cannot extract signature timestamp without player_url.'
            if fatal:
                raise ExtractorError(error_msg)
            self.report_warning(error_msg)
            return None
        code = self._load_player(video_id, player_url, fatal=fatal)
        if code:
            # The player embeds the value as a five-digit number.
            sts = int_or_none(self._search_regex(
                r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
                'JS player signature timestamp', group='sts', fatal=fatal))
    return sts
# Request the playback-tracking URLs found in the player responses so the
# video is marked as watched.
# NOTE(review): the statements that fill `qs` are largely missing from this
# listing (embedded numbers 3214, 3216, 3219, 3224, 3227-3230, 3233-3235,
# 3238-3242 are absent); the comments describe only the visible lines.
3209 def _mark_watched(self, video_id, player_responses):
# enumerate yields 0 for the playback URL and 1 for the watchtime URL, so
# `is_full` is truthy only for the second.
3210 for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')):
3211 label = 'fully ' if is_full else ''
3212 url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
3213 expected_type=url_or_none)
# Warning for a missing URL; its condition is on a line not shown.
3215 self.report_warning(f'Unable to mark {label}watched')
3217 parsed_url = urllib.parse.urlparse(url)
# parse_qs maps every parameter name to a list of values.
3218 qs = urllib.parse.parse_qs(parsed_url.query)
3220 # cpn generation algorithm is reverse engineered from base.js.
3221 # In fact it works even with dummy cpn.
3222 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
# 16 characters; `& 63` maps each draw onto the 64-character alphabet.
# randint includes both bounds, so 256 (-> index 0) can also be drawn.
3223 cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))
3225 # # more consistent results setting it to right before the end
# One second less than the `len` parameter (defaulting to '1.5'), kept as a
# one-element list like the other parse_qs values.
3226 video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]
3231 'cmt': video_length,
3232 'el': 'detailpage', # otherwise defaults to "shorts"
3236 # these seem to mark watchtime "history" in the real world
3237 # they're required, so send in a single value
# Rebuild the URL with the modified query; the `True` (doseq) expands list values.
3243 url = urllib.parse.urlunparse(
3244 parsed_url._replace(query=urllib.parse.urlencode(qs, True)))
# Best effort: a failed request is not fatal.
3246 self._download_webpage(
3247 url, video_id, f'Marking {label}watched',
3248 'Unable to mark watched', fatal=False)
# Yield URL results for YouTube videos referenced by an arbitrary webpage.
# NOTE(review): the decorator, the opener of the first search and its `if`
# are missing from this listing (embedded numbers 3250, 3255, 3257-3258 are
# absent), and the verbose regex literal near the end is broken across physical
# lines; it is left byte-identical. The comments describe only the visible lines.
3251 def _extract_from_webpage(cls, url, webpage):
3252 # Invidious Instances
3253 # https://github.com/yt-dlp/yt-dlp/issues/195
3254 # https://github.com/iv-org/invidious/pull/1730
# Pattern for a <link rel="alternate"> pointing at a watch URL with an 11-character id.
3256 r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
# On a match the canonical URL is yielded and extraction is stopped via StopExtraction.
3259 yield cls.url_result(mobj.group('url'), cls)
3260 raise cls.StopExtraction
# Otherwise the parent class's embed detection runs first.
3262 yield from super()._extract_from_webpage(url, webpage)
3264 # lazyYT YouTube embed
# The id attribute is HTML-unescaped for the URL; the raw value is passed as the third argument.
3265 for id_ in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
3266 yield cls.url_result(unescapeHTML(id_), cls, id_)
3268 # Wordpress "YouTube Video Importer" plugin
3269 for m in re.findall(r'''(?x
)<div
[^
>]+
3270 class=(?P
<q1
>[\'"])[^\'"]*\byvii
_single
_video
_player
\b[^
\'"]*(?P=q1)[^>]+
3271 data-video_id=(?P<q2>[\'"])([^
\'"]+)(?P=q2)''', webpage):
# findall returns one tuple per match; the last group is the video id.
3272 yield cls.url_result(m[-1], cls, m[-1])
@classmethod
def extract_id(cls, url):
    """Return the video id contained in ``url``.

    @param url  the URL to inspect
    @returns    the id reported by ``cls.get_temp_id``
    @raises ExtractorError  when no id can be derived from the URL
    """
    video_id = cls.get_temp_id(url)
    # NOTE(review): the guard and the return were absent from the listing,
    # which left the raise unconditional; reconstructed - confirm.
    if not video_id:
        raise ExtractorError(f'Invalid URL: {url}')
    return video_id
def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar in the initial data.

    @param data      the page's initial data dict
    @param duration  video duration, forwarded to the chapter helper
    @returns         whatever ``_extract_chapters_helper`` returns for the
                     chapter list found
    """
    # NOTE(review): the `data, (` opener, the `chapter_list,` argument and
    # the closing `duration=duration)` were absent from the listing;
    # reconstructed - confirm.
    chapter_list = traverse_obj(
        data, (
            'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
            'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters',
        ), expected_type=list)

    return self._extract_chapters_helper(
        chapter_list,
        # Start times are given in milliseconds; scale them to seconds.
        start_function=lambda chapter: float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
        title_function=lambda chapter: traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
        duration=duration)
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build chapters from the macro-marker lists in the engagement panels.

    @param data      the page's initial data dict
    @param duration  video duration, forwarded to the chapter helper
    @returns         the first non-empty chapter list produced from any
                     panel, or an empty list
    """
    # NOTE(review): the `data,` and `expected_type=list)` arguments were
    # absent from the listing; reconstructed - confirm.
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list)
    chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
    chapter_title = lambda chapter: self._get_text(chapter, 'title')

    # Panels are tried lazily in order; the first one yielding chapters wins.
    return next(filter(None, (
        self._extract_chapters_helper(traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
                                      chapter_time, chapter_title, duration)
        for contents in content_list)), [])
def _extract_heatmap(self, data):
    """Extract the heatmap markers from the initial data.

    Every marker of a markers list whose type is MARKER_TYPE_HEATMAP is
    turned into a dict with ``start_time``, ``end_time`` (both in
    seconds) and ``value``.

    @param data  the page's initial data dict
    @returns     list of marker dicts, or None when there are none
    """
    # NOTE(review): the closing of this expression was absent from the listing;
    # the trailing `or None` is reconstructed - confirm.
    return traverse_obj(data, (
        'frameworkUpdates', 'entityBatchUpdate', 'mutations',
        lambda _, v: v['payload']['macroMarkersListEntity']['markersList']['markerType'] == 'MARKER_TYPE_HEATMAP',
        'payload', 'macroMarkersListEntity', 'markersList', 'markers', ..., {
            # Millisecond values are scaled to seconds.
            'start_time': ('startMillis', {functools.partial(float_or_none, scale=1000)}),
            'end_time': {lambda x: (int(x['startMillis']) + int(x['durationMillis'])) / 1000},
            'value': ('intensityScoreNormalized', {float_or_none}),
        })) or None
# Build a comment info dict from the entity payloads of the newer
# (entity-based) comment format.
# NOTE(review): the early return, the opening of the returned dict and the
# closing brackets are missing from this listing (embedded numbers 3322-3323,
# 3326-3328, 3341, 3346 are absent); the comments describe only the visible lines.
3319 def _extract_comment(self, entities, parent=None):
# First entity carrying a commentEntityPayload dict.
3320 comment_entity_payload = get_first(entities, ('payload', 'commentEntityPayload', {dict}))
# The comment id is bound inline; what happens without one is on a line not shown.
3321 if not (comment_id := traverse_obj(comment_entity_payload, ('properties', 'commentId', {str}))):
3324 toolbar_entity_payload = get_first(entities, ('payload', 'engagementToolbarStateEntityPayload', {dict}))
# Falls back to an empty string so the later uses always receive a str.
3325 time_text = traverse_obj(comment_entity_payload, ('properties', 'publishedTime', {str})) or ''
# Top-level comments get the parent 'root'.
3329 'parent': parent or 'root',
# Fields read straight from the comment payload; each path ends in a type filter or converter.
3330 **traverse_obj(comment_entity_payload, {
3331 'text': ('properties', 'content', 'content', {str}),
3332 'like_count': ('toolbar', 'likeCountA11y', {parse_count}),
3333 'author_id': ('author', 'channelId', {self.ucid_or_none}),
3334 'author': ('author', 'displayName', {str}),
3335 'author_thumbnail': ('author', 'avatarThumbnailUrl', {url_or_none}),
3336 'author_is_uploader': ('author', 'isCreator', {bool}),
3337 'author_is_verified': ('author', 'isVerified', {bool}),
# Two alternative locations for the channel URL, made absolute against the main site.
3338 'author_url': ('author', 'channelCommand', 'innertubeCommand', (
3339 ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'),
3340 ), {lambda x: urljoin('https://www.youtube.com', x)}),
# None when no toolbar payload was found, otherwise whether the heart state is "hearted".
3342 'is_favorited': (None if toolbar_entity_payload is None else
3343 toolbar_entity_payload.get('heartState') == 'TOOLBAR_HEART_STATE_HEARTED'),
3344 '_time_text': time_text, # FIXME: non-standard, but we need a way of showing that it is an estimate.
3345 'timestamp': self._parse_time_text(time_text),
# Build a comment info dict from a comment renderer of the older comment format.
# NOTE(review): the early return, the `info = {` opener, the `info.update({`
# opener, the pinned-comment condition and the final return are missing from
# this listing (embedded numbers 3350-3354, 3361-3362, 3366-3367, 3371-3372,
# 3392, 3394-3395 are absent); the comments describe only the visible lines.
3348 def _extract_comment_old(self, comment_renderer, parent=None):
3349 comment_id = comment_renderer.get('commentId')
3355 'text': self._get_text(comment_renderer, 'contentText'),
3356 'like_count': self._get_count(comment_renderer, 'voteCount'),
3357 'author_id': traverse_obj(comment_renderer, ('authorEndpoint', 'browseEndpoint', 'browseId', {self.ucid_or_none})),
3358 'author': self._get_text(comment_renderer, 'authorText'),
# The last thumbnail in the list is used.
3359 'author_thumbnail': traverse_obj(comment_renderer, ('authorThumbnail', 'thumbnails', -1, 'url', {url_or_none})),
# Top-level comments get the parent 'root'.
3360 'parent': parent or 'root',
3363 # Timestamp is an estimate calculated from the current time and time_text
3364 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
3365 timestamp = self._parse_time_text(time_text)
3368 # FIXME: non-standard, but we need a way of showing that it is an estimate.
3369 '_time_text': time_text,
3370 'timestamp': timestamp,
# Two alternative locations for the channel URL, made absolute against the main site.
3373 info['author_url'] = urljoin(
3374 'https://www.youtube.com', traverse_obj(comment_renderer, ('authorEndpoint', (
3375 ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'))),
3376 expected_type=str, get_all=False))
# The optional fields below are only set when the renderer provides the source value.
3378 author_is_uploader = traverse_obj(comment_renderer, 'authorIsChannelOwner')
3379 if author_is_uploader is not None:
3380 info['author_is_uploader'] = author_is_uploader
3382 comment_abr = traverse_obj(
3383 comment_renderer, ('actionButtons', 'commentActionButtonsRenderer'), expected_type=dict)
# The presence of a 'creatorHeart' key marks the comment as favorited.
3384 if comment_abr is not None:
3385 info['is_favorited'] = 'creatorHeart' in comment_abr
3387 badges = self._extract_badges([traverse_obj(comment_renderer, 'authorCommentBadge')])
3388 if self._has_badge(badges, BadgeType.VERIFIED):
3389 info['author_is_verified'] = True
3391 is_pinned = traverse_obj(comment_renderer, 'pinnedCommentBadge')
3393 info['is_pinned'] = True
# Generator over the comments of a video (parent is None) or over the replies
# of one comment thread (parent set), following continuation tokens page by page.
# `tracker` carries the running counters across the recursive calls.
# NOTE(review): many statements are missing from this listing (for example the
# `yield`s, `break`/`continue`/`return` lines, `try:` headers, the creation of
# `tracker` and the initial binding of `response`); the comments describe only
# the visible lines.
3397 def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
# First value of an extractor argument, or '' when it is not given.
3399 get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]
# Closure: read the comments header (expected count, sort menu) and return
# the continuation for the selected sort order.
3401 def extract_header(contents):
3402 _continuation = None
3403 for content in contents:
3404 comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
3405 expected_comment_count = self._get_count(
3406 comments_header_renderer, 'countText', 'commentsCount')
3408 if expected_comment_count is not None:
3409 tracker['est_total'] = expected_comment_count
3410 self.to_screen(f'Downloading ~{expected_comment_count} comments')
3411 comment_sort_index = int(get_single_config_arg('comment_sort') != 'top') # 1 = new, 0 = top
# The sort menu item at that index supplies the continuation to follow.
3413 sort_menu_item = try_get(
3414 comments_header_renderer,
3415 lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
3416 sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}
3418 _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
3419 if not _continuation:
3422 sort_text = str_or_none(sort_menu_item.get('title'))
# Fallback label; its condition is on a line not shown.
3424 sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
3425 self.to_screen(f'Sorting comments by {sort_text.lower()}')
3427 return _continuation
# Closure: turn one page of thread items into comment dicts, recursing into replies.
3429 def extract_thread(contents, entity_payloads):
3431 tracker['current_page_thread'] = 0
3432 for content in contents:
# Parent-comment limit check; it only applies at top level.
3433 if not parent and tracker['total_parent_comments'] >= max_parents:
3435 comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
3437 # old comment format
3438 if not entity_payloads:
3439 comment_renderer = get_first(
3440 (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
3441 expected_type=dict, default={})
3443 comment = self._extract_comment_old(comment_renderer, parent)
3445 # new comment format
3448 traverse_obj(comment_thread_renderer, ('commentViewModel', 'commentViewModel', {dict}))
3449 or traverse_obj(content, ('commentViewModel', {dict})))
# The view model names the entity keys whose payloads belong to this comment.
3450 comment_keys = traverse_obj(view_model, (('commentKey', 'toolbarStateKey'), {str}))
3451 if not comment_keys:
3453 entities = traverse_obj(entity_payloads, lambda _, v: v['entityKey'] in comment_keys)
3454 comment = self._extract_comment(entities, parent)
3456 comment['is_pinned'] = traverse_obj(view_model, ('pinnedText', {str})) is not None
3460 comment_id = comment['id']
3462 if comment.get('is_pinned'):
3463 tracker['pinned_comment_ids'].add(comment_id)
3464 # Sometimes YouTube may break and give us infinite looping comments.
3465 # See: https://github.com/yt-dlp/yt-dlp/issues/6290
3466 if comment_id in tracker['seen_comment_ids']:
3467 if comment_id in tracker['pinned_comment_ids'] and not comment.get('is_pinned'):
3468 # Pinned comments may appear a second time in newest first sort
3469 # See: https://github.com/yt-dlp/yt-dlp/issues/6712
3471 self.report_warning(
3472 'Detected YouTube comments looping. Stopping comment extraction '
3473 f'{"for this thread" if parent else ""} as we probably cannot get any more.')
3476 tracker['seen_comment_ids'].add(comment['id'])
# Replies and parents are counted separately.
3478 tracker['running_total'] += 1
3479 tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
3482 # Attempt to get the replies
3483 comment_replies_renderer = try_get(
3484 comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
3486 if comment_replies_renderer:
3487 tracker['current_page_thread'] += 1
# Recursive call for the reply thread, sharing the same tracker.
3488 comment_entries_iter = self._comment_entries(
3489 comment_replies_renderer, ytcfg, video_id,
3490 parent=comment.get('id'), tracker=tracker)
# Replies are capped by both the per-thread limit and what remains of the overall reply limit.
3491 yield from itertools.islice(comment_entries_iter, min(
3492 max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))
3494 # Keeps track of counts across recursive calls
3499 'current_page_thread': 0,
3500 'total_parent_comments': 0,
3501 'total_reply_comments': 0,
3502 'seen_comment_ids': set(),
3503 'pinned_comment_ids': set(),
3507 # YouTube comments have a max depth of 2
3508 max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
3510 self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
3511 'Set max replies in the max-comments extractor argument instead')
3512 if max_depth == 1 and parent:
# The 'max_comments' argument supplies up to four limits; the list is padded
# with '' and every value that does not convert becomes sys.maxsize.
3515 max_comments, max_parents, max_replies, max_replies_per_thread, *_ = (
3516 int_or_none(p, default=sys.maxsize) for p in self._configuration_arg('max_comments') + [''] * 4)
3518 continuation = self._extract_continuation(root_continuation_data)
3521 is_forced_continuation = False
3522 is_first_continuation = parent is None
3523 if is_first_continuation and not continuation:
3524 # Sometimes you can get comments by generating the continuation yourself,
3525 # even if YouTube initially reports them being disabled - e.g. stories comments.
3526 # Note: if the comment section is actually disabled, YouTube may return a response with
3527 # required check_get_keys missing. So we will disable that check initially in this case.
3528 continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
3529 is_forced_continuation = True
# Path to the items of a page, for both the reload and the append response shapes.
3531 continuation_items_path = (
3532 'onResponseReceivedEndpoints', ..., ('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems')
# One iteration per page.
3533 for page_num in itertools.count(0):
3534 if not continuation:
# Visitor data is taken from the previous response (`response` is bound on a line not shown).
3536 headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
3537 comment_prog_str = f"({tracker['running_total']}
/~{tracker['est_total']}
)"
# Three alternative progress notes: the section request, a reply thread, and
# a numbered page. The conditions selecting the last two are on lines not shown.
3539 if is_first_continuation:
3540 note_prefix = 'Downloading comment section API JSON'
3542 note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
3543 tracker['current_page_thread'], comment_prog_str)
3545 note_prefix = '{}Downloading comment{} API JSON page {} {}'.format(
3546 ' ' if parent else '', ' replies' if parent else '',
3547 page_num, comment_prog_str)
3549 # Do a deep check for incomplete data as sometimes YouTube may return no comments for a continuation
3550 # Ignore check if YouTube says the comment count is 0.
3551 check_get_keys = None
3552 if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0):
3553 check_get_keys = [[*continuation_items_path, ..., (
3554 'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentViewModel', 'commentRenderer'))]]
3556 response = self._extract_response(
3557 item_id=None, query=continuation,
3558 ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
3559 check_get_keys=check_get_keys)
3560 except ExtractorError as e:
3561 # Ignore incomplete data error for replies if retries didn't work.
3562 # This is to allow any other parent comments and comment threads to be downloaded.
3563 # See: https://github.com/yt-dlp/yt-dlp/issues/4669
3564 if 'incomplete data' in str(e).lower() and parent:
3565 if self.get_param('ignoreerrors') in (True, 'only_download'):
3566 self.report_warning(
3567 'Received incomplete data for a comment reply thread and retrying did not help. '
3568 'Ignoring to let other comments be downloaded. Pass --no-ignore-errors to not ignore.')
3571 raise ExtractorError(
3572 'Incomplete data received for comment reply thread. '
3573 'Pass --ignore-errors to ignore and allow rest of comments to download.',
# The forced continuation only applies to the first request.
3576 is_forced_continuation = False
# Entity payloads of the newer comment format; extract_thread treats an
# empty value as the old format.
3578 mutations = traverse_obj(response, ('frameworkUpdates', 'entityBatchUpdate', 'mutations', ..., {dict}))
3579 for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
# The first page carries the header; it supplies the continuation for the chosen sort order.
3580 if is_first_continuation:
3581 continuation = extract_header(continuation_items)
3582 is_first_continuation = False
3587 for entry in extract_thread(continuation_items, mutations):
# Continuation for the next page, taken from the items just processed.
3591 continuation = self._extract_continuation({'contents': continuation_items})
# If nothing was collected at top level and the section carries a message,
# report it and signal that comments are disabled.
3595 message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
3596 if message and not parent and tracker['running_total'] == 0:
3597 self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
3598 raise self.CommentsDisabled
3601 def _generate_comment_continuation(video_id):
3603 Generates initial comment section continuation token from given video id
3605 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3606 return base64.b64encode(token.encode()).decode()
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    @param ytcfg     ytcfg dict forwarded to the comment requests
    @param video_id  id of the video whose comments are wanted
    @param contents  page contents searched for the comment section
    @param webpage   accepted for interface compatibility; not read here
    @returns         an iterator over comment dicts, truncated to the
                     first 'max_comments' value when one is configured
    """
    def _real_comment_extract(contents):
        # NOTE(review): the `renderer = next((` opener was absent from the
        # listing; reconstructed - confirm.
        # The comment section is the item section with this identifier.
        renderer = next((
            item for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
            if item.get('sectionIdentifier') == 'comment-item-section'), None)
        yield from self._comment_entries(renderer, ytcfg, video_id)

    # A missing limit converts to None, which islice treats as "no limit".
    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_real_comment_extract(contents), 0, max_comments)
3620 def _get_checkok_params():
3621 return {'contentCheckOk': True, 'racyCheckOk': True}
3624 def _generate_player_context(cls, sts=None):
3626 'html5Preference': 'HTML5_PREF_WANTS',
3629 context['signatureTimestamp'] = sts
3631 'playbackContext': {
3632 'contentPlaybackContext': context,
3634 **cls._get_checkok_params(),
@staticmethod
def _is_agegated(player_response):
    """Return whether a player response indicates an age gate.

    @param player_response  a player response dict
    @returns  True when 'desktopLegacyAgeGateReason' is set or when the
              playability status or reason contains one of the known
              age-gate phrases
    """
    # NOTE(review): the decorator, this `return True` and the tuple's closing
    # bracket were absent from the listing; reconstructed - confirm.
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')))
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # NOTE(review): the substring test is case-sensitive and every phrase is
    # lower-case - confirm that the API reports status values in that case.
    return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
@staticmethod
def _is_unplayable(player_response):
    """Return whether the response's playability status is exactly 'UNPLAYABLE'."""
    # NOTE(review): the decorator was absent from the listing; the method has
    # no self/cls parameter - confirm.
    return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """Request the player API response for ``video_id`` as the given client.

    @param client         name of the client to impersonate
    @param video_id       id of the video
    @param master_ytcfg   ytcfg of the page; source of the signature timestamp
    @param player_ytcfg   ytcfg of the client; used for headers and the request
    @param player_url     URL of the player JS, or None to skip the timestamp
    @param initial_pr     initial player response, used for the account sync id
    @param smuggled_data  accepted for interface compatibility; not read here
    @returns              the API response, or None when it is empty
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    # The signature timestamp can only be determined when a player URL is known.
    sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    # NOTE(review): the `yt_query = {` opener, the `if pp_arg:` guard and the
    # trailing `) or None` were absent from the listing; reconstructed - confirm.
    yt_query = {
        'videoId': video_id,
    }

    # Optional user-supplied player params (case-sensitive).
    pp_arg = self._configuration_arg('player_params', [None], casesense=True)[0]
    if pp_arg:
        yt_query['params'] = pp_arg

    yt_query.update(self._generate_player_context(sts))
    return self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading {} player API JSON'.format(client.replace('_', ' ').strip()),
    ) or None
3677 def _get_requested_clients(self, url, smuggled_data):
3678 requested_clients = []
3679 android_clients = []
3680 default = ['ios', 'web']
3681 allowed_clients = sorted(
3682 (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
3683 key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
3684 for client in self._configuration_arg('player_client'):
3685 if client == 'default':
3686 requested_clients.extend(default)
3687 elif client == 'all':
3688 requested_clients.extend(allowed_clients)
3689 elif client not in allowed_clients:
3690 self.report_warning(f'Skipping unsupported client {client}')
3691 elif client.startswith('android'):
3692 android_clients.append(client)
3694 requested_clients.append(client)
3695 # Force deprioritization of broken Android clients for format de-duplication
3696 requested_clients.extend(android_clients)
3697 if not requested_clients:
3698 requested_clients = default
3700 if smuggled_data.get('is_music_url') or self.is_music_url(url):
3701 requested_clients.extend(
3702 f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
3704 return orderedSet(requested_clients)
3706 def _invalid_player_response(self, pr, video_id):
3707 # YouTube may return a different video player response than expected.
3708 # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
3709 if (pr_id := traverse_obj(pr, ('videoDetails', 'videoId'))) != video_id:
3712 def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
3715 initial_pr = self._search_json(
3716 self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)
3719 if initial_pr and not self._invalid_player_response(initial_pr, video_id):
3720 # Android player_response does not have microFormats which are needed for
3721 # extraction of some data. So we return the initial_pr with formats
3722 # stripped out even if not requested by the user
3723 # See: https://github.com/yt-dlp/yt-dlp/issues/501
3724 prs.append({**initial_pr, 'streamingData': None})
3726 all_clients = set(clients)
3727 clients = clients[::-1]
3729 def append_client(*client_names):
3730 """ Append the first client name that exists but not already used """
3731 for client_name in client_names:
3732 actual_client = _split_innertube_client(client_name)[0]
3733 if actual_client in INNERTUBE_CLIENTS:
3734 if actual_client not in all_clients:
3735 clients.append(client_name)
3736 all_clients.add(actual_client)
3739 tried_iframe_fallback = False
3741 skipped_clients = {}
3743 client, base_client, variant = _split_innertube_client(clients.pop())
3744 player_ytcfg = master_ytcfg if client == 'web' else {}
3745 if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
3746 player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg
3748 player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
3749 require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
3750 if 'js' in self._configuration_arg('player_skip'):
3751 require_js_player = False
3754 if not player_url and not tried_iframe_fallback and require_js_player:
3755 player_url = self._download_player_url(video_id)
3756 tried_iframe_fallback = True
3759 pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
3760 client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
3761 except ExtractorError as e:
3762 self.report_warning(e)
3765 if pr_id := self._invalid_player_response(pr, video_id):
3766 skipped_clients[client] = pr_id
3768 # Save client name for introspection later
3769 name = short_client_name(client)
3770 sd = traverse_obj(pr, ('streamingData', {dict})) or {}
3771 sd[STREAMING_DATA_CLIENT_NAME] = name
3772 for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
3773 f[STREAMING_DATA_CLIENT_NAME] = name
3776 # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
3777 if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
3778 append_client(f'{base_client}_creator')
3779 elif self._is_agegated(pr):
3780 if variant == 'tv_embedded':
3781 append_client(f'{base_client}_embedded')
3783 append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')
3786 self.report_warning(
3787 f'Skipping player responses from {"/".join(skipped_clients)} clients '
3788 f'(got player responses for video "{"/".join(set(skipped_clients.values()))}
" instead of "{video_id}
")')
3790 raise ExtractorError(
3791 'All player responses are invalid. Your IP is likely being blocked by Youtube', expected=True)
3793 raise ExtractorError('Failed to extract any player response')
3794 return prs, player_url
3796 def _needs_live_processing(self, live_status, duration):
3797 if (live_status == 'is_live' and self.get_param('live_from_start')
3798 or live_status == 'post_live' and (duration or 0) > 2 * 3600):
3801 def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
3802 CHUNK_SIZE = 10 << 20
3803 PREFERRED_LANG_VALUE = 10
3804 original_language = None
3805 itags, stream_ids = collections.defaultdict(set), []
3806 itag_qualities, res_qualities = {}, {0: None}
3808 # Normally tiny is the smallest video-only formats. But
3809 # audio-only formats with unknown quality may get tagged as tiny
3811 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
3812 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres',
3814 streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...))
3815 format_types = self._configuration_arg('formats')
3816 all_formats = 'duplicate' in format_types
3817 if self._configuration_arg('include_duplicate_formats'):
3819 self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
3820 'Use formats=duplicate extractor argument instead')
3822 def build_fragments(f):
3824 'url': update_url_query(f['url'], {
3825 'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, f["filesize"])}',
3827 } for range_start in range(0, f['filesize'], CHUNK_SIZE))
3829 for fmt in streaming_formats:
3830 if fmt.get('targetDurationSec'):
3833 itag = str_or_none(fmt.get('itag'))
3834 audio_track = fmt.get('audioTrack') or {}
3835 stream_id = (itag, audio_track.get('id'), fmt.get('isDrc'))
3837 if stream_id in stream_ids:
3840 quality = fmt.get('quality')
3841 height = int_or_none(fmt.get('height'))
3842 if quality == 'tiny' or not quality:
3843 quality = fmt.get('audioQuality', '').lower() or quality
3844 # The 3gp format (17) in android client has a quality of "small
",
3845 # but is actually worse than other formats
3850 itag_qualities[itag] = quality
3852 res_qualities[height] = quality
3854 is_default = audio_track.get('audioIsDefault')
3855 is_descriptive = 'descriptive' in (audio_track.get('displayName') or '').lower()
3856 language_code = audio_track.get('id', '').split('.')[0]
3857 if language_code and is_default:
3858 original_language = language_code
3860 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
3861 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
3862 # number of fragments that would subsequently be requested with (`&sq=N`)
3863 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
3866 fmt_url = fmt.get('url')
3868 sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
3869 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
3870 encrypted_sig = try_get(sc, lambda x: x['s'][0])
3871 if not all((sc, fmt_url, player_url, encrypted_sig)):
3874 fmt_url += '&{}={}'.format(
3875 traverse_obj(sc, ('sp', -1)) or 'signature',
3876 self._decrypt_signature(encrypted_sig, video_id, player_url),
3878 except ExtractorError as e:
3879 self.report_warning('Signature extraction failed: Some formats may be missing',
3880 video_id=video_id, only_once=True)
3881 self.write_debug(e, only_once=True)
3884 query = parse_qs(fmt_url)
3887 decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
3888 fmt_url = update_url_query(fmt_url, {
3889 'n': decrypt_nsig(query['n'][0], video_id, player_url),
3891 except ExtractorError as e:
3893 if isinstance(e, JSInterpreter.Exception):
3894 phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
3895 f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
3897 self.report_warning(
3898 f'nsig extraction failed: Some formats may be missing\n{phantomjs_hint}'
3899 f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
3900 self.write_debug(e, only_once=True)
3902 self.report_warning(
3903 'Cannot decrypt nsig without player_url: Some formats may be missing',
3904 video_id=video_id, only_once=True)
3907 tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
3908 format_duration = traverse_obj(fmt, ('approxDurationMs', {lambda x: float_or_none(x, 1000)}))
3909 # Some formats may have much smaller duration than others (possibly damaged during encoding)
3910 # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
3911 # Make sure to avoid false positives with small duration differences.
3912 # E.g. __2ABJjxzNo, ySuUZEjARPY
3913 is_damaged = try_call(lambda: format_duration < duration // 2)
3915 self.report_warning(
3916 f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
3918 client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
3919 # Android client formats are broken due to integrity check enforcement
3920 # Ref: https://github.com/yt-dlp/yt-dlp/issues/9554
3921 is_broken = client_name and client_name.startswith(short_client_name('android'))
3923 self.report_warning(
3924 f'{video_id}: Android client formats are broken and may yield HTTP Error 403. '
3925 'They will be deprioritized', only_once=True)
3927 name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
3928 fps = int_or_none(fmt.get('fps')) or 0
3930 'asr': int_or_none(fmt.get('audioSampleRate')),
3931 'filesize': int_or_none(fmt.get('contentLength')),
3932 'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
3933 'format_note': join_nonempty(
3934 join_nonempty(audio_track.get('displayName'), is_default and ' (default)', delim=''),
3935 name, fmt.get('isDrc') and 'DRC',
3936 try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
3937 try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
3938 is_damaged and 'DAMAGED', is_broken and 'BROKEN',
3939 (self.get_param('verbose') or all_formats) and client_name,
3941 # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
3942 'source_preference': (-5 if itag == '22' else -1) + (100 if 'Premium' in name else 0),
3943 'fps': fps if fps > 1 else None, # For some formats, fps is wrongly returned as 1
3944 'audio_channels': fmt.get('audioChannels'),
3946 'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
3947 'has_drm': bool(fmt.get('drmFamilies')),
3949 'filesize_approx': filesize_from_tbr(tbr, format_duration),
3951 'width': int_or_none(fmt.get('width')),
3952 'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None,
3953 'language_preference': PREFERRED_LANG_VALUE if is_default else -10 if is_descriptive else -1,
3954 # Strictly de-prioritize broken, damaged and 3gp formats
3955 'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None,
3957 mime_mobj = re.match(
3958 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^
"]+)")?
', fmt.get('mimeType
') or '')
3960 dct['ext
'] = mimetype2ext(mime_mobj.group(1))
3961 dct.update(parse_codecs(mime_mobj.group(2)))
3963 itags[itag].add(('https
', dct.get('language
')))
3964 stream_ids.append(stream_id)
3965 single_stream = 'none
' in (dct.get('acodec
'), dct.get('vcodec
'))
3966 if single_stream and dct.get('ext
'):
3967 dct['container
'] = dct['ext
'] + '_dash
'
3969 if (all_formats or 'dashy
' in format_types) and dct['filesize
']:
3972 'format_id
': f'{dct["format_id"]}
-dashy
' if all_formats else dct['format_id
'],
3973 'protocol
': 'http_dash_segments
',
3974 'fragments
': build_fragments(dct),
3976 if all_formats or 'dashy
' not in format_types:
3977 dct['downloader_options
'] = {'http_chunk_size': CHUNK_SIZE}
3980 needs_live_processing = self._needs_live_processing(live_status, duration)
3981 skip_bad_formats = 'incomplete
' not in format_types
3982 if self._configuration_arg('include_incomplete_formats
'):
3983 skip_bad_formats = False
3984 self._downloader.deprecated_feature('[youtube
] include_incomplete_formats extractor argument
is deprecated
. '
3985 'Use formats
=incomplete extractor argument instead
')
3987 skip_manifests = set(self._configuration_arg('skip
'))
3988 if (not self.get_param('youtube_include_hls_manifest
', True)
3989 or needs_live_processing == 'is_live
' # These will be filtered out by YoutubeDL anyway
3990 or needs_live_processing and skip_bad_formats):
3991 skip_manifests.add('hls
')
3993 if not self.get_param('youtube_include_dash_manifest
', True):
3994 skip_manifests.add('dash
')
3995 if self._configuration_arg('include_live_dash
'):
3996 self._downloader.deprecated_feature('[youtube
] include_live_dash extractor argument
is deprecated
. '
3997 'Use formats
=incomplete extractor argument instead
')
3998 elif skip_bad_formats and live_status == 'is_live
' and needs_live_processing != 'is_live
':
3999 skip_manifests.add('dash
')
4001 def process_manifest_format(f, proto, client_name, itag):
4002 key = (proto, f.get('language
'))
4003 if not all_formats and key in itags[itag]:
4005 itags[itag].add(key)
4007 if itag and all_formats:
4008 f['format_id
'] = f'{itag}
-{proto}
'
4009 elif any(p != proto for p, _ in itags[itag]):
4010 f['format_id
'] = f'{itag}
-{proto}
'
4012 f['format_id
'] = itag
4014 if original_language and f.get('language
') == original_language:
4015 f['format_note
'] = join_nonempty(f.get('format_note
'), '(default
)', delim=' ')
4016 f['language_preference
'] = PREFERRED_LANG_VALUE
4018 if f.get('source_preference
') is None:
4019 f['source_preference
'] = -1
4021 if itag in ('616', '235'):
4022 f['format_note
'] = join_nonempty(f.get('format_note
'), 'Premium
', delim=' ')
4023 f['source_preference
'] += 100
4025 f['quality
'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id
'].split('-')[0]), -1))
4026 if f['quality
'] == -1 and f.get('height
'):
4027 f['quality
'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height
']))])
4028 if self.get_param('verbose
') or all_formats:
4029 f['format_note
'] = join_nonempty(f.get('format_note
'), client_name, delim=', ')
4030 if f.get('fps
') and f['fps
'] <= 1:
4033 if proto == 'hls
' and f.get('has_drm
'):
4034 f['has_drm
'] = 'maybe
'
4035 f['source_preference
'] -= 5
4039 for sd in streaming_data:
4040 client_name = sd.get(STREAMING_DATA_CLIENT_NAME)
4042 hls_manifest_url = 'hls
' not in skip_manifests and sd.get('hlsManifestUrl
')
4043 if hls_manifest_url:
4044 fmts, subs = self._extract_m3u8_formats_and_subtitles(
4045 hls_manifest_url, video_id, 'mp4
', fatal=False, live=live_status == 'is_live
')
4046 subtitles = self._merge_subtitles(subs, subtitles)
4048 if process_manifest_format(f, 'hls
', client_name, self._search_regex(
4049 r'/itag
/(\d
+)', f['url
'], 'itag
', default=None)):
4052 dash_manifest_url = 'dash
' not in skip_manifests and sd.get('dashManifestUrl
')
4053 if dash_manifest_url:
4054 formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
4055 subtitles = self._merge_subtitles(subs, subtitles) # Prioritize HLS subs over DASH
4057 if process_manifest_format(f, 'dash
', client_name, f['format_id
']):
4058 f['filesize
'] = int_or_none(self._search_regex(
4059 r'/clen
/(\d
+)', f.get('fragment_base_url
') or f['url
'], 'file size
', default=None))
4060 if needs_live_processing:
4061 f['is_from_start
'] = True
4066 def _extract_storyboard(self, player_responses, duration):
4068 player_responses, ('storyboards
', 'playerStoryboardSpecRenderer
', 'spec
'), default='').split('|
')[::-1]
4069 base_url = url_or_none(urljoin('https
://i
.ytimg
.com
/', spec.pop() or None))
4073 for i, args in enumerate(spec):
4074 args = args.split('#')
4075 counts
= list(map(int_or_none
, args
[:5]))
4076 if len(args
) != 8 or not all(counts
):
4077 self
.report_warning(f
'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
4079 width
, height
, frame_count
, cols
, rows
= counts
4082 url
= base_url
.replace('$L', str(L
- i
)).replace('$N', N
) + f
'&sigh={sigh}'
4083 fragment_count
= frame_count
/ (cols
* rows
)
4084 fragment_duration
= duration
/ fragment_count
4086 'format_id': f
'sb{i}',
4087 'format_note': 'storyboard',
4089 'protocol': 'mhtml',
4095 'fps': frame_count
/ duration
,
4099 'url': url
.replace('$M', str(j
)),
4100 'duration': min(fragment_duration
, duration
- (j
* fragment_duration
)),
4101 } for j
in range(math
.ceil(fragment_count
))],
4104 def _download_player_responses(self
, url
, smuggled_data
, video_id
, webpage_url
):
4106 if 'webpage' not in self
._configuration
_arg
('player_skip'):
4107 query
= {'bpctr': '9999999999', 'has_verified': '1'}
4108 pp
= self
._configuration
_arg
('player_params', [None], casesense
=True)[0]
4111 webpage
= self
._download
_webpage
(
4112 webpage_url
, video_id
, fatal
=False, query
=query
)
4114 master_ytcfg
= self
.extract_ytcfg(video_id
, webpage
) or self
._get
_default
_ytcfg
()
4116 player_responses
, player_url
= self
._extract
_player
_responses
(
4117 self
._get
_requested
_clients
(url
, smuggled_data
),
4118 video_id
, webpage
, master_ytcfg
, smuggled_data
)
4120 return webpage
, master_ytcfg
, player_responses
, player_url
4122 def _list_formats(self
, video_id
, microformats
, video_details
, player_responses
, player_url
, duration
=None):
4123 live_broadcast_details
= traverse_obj(microformats
, (..., 'liveBroadcastDetails'))
4124 is_live
= get_first(video_details
, 'isLive')
4126 is_live
= get_first(live_broadcast_details
, 'isLiveNow')
4127 live_content
= get_first(video_details
, 'isLiveContent')
4128 is_upcoming
= get_first(video_details
, 'isUpcoming')
4129 post_live
= get_first(video_details
, 'isPostLiveDvr')
4130 live_status
= ('post_live' if post_live
4131 else 'is_live' if is_live
4132 else 'is_upcoming' if is_upcoming
4133 else 'was_live' if live_content
4134 else 'not_live' if False in (is_live
, live_content
)
4136 streaming_data
= traverse_obj(player_responses
, (..., 'streamingData'))
4137 *formats
, subtitles
= self
._extract
_formats
_and
_subtitles
(streaming_data
, video_id
, player_url
, live_status
, duration
)
4138 if all(f
.get('has_drm') for f
in formats
):
4139 # If there are no formats that definitely don't have DRM, all have DRM
4143 return live_broadcast_details
, live_status
, streaming_data
, formats
, subtitles
4145 def _real_extract(self
, url
):
4146 url
, smuggled_data
= unsmuggle_url(url
, {})
4147 video_id
= self
._match
_id
(url
)
4149 base_url
= self
.http_scheme() + '//www.youtube.com/'
4150 webpage_url
= base_url
+ 'watch?v=' + video_id
4152 webpage
, master_ytcfg
, player_responses
, player_url
= self
._download
_player
_responses
(url
, smuggled_data
, video_id
, webpage_url
)
4154 playability_statuses
= traverse_obj(
4155 player_responses
, (..., 'playabilityStatus'), expected_type
=dict)
4157 trailer_video_id
= get_first(
4158 playability_statuses
,
4159 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
4161 if trailer_video_id
:
4162 return self
.url_result(
4163 trailer_video_id
, self
.ie_key(), trailer_video_id
)
4165 search_meta
= ((lambda x
: self
._html
_search
_meta
(x
, webpage
, default
=None))
4166 if webpage
else (lambda x
: None))
4168 video_details
= traverse_obj(player_responses
, (..., 'videoDetails'), expected_type
=dict)
4169 microformats
= traverse_obj(
4170 player_responses
, (..., 'microformat', 'playerMicroformatRenderer'),
4173 translated_title
= self
._get
_text
(microformats
, (..., 'title'))
4174 video_title
= (self
._preferred
_lang
and translated_title
4175 or get_first(video_details
, 'title') # primary
4177 or search_meta(['og:title', 'twitter:title', 'title']))
4178 translated_description
= self
._get
_text
(microformats
, (..., 'description'))
4179 original_description
= get_first(video_details
, 'shortDescription')
4180 video_description
= (
4181 self
._preferred
_lang
and translated_description
4182 # If original description is blank, it will be an empty string.
4183 # Do not prefer translated description in this case.
4184 or original_description
if original_description
is not None else translated_description
)
4186 multifeed_metadata_list
= get_first(
4188 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
4190 if multifeed_metadata_list
and not smuggled_data
.get('force_singlefeed'):
4191 if self
.get_param('noplaylist'):
4192 self
.to_screen(f
'Downloading just video {video_id} because of --no-playlist')
4196 for feed
in multifeed_metadata_list
.split(','):
4197 # Unquote should take place before split on comma (,) since textual
4198 # fields may contain comma as well (see
4199 # https://github.com/ytdl-org/youtube-dl/issues/8536)
4200 feed_data
= urllib
.parse
.parse_qs(
4201 urllib
.parse
.unquote_plus(feed
))
4203 def feed_entry(name
):
4205 feed_data
, lambda x
: x
[name
][0], str)
4207 feed_id
= feed_entry('id')
4210 feed_title
= feed_entry('title')
4213 title
+= f
' ({feed_title})'
4215 '_type': 'url_transparent',
4216 'ie_key': 'Youtube',
4218 '{}watch?v={}'.format(base_url
, feed_data
['id'][0]),
4219 {'force_singlefeed': True}
),
4222 feed_ids
.append(feed_id
)
4224 'Downloading multifeed video ({}) - add --no-playlist to just download video {}'.format(
4225 ', '.join(feed_ids
), video_id
))
4226 return self
.playlist_result(
4227 entries
, video_id
, video_title
, video_description
)
4229 duration
= (int_or_none(get_first(video_details
, 'lengthSeconds'))
4230 or int_or_none(get_first(microformats
, 'lengthSeconds'))
4231 or parse_duration(search_meta('duration')) or None)
4233 live_broadcast_details
, live_status
, streaming_data
, formats
, automatic_captions
= \
4234 self
._list
_formats
(video_id
, microformats
, video_details
, player_responses
, player_url
, duration
)
4235 if live_status
== 'post_live':
4236 self
.write_debug(f
'{video_id}: Video is in Post-Live Manifestless mode')
4239 if not self
.get_param('allow_unplayable_formats') and traverse_obj(streaming_data
, (..., 'licenseInfos')):
4240 self
.report_drm(video_id
)
4242 playability_statuses
,
4243 ('errorScreen', 'playerErrorMessageRenderer'), expected_type
=dict) or {}
4244 reason
= self
._get
_text
(pemr
, 'reason') or get_first(playability_statuses
, 'reason')
4245 subreason
= clean_html(self
._get
_text
(pemr
, 'subreason') or '')
4247 if subreason
== 'The uploader has not made this video available in your country.':
4248 countries
= get_first(microformats
, 'availableCountries')
4250 regions_allowed
= search_meta('regionsAllowed')
4251 countries
= regions_allowed
.split(',') if regions_allowed
else None
4252 self
.raise_geo_restricted(subreason
, countries
, metadata_available
=True)
4253 reason
+= f
'. {subreason}'
4255 self
.raise_no_formats(reason
, expected
=True)
4257 keywords
= get_first(video_details
, 'keywords', expected_type
=list) or []
4258 if not keywords
and webpage
:
4260 unescapeHTML(m
.group('content'))
4261 for m
in re
.finditer(self
._meta
_regex
('og:video:tag'), webpage
)]
4262 for keyword
in keywords
:
4263 if keyword
.startswith('yt:stretch='):
4264 mobj
= re
.search(r
'(\d+)\s*:\s*(\d+)', keyword
)
4266 # NB: float is intentional for forcing float division
4267 w
, h
= (float(v
) for v
in mobj
.groups())
4271 if f
.get('vcodec') != 'none':
4272 f
['stretched_ratio'] = ratio
4274 thumbnails
= self
._extract
_thumbnails
((video_details
, microformats
), (..., ..., 'thumbnail'))
4275 thumbnail_url
= search_meta(['og:image', 'twitter:image'])
4278 'url': thumbnail_url
,
4280 original_thumbnails
= thumbnails
.copy()
4282 # The best resolution thumbnails sometimes do not appear in the webpage
4283 # See: https://github.com/yt-dlp/yt-dlp/issues/340
4284 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
4286 # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
4287 # in resolution, these are not the custom thumbnail. So de-prioritize them
4288 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
4289 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3',
4291 n_thumbnail_names
= len(thumbnail_names
)
4293 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
4294 video_id
=video_id
, name
=name
, ext
=ext
,
4295 webp
='_webp' if ext
== 'webp' else '', live
='_live' if live_status
== 'is_live' else ''),
4296 } for name
in thumbnail_names
for ext
in ('webp', 'jpg'))
4297 for thumb
in thumbnails
:
4298 i
= next((i
for i
, t
in enumerate(thumbnail_names
) if f
'/{video_id}/{t}' in thumb
['url']), n_thumbnail_names
)
4299 thumb
['preference'] = (0 if '.webp' in thumb
['url'] else -1) - (2 * i
)
4300 self
._remove
_duplicate
_formats
(thumbnails
)
4301 self
._downloader
._sort
_thumbnails
(original_thumbnails
)
4303 category
= get_first(microformats
, 'category') or search_meta('genre')
4304 channel_id
= self
.ucid_or_none(str_or_none(
4305 get_first(video_details
, 'channelId')
4306 or get_first(microformats
, 'externalChannelId')
4307 or search_meta('channelId')))
4308 owner_profile_url
= get_first(microformats
, 'ownerProfileUrl')
4310 live_start_time
= parse_iso8601(get_first(live_broadcast_details
, 'startTimestamp'))
4311 live_end_time
= parse_iso8601(get_first(live_broadcast_details
, 'endTimestamp'))
4312 if not duration
and live_end_time
and live_start_time
:
4313 duration
= live_end_time
- live_start_time
4315 needs_live_processing
= self
._needs
_live
_processing
(live_status
, duration
)
4317 def is_bad_format(fmt
):
4318 if needs_live_processing
and not fmt
.get('is_from_start'):
4320 elif (live_status
== 'is_live' and needs_live_processing
!= 'is_live'
4321 and fmt
.get('protocol') == 'http_dash_segments'):
4324 for fmt
in filter(is_bad_format
, formats
):
4325 fmt
['preference'] = (fmt
.get('preference') or -1) - 10
4326 fmt
['format_note'] = join_nonempty(fmt
.get('format_note'), '(Last 2 hours)', delim
=' ')
4328 if needs_live_processing
:
4329 self
._prepare
_live
_from
_start
_formats
(
4330 formats
, video_id
, live_start_time
, url
, webpage_url
, smuggled_data
, live_status
== 'is_live')
4332 formats
.extend(self
._extract
_storyboard
(player_responses
, duration
))
4334 channel_handle
= self
.handle_from_url(owner_profile_url
)
4338 'title': video_title
,
4340 'thumbnails': thumbnails
,
4341 # The best thumbnail that we are sure exists. Prevents unnecessary
4342 # URL checking if the user doesn't care about getting the best possible thumbnail
4343 'thumbnail': traverse_obj(original_thumbnails
, (-1, 'url')),
4344 'description': video_description
,
4345 'channel_id': channel_id
,
4346 'channel_url': format_field(channel_id
, None, 'https://www.youtube.com/channel/%s', default
=None),
4347 'duration': duration
,
4348 'view_count': int_or_none(
4349 get_first((video_details
, microformats
), (..., 'viewCount'))
4350 or search_meta('interactionCount')),
4351 'average_rating': float_or_none(get_first(video_details
, 'averageRating')),
4352 'age_limit': 18 if (
4353 get_first(microformats
, 'isFamilySafe') is False
4354 or search_meta('isFamilyFriendly') == 'false'
4355 or search_meta('og:restrictions:age') == '18+') else 0,
4356 'webpage_url': webpage_url
,
4357 'categories': [category
] if category
else None,
4359 'playable_in_embed': get_first(playability_statuses
, 'playableInEmbed'),
4360 'live_status': live_status
,
4361 'release_timestamp': live_start_time
,
4362 '_format_sort_fields': ( # source_preference is lower for potentially damaged formats
4363 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'),
4367 pctr
= traverse_obj(player_responses
, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type
=dict)
4369 def get_lang_code(track
):
4370 return (remove_start(track
.get('vssId') or '', '.').replace('.', '-')
4371 or track
.get('languageCode'))
4373 # Converted into dicts to remove duplicates
4375 get_lang_code(sub
): sub
4376 for sub
in traverse_obj(pctr
, (..., 'captionTracks', ...))}
4377 translation_languages
= {
4378 lang
.get('languageCode'): self
._get
_text
(lang
.get('languageName'), max_runs
=1)
4379 for lang
in traverse_obj(pctr
, (..., 'translationLanguages', ...))}
4381 def process_language(container
, base_url
, lang_code
, sub_name
, query
):
4382 lang_subs
= container
.setdefault(lang_code
, [])
4383 for fmt
in self
._SUBTITLE
_FORMATS
:
4389 'url': urljoin('https://www.youtube.com', update_url_query(base_url
, query
)),
4393 # NB: Constructing the full subtitle dictionary is slow
4394 get_translated_subs
= 'translated_subs' not in self
._configuration
_arg
('skip') and (
4395 self
.get_param('writeautomaticsub', False) or self
.get_param('listsubtitles'))
4396 for lang_code
, caption_track
in captions
.items():
4397 base_url
= caption_track
.get('baseUrl')
4398 orig_lang
= parse_qs(base_url
).get('lang', [None])[-1]
4401 lang_name
= self
._get
_text
(caption_track
, 'name', max_runs
=1)
4402 if caption_track
.get('kind') != 'asr':
4406 subtitles
, base_url
, lang_code
, lang_name
, {})
4407 if not caption_track
.get('isTranslatable'):
4409 for trans_code
, trans_name
in translation_languages
.items():
4412 orig_trans_code
= trans_code
4413 if caption_track
.get('kind') != 'asr' and trans_code
!= 'und':
4414 if not get_translated_subs
:
4416 trans_code
+= f
'-{lang_code}'
4417 trans_name
+= format_field(lang_name
, None, ' from %s')
4418 if lang_code
== f
'a-{orig_trans_code}':
4419 # Set audio language based on original subtitles
4421 if f
.get('acodec') != 'none' and not f
.get('language'):
4422 f
['language'] = orig_trans_code
4423 # Add an "-orig" label to the original language so that it can be distinguished.
4424 # The subs are returned without "-orig" as well for compatibility
4426 automatic_captions
, base_url
, f
'{trans_code}-orig', f
'{trans_name} (Original)', {})
4427 # Setting tlang=lang returns damaged subtitles.
4428 process_language(automatic_captions
, base_url
, trans_code
, trans_name
,
4429 {} if orig_lang == orig_trans_code else {'tlang': trans_code}
)
4431 info
['automatic_captions'] = automatic_captions
4432 info
['subtitles'] = subtitles
4434 parsed_url
= urllib
.parse
.urlparse(url
)
4435 for component
in [parsed_url
.fragment
, parsed_url
.query
]:
4436 query
= urllib
.parse
.parse_qs(component
)
4437 for k
, v
in query
.items():
4438 for d_k
, s_ks
in [('start', ('start', 't')), ('end', ('end',))]:
4440 if d_k
not in info
and k
in s_ks
:
4441 info
[d_k
] = parse_duration(v
[0])
4443 # Youtube Music Auto-generated description
4444 if (video_description
or '').strip().endswith('\nAuto-generated by YouTube.'):
4445 # XXX: Causes catastrophic backtracking if description has "·"
4446 # E.g. https://www.youtube.com/watch?v=DoPaAxMQoiI
4447 # Simulating atomic groups: (?P<a>[^xy]+)x => (?=(?P<a>[^xy]+))(?P=a)x
4448 # reduces it, but does not fully fix it. https://regex101.com/r/8Ssf2h/2
4451 (?=(?P<track>[^\n·]+))(?P=track)·
4452 (?=(?P<artist>[^\n]+))(?P=artist)\n+
4453 (?=(?P<album>[^\n]+))(?P=album)\n
4454 (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
4455 (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
4457 (?=(?P<clean_artist>[^\n]+))(?P=clean_artist)\n
4458 )?.+\nAuto-generated\ by\ YouTube\.\s*$
4459 ''', video_description
)
4461 release_year
= mobj
.group('release_year')
4462 release_date
= mobj
.group('release_date')
4464 release_date
= release_date
.replace('-', '')
4465 if not release_year
:
4466 release_year
= release_date
[:4]
4468 'album': mobj
.group('album'.strip()),
4469 'artists': ([a
] if (a
:= mobj
.group('clean_artist'))
4470 else [a
.strip() for a
in mobj
.group('artist').split('·')]),
4471 'track': mobj
.group('track').strip(),
4472 'release_date': release_date
,
4473 'release_year': int_or_none(release_year
),
4478 initial_data
= self
.extract_yt_initial_data(video_id
, webpage
, fatal
=False)
4479 if not traverse_obj(initial_data
, 'contents'):
4480 self
.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
4482 if not initial_data
:
4483 query
= {'videoId': video_id}
4484 query
.update(self
._get
_checkok
_params
())
4485 initial_data
= self
._extract
_response
(
4486 item_id
=video_id
, ep
='next', fatal
=False,
4487 ytcfg
=master_ytcfg
, query
=query
, check_get_keys
='contents',
4488 headers
=self
.generate_api_headers(ytcfg
=master_ytcfg
),
4489 note
='Downloading initial data API JSON')
4491 info
['comment_count'] = traverse_obj(initial_data
, (
4492 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
4493 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount',
4495 'engagementPanels', lambda _
, v
: v
['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
4496 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo',
4497 ), expected_type
=self
._get
_count
, get_all
=False)
4499 try: # This will error if there is no livechat
4500 initial_data
['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
4501 except (KeyError, IndexError, TypeError):
4504 info
.setdefault('subtitles', {})['live_chat'] = [{
4505 # url is needed to set cookies
4506 'url': f
'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
4507 'video_id': video_id
,
4509 'protocol': ('youtube_live_chat' if live_status
in ('is_live', 'is_upcoming')
4510 else 'youtube_live_chat_replay'),
4514 info
['chapters'] = (
4515 self
._extract
_chapters
_from
_json
(initial_data
, duration
)
4516 or self
._extract
_chapters
_from
_engagement
_panel
(initial_data
, duration
)
4517 or self
._extract
_chapters
_from
_description
(video_description
, duration
)
4520 info
['heatmap'] = self
._extract
_heatmap
(initial_data
)
4522 contents
= traverse_obj(
4523 initial_data
, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
4524 expected_type
=list, default
=[])
4526 vpir
= get_first(contents
, 'videoPrimaryInfoRenderer')
4528 stl
= vpir
.get('superTitleLink')
4530 stl
= self
._get
_text
(stl
)
4533 lambda x
: x
['superTitleIcon']['iconType']) == 'LOCATION_PIN':
4534 info
['location'] = stl
4536 mobj
= re
.search(r
'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl
)
4539 'series': mobj
.group(1),
4540 'season_number': int(mobj
.group(2)),
4541 'episode_number': int(mobj
.group(3)),
4543 for tlb
in (try_get(
4545 lambda x
: x
['videoActions']['menuRenderer']['topLevelButtons'],
4549 tlb
, ('toggleButtonRenderer', ...),
4550 ('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer')))
4552 for getter
, regex
in [(
4553 lambda x
: x
['defaultText']['accessibility']['accessibilityData'],
4554 r
'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
4555 lambda x
: x
['accessibility'],
4556 lambda x
: x
['accessibilityData']['accessibilityData'],
4557 ], r
'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
4558 label
= (try_get(tbr
, getter
, dict) or {}).get('label')
4560 mobj
= re
.match(regex
, label
)
4562 info
[mobj
.group('type') + '_count'] = str_to_int(mobj
.group('count'))
4565 info
['like_count'] = traverse_obj(vpir
, (
4566 'videoActions', 'menuRenderer', 'topLevelButtons', ...,
4567 'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel',
4568 'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel',
4569 'buttonViewModel', 'accessibilityText', {parse_count}
), get_all
=False)
4571 vcr
= traverse_obj(vpir
, ('viewCount', 'videoViewCountRenderer'))
4573 vc
= self
._get
_count
(vcr
, 'viewCount')
4574 # Upcoming premieres with waiting count are treated as live here
4575 if vcr
.get('isLive'):
4576 info
['concurrent_view_count'] = vc
4577 elif info
.get('view_count') is None:
4578 info
['view_count'] = vc
4580 vsir
= get_first(contents
, 'videoSecondaryInfoRenderer')
4582 vor
= traverse_obj(vsir
, ('owner', 'videoOwnerRenderer'))
4584 'channel': self
._get
_text
(vor
, 'title'),
4585 'channel_follower_count': self
._get
_count
(vor
, 'subscriberCountText')})
4587 if not channel_handle
:
4588 channel_handle
= self
.handle_from_url(
4590 ('navigationEndpoint', ('title', 'runs', ..., 'navigationEndpoint')),
4591 (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl')),
4592 {str}
), get_all
=False))
4596 lambda x
: x
['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
4598 multiple_songs
= False
4600 if try_get(row
, lambda x
: x
['metadataRowRenderer']['hasDividerLine']) is True:
4601 multiple_songs
= True
4604 mrr
= row
.get('metadataRowRenderer') or {}
4605 mrr_title
= mrr
.get('title')
4608 mrr_title
= self
._get
_text
(mrr
, 'title')
4609 mrr_contents_text
= self
._get
_text
(mrr
, ('contents', 0))
4610 if mrr_title
== 'License':
4611 info
['license'] = mrr_contents_text
4612 elif not multiple_songs
:
4613 if mrr_title
== 'Album':
4614 info
['album'] = mrr_contents_text
4615 elif mrr_title
== 'Artist':
4616 info
['artists'] = [mrr_contents_text
] if mrr_contents_text
else None
4617 elif mrr_title
== 'Song':
4618 info
['track'] = mrr_contents_text
4619 owner_badges
= self
._extract
_badges
(traverse_obj(vsir
, ('owner', 'videoOwnerRenderer', 'badges')))
4620 if self
._has
_badge
(owner_badges
, BadgeType
.VERIFIED
):
4621 info
['channel_is_verified'] = True
4624 'uploader': info
.get('channel'),
4625 'uploader_id': channel_handle
,
4626 'uploader_url': format_field(channel_handle
, None, 'https://www.youtube.com/%s', default
=None),
4629 # We only want timestamp IF it has time precision AND a timezone
4630 # Currently the uploadDate in microformats appears to be in US/Pacific timezone.
4632 parse_iso8601(get_first(microformats
, 'uploadDate'), timezone
=NO_DEFAULT
)
4633 or parse_iso8601(search_meta('uploadDate'), timezone
=NO_DEFAULT
)
4636 dt
.datetime
.fromtimestamp(timestamp
, dt
.timezone
.utc
).strftime('%Y%m%d') if timestamp
else
4638 unified_strdate(get_first(microformats
, 'uploadDate'))
4639 or unified_strdate(search_meta('uploadDate'))
4642 # In the case we cannot get the timestamp:
4643 # The upload date for scheduled, live and past live streams / premieres in microformats
4644 # may be different from the stream date. Although not in UTC, we will prefer it in this case.
4645 # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
4646 if not upload_date
or (not timestamp
and live_status
in ('not_live', None)):
4647 # this should be in UTC, as configured in the cookie/client context
4648 upload_date
= strftime_or_none(
4649 self
._parse
_time
_text
(self
._get
_text
(vpir
, 'dateText'))) or upload_date
4651 info
['upload_date'] = upload_date
4652 info
['timestamp'] = timestamp
4654 if upload_date
and live_status
not in ('is_live', 'post_live', 'is_upcoming'):
4655 # Newly uploaded videos' HLS formats are potentially problematic and need to be checked
4656 upload_datetime
= datetime_from_str(upload_date
).replace(tzinfo
=dt
.timezone
.utc
)
4657 if upload_datetime
>= datetime_from_str('today-2days'):
4658 for fmt
in info
['formats']:
4659 if fmt
.get('protocol') == 'm3u8_native':
4660 fmt
['__needs_testing'] = True
4662 for s_k
, d_k
in [('artists', 'creators'), ('track', 'alt_title')]:
4667 badges
= self
._extract
_badges
(traverse_obj(vpir
, 'badges'))
4669 is_private
= (self
._has
_badge
(badges
, BadgeType
.AVAILABILITY_PRIVATE
)
4670 or get_first(video_details
, 'isPrivate', expected_type
=bool))
4672 info
['availability'] = (
4673 'public' if self
._has
_badge
(badges
, BadgeType
.AVAILABILITY_PUBLIC
)
4674 else self
._availability
(
4675 is_private
=is_private
,
4677 self
._has
_badge
(badges
, BadgeType
.AVAILABILITY_PREMIUM
)
4678 or False if initial_data
and is_private
is not None else None),
4679 needs_subscription
=(
4680 self
._has
_badge
(badges
, BadgeType
.AVAILABILITY_SUBSCRIPTION
)
4681 or False if initial_data
and is_private
is not None else None),
4682 needs_auth
=info
['age_limit'] >= 18,
4683 is_unlisted
=None if is_private
is None else (
4684 self
._has
_badge
(badges
, BadgeType
.AVAILABILITY_UNLISTED
)
4685 or get_first(microformats
, 'isUnlisted', expected_type
=bool))))
4687 info
['__post_extractor'] = self
.extract_comments(master_ytcfg
, video_id
, contents
, webpage
)
4689 self
.mark_watched(video_id
, player_responses
)
4694 class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor
):
def passthrough_smuggled_data(func):
    """Decorate an extraction method so smuggled URL data is forwarded to its results.

    The wrapped callable is invoked as ``func(self, url, smuggled_data)`` with the
    un-smuggled URL.  Whatever smuggled data is still set afterwards is re-attached
    to the returned info dict and, lazily, to each of its ``entries``.
    """

    def _smuggle(info, smuggled_data):
        # Only URL-style results carry a URL that can be re-smuggled
        is_url_result = info.get('_type') in ('url', 'url_transparent')
        if not is_url_result:
            return info

        if smuggled_data.get('is_music_url'):
            parts = urllib.parse.urlparse(info['url'])
            if parts.netloc in ('www.youtube.com', 'music.youtube.com'):
                # The music flag is expressed through the host instead of smuggled data
                smuggled_data.pop('is_music_url')
                info['url'] = urllib.parse.urlunparse(parts._replace(netloc='music.youtube.com'))

        if smuggled_data:
            info['url'] = smuggle_url(info['url'], smuggled_data)
        return info

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True

        info_dict = func(self, url, smuggled_data)
        if not smuggled_data:
            return info_dict

        _smuggle(info_dict, smuggled_data)
        entries = info_dict.get('entries')
        if entries:
            # Stay lazy, and give every entry its own copy since _smuggle() mutates it
            info_dict['entries'] = (_smuggle(entry, smuggled_data.copy()) for entry in entries)
        return info_dict

    return wrapper
def _extract_basic_item_renderer(item):
    """Return the first dict value of *item* stored under a recognised renderer key.

    A key is recognised when it is one of the known basic renderers or when it
    looks like ``grid...Renderer``.  Returns None when nothing matches.
    """
    # Modified from _extract_grid_item_renderer
    basic_renderer_keys = frozenset((
        'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer',
    ))
    for name, candidate in item.items():
        if not isinstance(candidate, dict):
            continue
        if name in basic_renderer_keys or (name.startswith('grid') and name.endswith('Renderer')):
            return candidate
    return None
4736 def _extract_channel_renderer(self
, renderer
):
4737 channel_id
= self
.ucid_or_none(renderer
['channelId'])
4738 title
= self
._get
_text
(renderer
, 'title')
4739 channel_url
= format_field(channel_id
, None, 'https://www.youtube.com/channel/%s', default
=None)
4740 channel_handle
= self
.handle_from_url(
4741 traverse_obj(renderer
, (
4742 'navigationEndpoint', (('commandMetadata', 'webCommandMetadata', 'url'),
4743 ('browseEndpoint', 'canonicalBaseUrl')),
4744 {str}
), get_all
=False))
4745 if not channel_handle
:
4746 # As of 2023-06-01, YouTube sets subscriberCountText to the handle in search
4747 channel_handle
= self
.handle_or_none(self
._get
_text
(renderer
, 'subscriberCountText'))
4752 'ie_key': YoutubeTabIE
.ie_key(),
4755 'channel_id': channel_id
,
4756 'channel_url': channel_url
,
4758 'uploader_id': channel_handle
,
4759 'uploader_url': format_field(channel_handle
, None, 'https://www.youtube.com/%s', default
=None),
4760 # See above. YouTube sets videoCountText to the subscriber text in search channel renderers.
4761 # However, in feed/channels this is set correctly to the subscriber count
4762 'channel_follower_count': traverse_obj(
4763 renderer
, 'subscriberCountText', 'videoCountText', expected_type
=self
._get
_count
),
4764 'thumbnails': self
._extract
_thumbnails
(renderer
, 'thumbnail'),
4766 # videoCountText may be the subscriber count
4767 self
._get
_count
(renderer
, 'videoCountText')
4768 if self
._get
_count
(renderer
, 'subscriberCountText') is not None else None),
4769 'description': self
._get
_text
(renderer
, 'descriptionSnippet'),
4770 'channel_is_verified': True if self
._has
_badge
(
4771 self
._extract
_badges
(traverse_obj(renderer
, 'ownerBadges')), BadgeType
.VERIFIED
) else None,
def _grid_entries(self, grid_renderer):
    """Yield one result per usable item in ``grid_renderer['items']``.

    Each item is resolved through ``_extract_basic_item_renderer`` and then
    dispatched, in order of precedence, as a playlist, a video, a channel or,
    failing those, a generic navigation endpoint URL.
    """
    for item in grid_renderer['items']:
        if not isinstance(item, dict):
            continue
        renderer = self._extract_basic_item_renderer(item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        if playlist_id := renderer.get('playlistId'):
            # playlist
            yield self.url_result(
                f'https://www.youtube.com/playlist?list={playlist_id}',
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif renderer.get('videoId'):
            # video
            yield self._extract_video(renderer)
        elif renderer.get('channelId'):
            # channel
            yield self._extract_channel_renderer(renderer)
        else:
            # generic endpoint URL support
            ep_url = urljoin('https://www.youtube.com/', try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                str))
            if not ep_url:
                continue
            # Only the first extractor that accepts the URL is used
            matched_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(ep_url)), None)
            if matched_ie is not None:
                yield self.url_result(
                    ep_url, ie=matched_ie.ie_key(), video_id=matched_ie._match_id(ep_url), video_title=title)
def _music_reponsive_list_entry(self, renderer):
    """Turn a music responsive list item renderer into a URL result.

    Precedence: a direct video (``playlistItemData.videoId``), then a watch
    endpoint playlist (with or without a starting video), then a browse
    endpoint.  Returns None when none of these are present.
    """
    if video_id := traverse_obj(renderer, ('playlistItemData', 'videoId')):
        title = traverse_obj(renderer, (
            'flexColumns', 0, 'musicResponsiveListItemFlexColumnRenderer',
            'text', 'runs', 0, 'text'))
        return self.url_result(
            f'https://music.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id, title=title)

    if playlist_id := traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId')):
        video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        if video_id:
            playlist_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    if browse_id := traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId')):
        return self.url_result(
            f'https://music.youtube.com/browse/{browse_id}',
            ie=YoutubeTabIE.ie_key(), video_id=browse_id)

    return None
def _shelf_entries_from_content(self, shelf_renderer):
    """Yield the grid entries embedded in a shelf renderer's ``content``.

    Only ``gridRenderer`` and ``expandedShelfContentsRenderer`` content is
    extracted; nothing is yielded for any other content.
    """
    content = shelf_renderer.get('content')
    if not isinstance(content, dict):
        return

    grid_like = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
    if grid_like:
        # TODO: add support for nested playlists so each shelf is processed
        # as separate playlist
        # TODO: this includes only first N items
        yield from self._grid_entries(grid_like)

    # TODO: 'horizontalListRenderer' content is recognised but not extracted yet
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf's own endpoint, then its embedded entries.

    With *skip_channels* set, a shelf whose URL contains ``/channels?`` yields
    nothing at all (neither the URL result nor the embedded content).
    """
    endpoint_url = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_url)
    if shelf_url:
        # Links to other channels are skipped by URL pattern; checking
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
def _playlist_entries(self, video_list_renderer):
    """Yield an extracted video for every playlist item that carries a video ID.

    Items are read from ``video_list_renderer['contents']``; both
    ``playlistVideoRenderer`` and ``playlistPanelVideoRenderer`` are accepted.
    Malformed items and items without a ``videoId`` are skipped.
    """
    for item in video_list_renderer['contents']:
        if not isinstance(item, dict):
            continue
        video_renderer = item.get('playlistVideoRenderer') or item.get('playlistPanelVideoRenderer')
        if isinstance(video_renderer, dict) and video_renderer.get('videoId'):
            yield self._extract_video(video_renderer)
def _rich_entries(self, rich_grid_renderer):
    """Yield at most one entry for a rich item: its video, or else its playlist.

    The first of ``videoRenderer``, ``reelItemRenderer`` and ``playlistRenderer``
    found under ``content`` is used.
    """
    renderer = traverse_obj(
        rich_grid_renderer,
        ('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer')), get_all=False) or {}

    if renderer.get('videoId'):
        yield self._extract_video(renderer)
    elif playlist_id := renderer.get('playlistId'):
        yield self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}',
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=self._get_text(renderer, 'title'))
def _video_entry(self, video_renderer):
    """Return the extracted video for *video_renderer*, or None if it has no ``videoId``."""
    if not video_renderer.get('videoId'):
        return None
    return self._extract_video(video_renderer)
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a URL result for a hashtag tile, or None when it has no usable tap URL."""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a backstage (community) post.

    In order: the attached video, the attached playlist, then every video URL
    linked inline in the post text that an attached video has not already covered.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return

    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = video_renderer.get('videoId')
    if video_id:
        attached_video = self._extract_video(video_renderer)
        if attached_video:
            yield attached_video

    # playlist attachment
    playlist_id = try_get(
        post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}',
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # A link to the attached video's own ID would only duplicate it
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
def _post_thread_continuation_entries(self, post_thread_continuation):
    """Yield entries from a post-thread continuation's ``contents`` list.

    Each item is handled as a ``backstagePostThreadRenderer`` (expanded through
    ``_post_thread_entries``) or, failing that, as a plain ``videoRenderer``.

    Items that are not dicts are skipped instead of raising AttributeError,
    matching the isinstance guards used by the sibling entry generators, and a
    video renderer for which ``_video_entry`` returns nothing is dropped rather
    than yielded as None.
    """
    contents = post_thread_continuation.get('contents')
    if not isinstance(contents, list):
        return
    for content in contents:
        if not isinstance(content, dict):
            continue
        renderer = content.get('backstagePostThreadRenderer')
        if isinstance(renderer, dict):
            yield from self._post_thread_entries(renderer)
            continue
        renderer = content.get('videoRenderer')
        if isinstance(renderer, dict):
            entry = self._video_entry(renderer)
            if entry:
                yield entry
4954 def _rich_grid_entries(self, contents):
4955 for content in contents:
4956 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4958 entry = self._video_entry(video_renderer)
def _report_history_entries(self, renderer):
    """Yield a YoutubeIE URL result for every linked URL found in a report history table."""
    # Path to the navigation URL of each text run in each table cell
    linked_url_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for linked_url in traverse_obj(renderer, linked_url_path):
        yield self.url_result(urljoin('https://www.youtube.com', linked_url), YoutubeIE)
4970 def _extract_entries(self
, parent_renderer
, continuation_list
):
4971 # continuation_list is modified in-place with continuation_list = [continuation_token]
4972 continuation_list
[:] = [None]
4973 contents
= try_get(parent_renderer
, lambda x
: x
['contents'], list) or []
4974 for content
in contents
:
4975 if not isinstance(content
, dict):
4977 is_renderer
= traverse_obj(
4978 content
, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
4981 if content
.get('richItemRenderer'):
4982 for entry
in self
._rich
_entries
(content
['richItemRenderer']):
4984 continuation_list
[0] = self
._extract
_continuation
(parent_renderer
)
4985 elif content
.get('reportHistorySectionRenderer'): # https://www.youtube.com/reporthistory
4986 table
= traverse_obj(content
, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
4987 yield from self
._report
_history
_entries
(table
)
4988 continuation_list
[0] = self
._extract
_continuation
(table
)
4991 isr_contents
= try_get(is_renderer
, lambda x
: x
['contents'], list) or []
4992 for isr_content
in isr_contents
:
4993 if not isinstance(isr_content
, dict):
4997 'playlistVideoListRenderer': self
._playlist
_entries
,
4998 'gridRenderer': self
._grid
_entries
,
4999 'reelShelfRenderer': self
._grid
_entries
,
5000 'shelfRenderer': self
._shelf
_entries
,
5001 'musicResponsiveListItemRenderer': lambda x
: [self
._music
_reponsive
_list
_entry
(x
)],
5002 'backstagePostThreadRenderer': self
._post
_thread
_entries
,
5003 'videoRenderer': lambda x
: [self
._video
_entry
(x
)],
5004 'playlistRenderer': lambda x
: self
._grid
_entries
({'items': [{'playlistRenderer': x}
]}),
5005 'channelRenderer': lambda x
: self
._grid
_entries
({'items': [{'channelRenderer': x}
]}),
5006 'hashtagTileRenderer': lambda x
: [self
._hashtag
_tile
_entry
(x
)],
5007 'richGridRenderer': lambda x
: self
._extract
_entries
(x
, continuation_list
),
5009 for key
, renderer
in isr_content
.items():
5010 if key
not in known_renderers
:
5012 for entry
in known_renderers
[key
](renderer
):
5015 continuation_list
[0] = self
._extract
_continuation
(renderer
)
5018 if not continuation_list
[0]:
5019 continuation_list
[0] = self
._extract
_continuation
(is_renderer
)
5021 if not continuation_list
[0]:
5022 continuation_list
[0] = self
._extract
_continuation
(parent_renderer
)
5024 def _entries(self
, tab
, item_id
, ytcfg
, account_syncid
, visitor_data
):
5025 continuation_list
= [None]
5026 extract_entries
= lambda x
: self
._extract
_entries
(x
, continuation_list
)
5027 tab_content
= try_get(tab
, lambda x
: x
['content'], dict)
5031 try_get(tab_content
, lambda x
: x
['sectionListRenderer'], dict)
5032 or try_get(tab_content
, lambda x
: x
['richGridRenderer'], dict) or {})
5033 yield from extract_entries(parent_renderer
)
5034 continuation
= continuation_list
[0]
5035 seen_continuations
= set()
5036 for page_num
in itertools
.count(1):
5037 if not continuation
:
5039 continuation_token
= continuation
.get('continuation')
5040 if continuation_token
is not None and continuation_token
in seen_continuations
:
5041 self
.write_debug('Detected YouTube feed looping - assuming end of feed.')
5043 seen_continuations
.add(continuation_token
)
5044 headers
= self
.generate_api_headers(
5045 ytcfg
=ytcfg
, account_syncid
=account_syncid
, visitor_data
=visitor_data
)
5046 response
= self
._extract
_response
(
5047 item_id
=f
'{item_id} page {page_num}',
5048 query
=continuation
, headers
=headers
, ytcfg
=ytcfg
,
5049 check_get_keys
=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
5053 # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
5054 # See: https://github.com/ytdl-org/youtube-dl/issues/28702
5055 visitor_data
= self
._extract
_visitor
_data
(response
) or visitor_data
5058 'videoRenderer': (self
._grid
_entries
, 'items'), # for membership tab
5059 'gridPlaylistRenderer': (self
._grid
_entries
, 'items'),
5060 'gridVideoRenderer': (self
._grid
_entries
, 'items'),
5061 'gridChannelRenderer': (self
._grid
_entries
, 'items'),
5062 'playlistVideoRenderer': (self
._playlist
_entries
, 'contents'),
5063 'itemSectionRenderer': (extract_entries
, 'contents'), # for feeds
5064 'richItemRenderer': (extract_entries
, 'contents'), # for hashtag
5065 'backstagePostThreadRenderer': (self
._post
_thread
_continuation
_entries
, 'contents'),
5066 'reportHistoryTableRowRenderer': (self
._report
_history
_entries
, 'rows'),
5067 'playlistVideoListContinuation': (self
._playlist
_entries
, None),
5068 'gridContinuation': (self
._grid
_entries
, None),
5069 'itemSectionContinuation': (self
._post
_thread
_continuation
_entries
, None),
5070 'sectionListContinuation': (extract_entries
, None), # for feeds
5073 continuation_items
= traverse_obj(response
, (
5074 ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
5075 'appendContinuationItemsAction', 'continuationItems',
5076 ), 'continuationContents', get_all
=False)
5077 continuation_item
= traverse_obj(continuation_items
, 0, None, expected_type
=dict, default
={})
5079 video_items_renderer
= None
5080 for key
in continuation_item
:
5081 if key
not in known_renderers
:
5083 func
, parent_key
= known_renderers
[key
]
5084 video_items_renderer
= {parent_key: continuation_items}
if parent_key
else continuation_items
5085 continuation_list
= [None]
5086 yield from func(video_items_renderer
)
5087 continuation
= continuation_list
[0] or self
._extract
_continuation
(video_items_renderer
)
5089 if not video_items_renderer
:
def _extract_selected_tab(tabs, fatal=True):
    """Return the first tab renderer in *tabs* whose ``selected`` value is truthy.

    When there is none, raise ExtractorError if *fatal* is set, otherwise
    return None.
    """
    selected = next((tab for tab in tabs if tab.get('selected')), None)
    if selected is not None:
        return selected
    if fatal:
        raise ExtractorError('Unable to find selected tab')
    return None
def _extract_tab_renderers(response):
    """Collect the dict-valued tab renderers of a two-column browse response.

    Both ``tabRenderer`` and ``expandableTabRenderer`` entries are picked up.
    """
    tab_renderer_path = (
        'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ...,
        ('tabRenderer', 'expandableTabRenderer'))
    return traverse_obj(response, tab_renderer_path, expected_type=dict)
5105 def _extract_from_tabs(self
, item_id
, ytcfg
, data
, tabs
):
5106 metadata
= self
._extract
_metadata
_from
_tabs
(item_id
, data
)
5108 selected_tab
= self
._extract
_selected
_tab
(tabs
)
5109 metadata
['title'] += format_field(selected_tab
, 'title', ' - %s')
5110 metadata
['title'] += format_field(selected_tab
, 'expandedText', ' - %s')
5112 return self
.playlist_result(
5114 selected_tab
, metadata
['id'], ytcfg
,
5115 self
._extract
_account
_syncid
(ytcfg
, data
),
5116 self
._extract
_visitor
_data
(data
, ytcfg
)),
5119 def _extract_metadata_from_tabs(self
, item_id
, data
):
5120 info
= {'id': item_id}
5122 metadata_renderer
= traverse_obj(data
, ('metadata', 'channelMetadataRenderer'), expected_type
=dict)
5123 if metadata_renderer
:
5124 channel_id
= traverse_obj(metadata_renderer
, ('externalId', {self.ucid_or_none}
),
5125 ('channelUrl', {self.ucid_from_url}
))
5127 'channel': metadata_renderer
.get('title'),
5128 'channel_id': channel_id
,
5130 if info
['channel_id']:
5131 info
['id'] = info
['channel_id']
5133 metadata_renderer
= traverse_obj(data
, ('metadata', 'playlistMetadataRenderer'), expected_type
=dict)
5135 # pageHeaderViewModel slow rollout began April 2024
5136 page_header_view_model
= traverse_obj(data
, (
5137 'header', 'pageHeaderRenderer', 'content', 'pageHeaderViewModel', {dict}
))
5139 # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
5140 # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
5141 def _get_uncropped(url
):
5142 return url_or_none((url
or '').split('=')[0] + '=s0')
5144 avatar_thumbnails
= self
._extract
_thumbnails
(metadata_renderer
, 'avatar')
5145 if avatar_thumbnails
:
5146 uncropped_avatar
= _get_uncropped(avatar_thumbnails
[0]['url'])
5147 if uncropped_avatar
:
5148 avatar_thumbnails
.append({
5149 'url': uncropped_avatar
,
5150 'id': 'avatar_uncropped',
5155 self
._extract
_thumbnails
(data
, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
5156 or self
._extract
_thumbnails
(
5157 page_header_view_model
, ('banner', 'imageBannerViewModel', 'image'), final_key
='sources'))
5158 for banner
in channel_banners
:
5159 banner
['preference'] = -10
5162 uncropped_banner
= _get_uncropped(channel_banners
[0]['url'])
5163 if uncropped_banner
:
5164 channel_banners
.append({
5165 'url': uncropped_banner
,
5166 'id': 'banner_uncropped',
5170 # Deprecated - remove primary_sidebar_renderer when layout discontinued
5171 primary_sidebar_renderer
= self
._extract
_sidebar
_info
_renderer
(data
, 'playlistSidebarPrimaryInfoRenderer')
5172 playlist_header_renderer
= traverse_obj(data
, ('header', 'playlistHeaderRenderer'), expected_type
=dict)
5174 primary_thumbnails
= self
._extract
_thumbnails
(
5175 primary_sidebar_renderer
, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
5176 playlist_thumbnails
= self
._extract
_thumbnails
(
5177 playlist_header_renderer
, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))
5180 'title': (traverse_obj(metadata_renderer
, 'title')
5181 or self
._get
_text
(data
, ('header', 'hashtagHeaderRenderer', 'hashtag'))
5183 'availability': self
._extract
_availability
(data
),
5184 'channel_follower_count': (
5185 self
._get
_count
(data
, ('header', ..., 'subscriberCountText'))
5186 or traverse_obj(page_header_view_model
, (
5187 'metadata', 'contentMetadataViewModel', 'metadataRows', ..., 'metadataParts',
5188 lambda _
, v
: 'subscribers' in v
['text']['content'], 'text', 'content', {parse_count}
, any
))),
5189 'description': try_get(metadata_renderer
, lambda x
: x
.get('description', '')),
5190 'tags': (traverse_obj(data
, ('microformat', 'microformatDataRenderer', 'tags', ..., {str}
))
5191 or traverse_obj(metadata_renderer
, ('keywords', {lambda x: x and shlex.split(x)}
, ...))),
5192 'thumbnails': (primary_thumbnails
or playlist_thumbnails
) + avatar_thumbnails
+ channel_banners
,
5196 traverse_obj(metadata_renderer
, (('vanityChannelUrl', ('ownerUrls', ...)), {self.handle_from_url}
), get_all
=False)
5197 or traverse_obj(data
, ('header', ..., 'channelHandleText', {self.handle_or_none}
), get_all
=False))
5201 'uploader_id': channel_handle
,
5202 'uploader_url': format_field(channel_handle
, None, 'https://www.youtube.com/%s', default
=None),
5205 channel_badges
= self
._extract
_badges
(traverse_obj(data
, ('header', ..., 'badges'), get_all
=False))
5206 if self
._has
_badge
(channel_badges
, BadgeType
.VERIFIED
):
5207 info
['channel_is_verified'] = True
5208 # Playlist stats is a text runs array containing [video count, view count, last updated].
5209 # last updated or (view count and last updated) may be missing.
5210 playlist_stats
= get_first(
5211 (primary_sidebar_renderer
, playlist_header_renderer
), (('stats', 'briefStats', 'numVideosText'), ))
5213 last_updated_unix
= self
._parse
_time
_text
(
5214 self
._get
_text
(playlist_stats
, 2) # deprecated, remove when old layout discontinued
5215 or self
._get
_text
(playlist_header_renderer
, ('byline', 1, 'playlistBylineRenderer', 'text')))
5216 info
['modified_date'] = strftime_or_none(last_updated_unix
)
5218 info
['view_count'] = self
._get
_count
(playlist_stats
, 1)
5219 if info
['view_count'] is None: # 0 is allowed
5220 info
['view_count'] = self
._get
_count
(playlist_header_renderer
, 'viewCountText')
5221 if info
['view_count'] is None:
5222 info
['view_count'] = self
._get
_count
(data
, (
5223 'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., 'tabRenderer', 'content', 'sectionListRenderer',
5224 'contents', ..., 'itemSectionRenderer', 'contents', ..., 'channelAboutFullMetadataRenderer', 'viewCountText'))
5226 info
['playlist_count'] = self
._get
_count
(playlist_stats
, 0)
5227 if info
['playlist_count'] is None: # 0 is allowed
5228 info
['playlist_count'] = self
._get
_count
(playlist_header_renderer
, ('byline', 0, 'playlistBylineRenderer', 'text'))
5230 if not info
.get('channel_id'):
5231 owner
= traverse_obj(playlist_header_renderer
, 'ownerText')
5232 if not owner
: # Deprecated
5233 owner
= traverse_obj(
5234 self
._extract
_sidebar
_info
_renderer
(data
, 'playlistSidebarSecondaryInfoRenderer'),
5235 ('videoOwner', 'videoOwnerRenderer', 'title'))
5236 owner_text
= self
._get
_text
(owner
)
5237 browse_ep
= traverse_obj(owner
, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
5239 'channel': self
._search
_regex
(r
'^by (.+) and \d+ others?$', owner_text
, 'uploader', default
=owner_text
),
5240 'channel_id': self
.ucid_or_none(browse_ep
.get('browseId')),
5241 'uploader_id': self
.handle_from_url(urljoin('https://www.youtube.com', browse_ep
.get('canonicalBaseUrl'))),
5245 'uploader': info
['channel'],
5246 'channel_url': format_field(info
.get('channel_id'), None, 'https://www.youtube.com/channel/%s', default
=None),
5247 'uploader_url': format_field(info
.get('uploader_id'), None, 'https://www.youtube.com/%s', default
=None),
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the entries of an inline playlist (the playlist panel of a watch page), page by page.

    Each pass yields the entries of the current panel that follow the last entry
    already yielded, then requests the following page from the `next` endpoint.
    @param playlist: panel dict whose 'contents' hold the current page of entries
    @param playlist_id: playlist ID sent with every follow-up request
    @param data: initial response, handed to the account sync ID / visitor data helpers
    @param ytcfg: ytcfg used to build the API headers and the request
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        # NOTE(review): the two exits below were not legible in the reviewed extract and are
        # restored from the surrounding logic (empty page / nothing new on this page) - confirm
        if not videos:
            return
        # Resume right after the last yielded entry; start from 0 when it is not on this page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']

        # The last panel item is not guaranteed to carry a watchEndpoint; fall back to an
        # empty dict so that the per-field defaults below apply instead of raising
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D',
        }
        response = self._extract_response(
            item_id=f'{playlist_id} page {page_num}',
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents',
        )
        # The panel of the new response becomes the page processed by the next iteration
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Build the result for a playlist found in a watch page response.

    Returns a URL result pointing at the playlist's own page when one is
    advertised, differs from `url` and is not known to be broken; otherwise
    returns a playlist result backed by the inline playlist entries.
    @param item_id: fallback playlist ID when `playlist` has no 'playlistId'
    @param url: URL being extracted; base for resolving the playlist page URL
    @param data: response, used for the fallback title
    @param playlist: playlist dict from the response
    @param ytcfg: ytcfg forwarded to the inline playlist extraction
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title') or try_get(data, lambda x: x['titleText']['simpleText'], str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    # NOTE(review): the `str` type filter and the `video_title` argument further down were
    # not legible in the reviewed extract - confirm against the file
    page_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    playlist_url = urljoin(url, page_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    should_delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if not should_delegate:
        entries = self._extract_inline_playlist(playlist, playlist_id, data, ytcfg)
        return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    @returns 'public' when a badge, the header privacy or the selected privacy
             dropdown entry says so; otherwise whatever self._availability()
             derives from the collected flags
    """
    sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
    player_header_privacy = playlist_header_renderer.get('privacy')

    badges = self._extract_badges(traverse_obj(sidebar_renderer, 'badges'))

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    # NOTE(review): `expected_type=str` here and `needs_auth=False` below were not legible in the
    # reviewed extract - confirm against the file
    privacy_setting_icon = get_first(
        (playlist_header_renderer, sidebar_renderer),
        ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
         lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        expected_type=str)

    microformats_is_unlisted = traverse_obj(
        data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)

    def privacy_is(header_value, icon_value):
        # Answer from the first privacy signal that is present (header, then dropdown icon);
        # None when neither is available
        if player_header_privacy is not None:
            return player_header_privacy == header_value
        if privacy_setting_icon is not None:
            return privacy_setting_icon == icon_value
        return None

    if (self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or player_header_privacy == 'PUBLIC'
            or privacy_setting_icon == 'PRIVACY_PUBLIC'):
        return 'public'

    # A conditional expression binds looser than `or`, so writing
    # `badge or header == X if header is not None else ...` drops the badge whenever
    # the header privacy is missing. The badge is combined explicitly here instead.
    is_private = (
        self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
        or privacy_is('PRIVATE', 'PRIVACY_PRIVATE'))

    unlisted_signal = privacy_is('UNLISTED', 'PRIVACY_UNLISTED')
    if unlisted_signal is None:
        # Last resort: the microformat flag (a bool, or None when absent)
        unlisted_signal = microformats_is_unlisted
    is_unlisted = self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or unlisted_signal

    return self._availability(
        is_private=is_private,
        is_unlisted=is_unlisted,
        needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
        needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
        needs_auth=False)
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first non-empty `info_renderer` entry among the playlist sidebar items.

    Takes no `self`; callers in this file invoke it through the instance, so it is
    presumably bound as a static method (decorator not visible in this view).
    @param data: response
    @param info_renderer: key to look up in each sidebar item
    @param expected_type: type the found value must have to be accepted
    @returns the renderer, or None if no item provides a truthy one
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    # NOTE(review): the "return the first truthy renderer" step was not legible in the
    # reviewed extract and is restored from how callers use the result - confirm
    return next(filter(None, candidates), None)
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Reload playlists with unavailable videos (e.g. private videos, region blocked, etc.)

    Does nothing (returns None) when `data` has neither playlist metadata nor a
    playlist header. The request is non-fatal.
    @param item_id: playlist ID; requested as browse ID 'VL<item_id>'
    @param data: response of the playlist as originally loaded
    @param ytcfg: ytcfg used for the API headers and the request
    """
    playlist_markers = traverse_obj(
        data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer'))
    # NOTE(review): this early exit was not legible in the reviewed extract and is
    # restored from the `is_playlist` computation - confirm against the file
    if not playlist_markers:
        return None

    api_headers = self.generate_api_headers(
        ytcfg=ytcfg,
        account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    return self._extract_response(
        item_id=item_id,
        headers=api_headers,
        query={'params': 'wgYCCAA=', 'browseId': f'VL{item_id}'},
        check_get_keys='contents',
        fatal=False,
        ytcfg=ytcfg,
        note='Redownloading playlist API JSON with unavailable videos')
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the tab extractor's `skip` extractor argument (computed once)"""
    skipped_steps = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped_steps
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its initial data, retrying on transient failures.

    @param url: page URL to download
    @param item_id: ID used for the download and for reporting
    @param fatal: whether failures raise (True) or are only reported as warnings
    @returns (webpage, data)
    """
    # NOTE(review): the `try:` lines, the retry/stop statements after each error branch and
    # the reset of `data` on incomplete initial data were not legible in the reviewed
    # extract; they are restored from the surrounding logic - confirm against the file
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as err:
            cause = err.cause
            # Retry network errors, except HTTP 403/429 which are reported right away
            should_retry = isinstance(cause, network_exceptions) and (
                not isinstance(cause, HTTPError) or cause.status not in (403, 429))
            if should_retry:
                retry.error = err
                continue
            self._error_or_warning(err, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as err:
            self._error_or_warning(err, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        has_payload = traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not has_payload:
            retry.error = ExtractorError('Incomplete yt initial data received')
            data = None
            continue

    return webpage, data
def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """Use if failed to extract ytcfg (and data) from initial webpage"""
    # Only relevant for authenticated sessions that ended up without a ytcfg
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    check_skipped = 'authcheck' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if fatal and not check_skipped:
        # NOTE(review): the `expected=True` argument was not legible in the reviewed
        # extract - confirm against the file
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Obtain the tab data for `url`, from the webpage when allowed, else through the API.

    @param url: tab/playlist URL
    @param item_id: ID used for requests and reporting
    @param ytcfg: ytcfg to use; extracted from the webpage when not given
    @param fatal: whether a home page redirect or API failure raises
    @param webpage_fatal: whether a failed webpage download raises
    @param default_client: client name forwarded to the API request
    """
    # NOTE(review): the initial `data = None`, the `if fatal:` / `if not data:` guards and the
    # final return of (data, ytcfg) were not legible in the reviewed extract and are restored
    # from the surrounding logic - confirm against the file
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)

        # Reject webpage data if redirected to home page without explicitly requesting
        selected_tab = self._extract_selected_tab(self._extract_tab_renderers(data), fatal=False) or {}
        landed_on_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if landed_on_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        # No usable webpage data: fall back to resolving the URL through the API
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the API and fetch the response of the endpoint it maps to.

    The URL is first resolved via 'navigation/resolve_url'; a browse endpoint is
    then requested from 'browse', a watch endpoint from 'next'.
    @param url: URL to resolve
    @param item_id: ID used for requests and reporting
    @param ytcfg: ytcfg used for the API headers and requests
    @param fatal: raise if the URL cannot be resolved, otherwise only warn
    @param default_client: client name forwarded to the API requests
    """
    api_headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=api_headers,
        ytcfg=ytcfg, fatal=fatal, ep='navigation/resolve_url',
        note='Downloading API parameters API JSON', default_client=default_client)

    # NOTE(review): the `if params:` and `if fatal:` guards were not legible in the reviewed
    # extract and are restored from the surrounding logic - confirm against the file
    for ep_key, ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolved, lambda x: x['endpoint'][ep_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=ep, headers=api_headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)
# Search `params` value that _search_results() falls back to when the caller does not pass one
_SEARCH_PARAMS = None
def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of a search, following continuations page by page.

    @param query: search query
    @param params: search `params` value; defaults to self._SEARCH_PARAMS and is
                   omitted from the request when falsy
    @param default_client: client name used for the ytcfg download and API requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    # NOTE(review): the `if params:` guard, the `content_keys = (` opener and the initial
    # `search = None` / final loop exit were not legible in the reviewed extract and are
    # restored from the surrounding logic - confirm against the file
    data = {'query': query}
    if params:
        data['params'] = params

    # Paths under which a response may carry the result sections
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # Top-level keys of those paths, de-duplicated
    check_get_keys = tuple({path[0] for path in content_keys})
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # Single-slot list handed to _extract_entries; slot 0 is read back after each page
    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        data.update(continuation_list[0] or {})
        api_headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=api_headers)
        sections = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(sections))}, continuation_list)
        if not continuation_list[0]:
            break
5494 class YoutubeTabIE(YoutubeTabBaseInfoExtractor
):
5495 IE_DESC
= 'YouTube Tabs'
5496 _VALID_URL
= r
'''(?x:
5498 (?!consent\.)(?:\w+\.)?
5500 youtube(?:kids)?\.com|
5504 (?P<channel_type>channel|c|user|browse)/|
5507 (?:playlist|watch)\?.*?\blist=
5509 (?!(?:{reserved_names})\b) # Direct URLs
5513 reserved_names
=YoutubeBaseInfoExtractor
._RESERVED
_NAMES
,
5514 invidious
='|'.join(YoutubeBaseInfoExtractor
._INVIDIOUS
_SITES
),
5516 IE_NAME
= 'youtube:tab'
5519 'note': 'playlists, multipage',
5520 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
5521 'playlist_mincount': 94,
5523 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
5524 'title': 'Igor Kleiner Ph.D. - Playlists',
5525 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
5526 'uploader': 'Igor Kleiner Ph.D.',
5527 'uploader_id': '@IgorDataScience',
5528 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
5529 'channel': 'Igor Kleiner Ph.D.',
5530 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5531 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
5532 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5533 'channel_follower_count': int,
5536 'note': 'playlists, multipage, different order',
5537 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
5538 'playlist_mincount': 94,
5540 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
5541 'title': 'Igor Kleiner Ph.D. - Playlists',
5542 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
5543 'uploader': 'Igor Kleiner Ph.D.',
5544 'uploader_id': '@IgorDataScience',
5545 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
5546 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
5547 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5548 'channel': 'Igor Kleiner Ph.D.',
5549 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5550 'channel_follower_count': int,
5553 'note': 'playlists, series',
5554 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
5555 'playlist_mincount': 5,
5557 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5558 'title': '3Blue1Brown - Playlists',
5559 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
5560 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5561 'channel': '3Blue1Brown',
5562 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5563 'uploader_id': '@3blue1brown',
5564 'uploader_url': 'https://www.youtube.com/@3blue1brown',
5565 'uploader': '3Blue1Brown',
5566 'tags': ['Mathematics'],
5567 'channel_follower_count': int,
5568 'channel_is_verified': True,
5571 'note': 'playlists, singlepage',
5572 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
5573 'playlist_mincount': 4,
5575 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5576 'title': 'ThirstForScience - Playlists',
5577 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
5578 'uploader': 'ThirstForScience',
5579 'uploader_url': 'https://www.youtube.com/@ThirstForScience',
5580 'uploader_id': '@ThirstForScience',
5581 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5582 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
5584 'channel': 'ThirstForScience',
5585 'channel_follower_count': int,
5588 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
5589 'only_matching': True,
5591 'note': 'basic, single video playlist',
5592 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5594 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5595 'title': 'youtube-dl public playlist',
5599 'modified_date': '20201130',
5600 'channel': 'Sergey M.',
5601 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5602 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5603 'availability': 'public',
5604 'uploader': 'Sergey M.',
5605 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
5606 'uploader_id': '@sergeym.6173',
5608 'playlist_count': 1,
5610 'note': 'empty playlist',
5611 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5613 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5614 'title': 'youtube-dl empty playlist',
5616 'channel': 'Sergey M.',
5618 'modified_date': '20230921',
5619 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5620 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5621 'availability': 'unlisted',
5622 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
5623 'uploader_id': '@sergeym.6173',
5624 'uploader': 'Sergey M.',
5626 'playlist_count': 0,
5629 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
5631 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5632 'title': 'lex will - Home',
5633 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5634 'uploader': 'lex will',
5635 'uploader_id': '@lexwill718',
5636 'channel': 'lex will',
5637 'tags': ['bible', 'history', 'prophesy'],
5638 'uploader_url': 'https://www.youtube.com/@lexwill718',
5639 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5640 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5641 'channel_follower_count': int,
5643 'playlist_mincount': 2,
5645 'note': 'Videos tab',
5646 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
5648 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5649 'title': 'lex will - Videos',
5650 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5651 'uploader': 'lex will',
5652 'uploader_id': '@lexwill718',
5653 'tags': ['bible', 'history', 'prophesy'],
5654 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5655 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5656 'uploader_url': 'https://www.youtube.com/@lexwill718',
5657 'channel': 'lex will',
5658 'channel_follower_count': int,
5660 'playlist_mincount': 975,
5662 'note': 'Videos tab, sorted by popular',
5663 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
5665 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5666 'title': 'lex will - Videos',
5667 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5668 'uploader': 'lex will',
5669 'uploader_id': '@lexwill718',
5670 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5671 'uploader_url': 'https://www.youtube.com/@lexwill718',
5672 'channel': 'lex will',
5673 'tags': ['bible', 'history', 'prophesy'],
5674 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5675 'channel_follower_count': int,
5677 'playlist_mincount': 199,
5679 'note': 'Playlists tab',
5680 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
5682 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5683 'title': 'lex will - Playlists',
5684 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5685 'uploader': 'lex will',
5686 'uploader_id': '@lexwill718',
5687 'uploader_url': 'https://www.youtube.com/@lexwill718',
5688 'channel': 'lex will',
5689 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5690 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5691 'tags': ['bible', 'history', 'prophesy'],
5692 'channel_follower_count': int,
5694 'playlist_mincount': 17,
5696 'note': 'Community tab',
5697 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
5699 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5700 'title': 'lex will - Community',
5701 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5702 'channel': 'lex will',
5703 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5704 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5705 'tags': ['bible', 'history', 'prophesy'],
5706 'channel_follower_count': int,
5707 'uploader_url': 'https://www.youtube.com/@lexwill718',
5708 'uploader_id': '@lexwill718',
5709 'uploader': 'lex will',
5711 'playlist_mincount': 18,
5713 'note': 'Channels tab',
5714 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
5716 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5717 'title': 'lex will - Channels',
5718 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5719 'channel': 'lex will',
5720 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5721 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5722 'tags': ['bible', 'history', 'prophesy'],
5723 'channel_follower_count': int,
5724 'uploader_url': 'https://www.youtube.com/@lexwill718',
5725 'uploader_id': '@lexwill718',
5726 'uploader': 'lex will',
5728 'playlist_mincount': 12,
5730 'note': 'Search tab',
5731 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
5732 'playlist_mincount': 40,
5734 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5735 'title': '3Blue1Brown - Search - linear algebra',
5736 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
5737 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5738 'tags': ['Mathematics'],
5739 'channel': '3Blue1Brown',
5740 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5741 'channel_follower_count': int,
5742 'uploader_url': 'https://www.youtube.com/@3blue1brown',
5743 'uploader_id': '@3blue1brown',
5744 'uploader': '3Blue1Brown',
5745 'channel_is_verified': True,
5748 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5749 'only_matching': True,
5751 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5752 'only_matching': True,
5754 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5755 'only_matching': True,
5757 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
5758 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5760 'title': '29C3: Not my department',
5761 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5762 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
5765 'modified_date': '20150605',
5766 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5767 'channel_url': 'https://www.youtube.com/channel/UCEPzS1rYsrkqzSLNp76nrcg',
5768 'channel': 'Christiaan008',
5769 'availability': 'public',
5770 'uploader_id': '@ChRiStIaAn008',
5771 'uploader': 'Christiaan008',
5772 'uploader_url': 'https://www.youtube.com/@ChRiStIaAn008',
5774 'playlist_count': 96,
5776 'note': 'Large playlist',
5777 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
5779 'title': 'Uploads from Cauchemar',
5780 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
5781 'channel_url': 'https://www.youtube.com/channel/UCBABnxM4Ar9ten8Mdjj1j0Q',
5783 'modified_date': r
're:\d{8}',
5784 'channel': 'Cauchemar',
5787 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
5788 'availability': 'public',
5789 'uploader_id': '@Cauchemar89',
5790 'uploader': 'Cauchemar',
5791 'uploader_url': 'https://www.youtube.com/@Cauchemar89',
5793 'playlist_mincount': 1123,
5794 'expected_warnings': [r
'[Uu]navailable videos (are|will be) hidden'],
5796 'note': 'even larger playlist, 8832 videos',
5797 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
5798 'only_matching': True,
5800 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
5801 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
5803 'title': 'Uploads from Interstellar Movie',
5804 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
5807 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
5808 'channel_url': 'https://www.youtube.com/channel/UCXw-G3eDE9trcvY2sBMM_aA',
5809 'channel': 'Interstellar Movie',
5811 'modified_date': r
're:\d{8}',
5812 'availability': 'public',
5813 'uploader_id': '@InterstellarMovie',
5814 'uploader': 'Interstellar Movie',
5815 'uploader_url': 'https://www.youtube.com/@InterstellarMovie',
5817 'playlist_mincount': 21,
5819 'note': 'Playlist with "show unavailable videos" button',
5820 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
5822 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
5823 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
5825 'channel': 'Phim Siêu Nhân Nhật Bản',
5828 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5829 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5830 'modified_date': r
're:\d{8}',
5831 'availability': 'public',
5832 'uploader_url': 'https://www.youtube.com/@phimsieunhannhatban',
5833 'uploader_id': '@phimsieunhannhatban',
5834 'uploader': 'Phim Siêu Nhân Nhật Bản',
5836 'playlist_mincount': 200,
5837 'expected_warnings': [r
'[Uu]navailable videos (are|will be) hidden'],
5839 'note': 'Playlist with unavailable videos in page 7',
5840 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5842 'title': 'Uploads from BlankTV',
5843 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
5844 'channel': 'BlankTV',
5845 'channel_url': 'https://www.youtube.com/channel/UC8l9frL61Yl5KFOl87nIm2w',
5846 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5849 'modified_date': r
're:\d{8}',
5851 'availability': 'public',
5852 'uploader_id': '@blanktv',
5853 'uploader': 'BlankTV',
5854 'uploader_url': 'https://www.youtube.com/@blanktv',
5856 'playlist_mincount': 1000,
5857 'expected_warnings': [r
'[Uu]navailable videos (are|will be) hidden'],
5859 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5860 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5862 'title': 'Data Analysis with Dr Mike Pound',
5863 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5864 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
5867 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5868 'channel_url': 'https://www.youtube.com/channel/UC9-y-6csu5WGm29I7JiwpnA',
5869 'channel': 'Computerphile',
5870 'availability': 'public',
5871 'modified_date': '20190712',
5872 'uploader_id': '@Computerphile',
5873 'uploader': 'Computerphile',
5874 'uploader_url': 'https://www.youtube.com/@Computerphile',
5876 'playlist_mincount': 11,
5878 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5879 'only_matching': True,
5881 'note': 'Playlist URL that does not actually serve a playlist',
5882 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5884 'id': 'FqZTN594JQw',
5886 'title': "Smiley's People 01 detective, Adventure Series, Action",
5887 'upload_date': '20150526',
5888 'license': 'Standard YouTube License',
5889 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5890 'categories': ['People & Blogs'],
5896 'skip_download': True,
5898 'skip': 'This video is not available.',
5899 'add_ie': [YoutubeIE
.ie_key()],
5901 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5902 'only_matching': True,
5904 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5905 'only_matching': True,
5907 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5909 'id': 'hGkQjiJLjWQ', # This will keep changing
5912 'upload_date': r
're:\d{8}',
5914 'categories': ['News & Politics'],
5917 'release_timestamp': int,
5918 'channel': 'Sky News',
5919 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5922 'thumbnail': r
're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',
5923 'playable_in_embed': True,
5924 'release_date': r
're:\d+',
5925 'availability': 'public',
5926 'live_status': 'is_live',
5927 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
5928 'channel_follower_count': int,
5929 'concurrent_view_count': int,
5930 'uploader_url': 'https://www.youtube.com/@SkyNews',
5931 'uploader_id': '@SkyNews',
5932 'uploader': 'Sky News',
5933 'channel_is_verified': True,
5936 'skip_download': True,
5938 'expected_warnings': ['Ignoring subtitle tracks found in '],
5940 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5942 'id': 'a48o2S1cPoo',
5944 'title': 'The Young Turks - Live Main Show',
5945 'upload_date': '20150715',
5946 'license': 'Standard YouTube License',
5947 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5948 'categories': ['News & Politics'],
5949 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5953 'skip_download': True,
5955 'only_matching': True,
5957 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5958 'only_matching': True,
5960 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5961 'only_matching': True,
5963 'note': 'A channel that is not live. Should raise error',
5964 'url': 'https://www.youtube.com/user/numberphile/live',
5965 'only_matching': True,
5967 'url': 'https://www.youtube.com/feed/trending',
5968 'only_matching': True,
5970 'url': 'https://www.youtube.com/feed/library',
5971 'only_matching': True,
5973 'url': 'https://www.youtube.com/feed/history',
5974 'only_matching': True,
5976 'url': 'https://www.youtube.com/feed/subscriptions',
5977 'only_matching': True,
5979 'url': 'https://www.youtube.com/feed/watch_later',
5980 'only_matching': True,
5982 'note': 'Recommended - redirects to home page.',
5983 'url': 'https://www.youtube.com/feed/recommended',
5984 'only_matching': True,
5986 'note': 'inline playlist with not always working continuations',
5987 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5988 'only_matching': True,
5990 'url': 'https://www.youtube.com/course',
5991 'only_matching': True,
5993 'url': 'https://www.youtube.com/zsecurity',
5994 'only_matching': True,
5996 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5997 'only_matching': True,
5999 'url': 'https://www.youtube.com/TheYoungTurks/live',
6000 'only_matching': True,
6002 'url': 'https://www.youtube.com/hashtag/cctv9',
6005 'title': 'cctv9 - All',
6008 'playlist_mincount': 300, # not consistent but should be over 300
6010 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
6011 'only_matching': True,
6013 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
6014 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
6015 'only_matching': True,
6017 'note': '/browse/ should redirect to /channel/',
6018 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
6019 'only_matching': True,
6021 'note': 'VLPL, should redirect to playlist?list=PL...',
6022 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
6024 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
6025 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
6026 'title': 'NCS : All Releases 💿',
6027 'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
6028 'modified_date': r
're:\d{8}',
6030 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
6032 'channel': 'NoCopyrightSounds',
6033 'availability': 'public',
6034 'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',
6035 'uploader': 'NoCopyrightSounds',
6036 'uploader_id': '@NoCopyrightSounds',
6038 'playlist_mincount': 166,
6039 'expected_warnings': [r
'[Uu]navailable videos (are|will be) hidden', 'YouTube Music is not directly supported'],
6041 # TODO: fix 'unviewable' issue with this playlist when reloading with unavailable videos
6042 'note': 'Topic, should redirect to playlist?list=UU...',
6043 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
6045 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
6046 'title': 'Uploads from Royalty Free Music - Topic',
6048 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
6049 'channel': 'Royalty Free Music - Topic',
6051 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
6052 'modified_date': r
're:\d{8}',
6054 'availability': 'public',
6055 'uploader': 'Royalty Free Music - Topic',
6057 'playlist_mincount': 101,
6058 'expected_warnings': ['YouTube Music is not directly supported', r
'[Uu]navailable videos (are|will be) hidden'],
6060 # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
6061 # Treat as a general feed
6062 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
6064 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
6065 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
6068 'playlist_mincount': 9,
6070 'note': 'Youtube music Album',
6071 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
6073 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
6074 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
6078 'availability': 'unlisted',
6079 'modified_date': r
're:\d{8}',
6081 'playlist_count': 50,
6082 'expected_warnings': ['YouTube Music is not directly supported'],
6084 'note': 'unlisted single video playlist',
6085 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
6087 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
6088 'title': 'yt-dlp unlisted playlist test',
6089 'availability': 'unlisted',
6091 'modified_date': '20220418',
6092 'channel': 'colethedj',
6095 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
6096 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
6097 'uploader_url': 'https://www.youtube.com/@colethedj1894',
6098 'uploader_id': '@colethedj1894',
6099 'uploader': 'colethedj',
6103 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
6104 'id': 'BaW_jenozKc',
6106 'ie_key': 'Youtube',
6108 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
6109 'channel_url': 'https://www.youtube.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
6111 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc',
6112 'channel': 'Philipp Hagemeister',
6113 'uploader_id': '@PhilippHagemeister',
6114 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
6115 'uploader': 'Philipp Hagemeister',
6118 'playlist_count': 1,
6119 'params': {'extract_flat': True}
,
6121 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
6122 'url': 'https://www.youtube.com/feed/recommended',
6124 'id': 'recommended',
6125 'title': 'recommended',
6128 'playlist_mincount': 50,
6130 'skip_download': True,
6131 'extractor_args': {'youtubetab': {'skip': ['webpage']}
},
6134 'note': 'API Fallback: /videos tab, sorted by oldest first',
6135 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
6137 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
6138 'title': 'Cody\'sLab - Videos',
6139 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
6140 'channel': 'Cody\'sLab',
6141 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
6143 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
6144 'channel_follower_count': int,
6146 'playlist_mincount': 650,
6148 'skip_download': True,
6149 'extractor_args': {'youtubetab': {'skip': ['webpage']}
},
6151 'skip': 'Query for sorting no longer works',
6153 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
6154 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
6156 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
6157 'title': 'Uploads from Royalty Free Music - Topic',
6158 'modified_date': r
're:\d{8}',
6159 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
6161 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
6163 'channel': 'Royalty Free Music - Topic',
6165 'availability': 'public',
6166 'uploader': 'Royalty Free Music - Topic',
6168 'playlist_mincount': 101,
6170 'skip_download': True,
6171 'extractor_args': {'youtubetab': {'skip': ['webpage']}
},
6173 'expected_warnings': ['YouTube Music is not directly supported', r
'[Uu]navailable videos (are|will be) hidden'],
6175 'note': 'non-standard redirect to regional channel',
6176 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
6177 'only_matching': True,
6179 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
6180 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
6182 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
6183 'modified_date': '20220407',
6184 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
6186 'availability': 'unlisted',
6187 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
6188 'channel': 'pukkandan',
6189 'description': 'Test for collaborative playlist',
6190 'title': 'yt-dlp test - collaborative playlist',
6192 'uploader_url': 'https://www.youtube.com/@pukkandan',
6193 'uploader_id': '@pukkandan',
6194 'uploader': 'pukkandan',
6196 'playlist_mincount': 2,
6198 'note': 'translated tab name',
6199 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
6201 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6203 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6204 'description': 'test description',
6205 'title': 'cole-dlp-test-acc - 再生リスト',
6206 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6207 'channel': 'cole-dlp-test-acc',
6208 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6209 'uploader_id': '@coletdjnz',
6210 'uploader': 'cole-dlp-test-acc',
6212 'playlist_mincount': 1,
6213 'params': {'extractor_args': {'youtube': {'lang': ['ja']}
}},
6214 'expected_warnings': ['Preferring "ja"'],
6216 # XXX: this should really check flat playlist entries, but the test suite doesn't support that
6217 'note': 'preferred lang set with playlist with translated video titles',
6218 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
6220 'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
6223 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6224 'channel': 'cole-dlp-test-acc',
6225 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6226 'description': 'test',
6227 'title': 'dlp test playlist',
6228 'availability': 'public',
6229 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6230 'uploader_id': '@coletdjnz',
6231 'uploader': 'cole-dlp-test-acc',
6233 'playlist_mincount': 1,
6234 'params': {'extractor_args': {'youtube': {'lang': ['ja']}
}},
6235 'expected_warnings': ['Preferring "ja"'],
6237 # shorts audio pivot for 2GtVksBMYFM.
6238 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
6240 'id': 'sfv_audio_pivot',
6241 'title': 'sfv_audio_pivot',
6244 'playlist_mincount': 50,
6247 # Channel with a real live tab (not to be mistaken with streams tab)
6248 # Do not treat like it should redirect to live stream
6249 'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',
6251 'id': 'UCEH7P7kyJIkS_gJf93VYbmg',
6252 'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',
6255 'playlist_mincount': 20,
6257 # Tab name is not the same as tab id
6258 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',
6260 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6261 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',
6264 'playlist_mincount': 8,
6266 # Home tab id is literally home. Not to get mistaken with featured
6267 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',
6269 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6270 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',
6273 'playlist_mincount': 8,
6275 # Should get three playlists for videos, shorts and streams tabs
6276 'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
6278 'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
6279 'title': 'Polka Ch. 尾丸ポルカ',
6280 'channel_follower_count': int,
6281 'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
6282 'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
6283 'description': 'md5:49809d8bf9da539bc48ed5d1f83c33f2',
6284 'channel': 'Polka Ch. 尾丸ポルカ',
6286 'uploader_url': 'https://www.youtube.com/@OmaruPolka',
6287 'uploader': 'Polka Ch. 尾丸ポルカ',
6288 'uploader_id': '@OmaruPolka',
6289 'channel_is_verified': True,
6291 'playlist_count': 3,
6293 # Shorts tab with channel with handle
6294 # TODO: fix channel description
6295 'url': 'https://www.youtube.com/@NotJustBikes/shorts',
6297 'id': 'UC0intLFzLaudFG-xAvUEO-A',
6298 'title': 'Not Just Bikes - Shorts',
6300 'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
6301 'description': 'md5:5e82545b3a041345927a92d0585df247',
6302 'channel_follower_count': int,
6303 'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
6304 'channel': 'Not Just Bikes',
6305 'uploader_url': 'https://www.youtube.com/@NotJustBikes',
6306 'uploader': 'Not Just Bikes',
6307 'uploader_id': '@NotJustBikes',
6308 'channel_is_verified': True,
6310 'playlist_mincount': 10,
6313 'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',
6315 'id': 'UC3eYAvjCVwNHgkaGbXX3sig',
6316 'title': '中村悠一 - Live',
6318 'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
6319 'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
6321 'channel_follower_count': int,
6322 'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
6323 'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',
6324 'uploader_id': '@Yuichi-Nakamura',
6327 'playlist_mincount': 60,
6329 # Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.
6330 # See test_youtube_lists
6331 'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',
6332 'only_matching': True,
6334 # No uploads and no UCID given. Should fail with no uploads error
6335 # See test_youtube_lists
6336 'url': 'https://www.youtube.com/news',
6337 'only_matching': True,
6339 # No videos tab but has a shorts tab
6340 'url': 'https://www.youtube.com/c/TKFShorts',
6342 'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
6343 'title': 'Shorts Break - Shorts',
6345 'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
6346 'channel': 'Shorts Break',
6347 'description': 'md5:6de33c5e7ba686e5f3efd4e19c7ef499',
6348 'channel_follower_count': int,
6349 'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
6350 'uploader_url': 'https://www.youtube.com/@ShortsBreak_Official',
6351 'uploader': 'Shorts Break',
6352 'uploader_id': '@ShortsBreak_Official',
6354 'playlist_mincount': 30,
6356 # Trending Now Tab. tab id is empty
6357 'url': 'https://www.youtube.com/feed/trending',
6360 'title': 'trending - Now',
6363 'playlist_mincount': 30,
6365 # Trending Gaming Tab. tab id is empty
6366 'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
6369 'title': 'trending - Gaming',
6372 'playlist_mincount': 30,
6374 # Shorts url result in shorts tab
6375 # TODO: Fix channel id extraction
6376 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
6378 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6379 'title': 'cole-dlp-test-acc - Shorts',
6380 'channel': 'cole-dlp-test-acc',
6381 'description': 'test description',
6382 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6383 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6385 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6386 'uploader_id': '@coletdjnz',
6387 'uploader': 'cole-dlp-test-acc',
6391 # Channel data is not currently available for short renderers (as of 2023-03-01)
6393 'ie_key': 'Youtube',
6394 'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',
6395 'id': 'sSM9J5YH_60',
6396 'title': 'SHORT short',
6401 'params': {'extract_flat': True}
,
6403 # Live video status should be extracted
6404 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
6406 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6407 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO: should be Minecraft - Live or Minecraft - Topic - Live
6413 'ie_key': 'Youtube',
6414 'url': 'startswith:https://www.youtube.com/watch?v=',
6417 'live_status': 'is_live',
6420 'concurrent_view_count': int,
6423 'uploader_url': str,
6425 'channel_is_verified': bool, # this will keep changing
6428 'params': {'extract_flat': True, 'playlist_items': '1'}
,
6429 'playlist_mincount': 1,
6431 # Channel renderer metadata. Contains number of videos on the channel
6432 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
6434 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6435 'title': 'cole-dlp-test-acc - Channels',
6436 'channel': 'cole-dlp-test-acc',
6437 'description': 'test description',
6438 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6439 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6441 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6442 'uploader_id': '@coletdjnz',
6443 'uploader': 'cole-dlp-test-acc',
6448 'ie_key': 'YoutubeTab',
6449 'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6450 'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6451 'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6452 'title': 'PewDiePie',
6453 'channel': 'PewDiePie',
6454 'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6456 'channel_follower_count': int,
6457 'playlist_count': int,
6458 'uploader': 'PewDiePie',
6459 'uploader_url': 'https://www.youtube.com/@PewDiePie',
6460 'uploader_id': '@PewDiePie',
6461 'channel_is_verified': True,
6464 'params': {'extract_flat': True}
,
6466 'url': 'https://www.youtube.com/@3blue1brown/about',
6468 'id': '@3blue1brown',
6469 'tags': ['Mathematics'],
6470 'title': '3Blue1Brown',
6471 'channel_follower_count': int,
6472 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
6473 'channel': '3Blue1Brown',
6474 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
6475 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
6476 'uploader_url': 'https://www.youtube.com/@3blue1brown',
6477 'uploader_id': '@3blue1brown',
6478 'uploader': '3Blue1Brown',
6479 'channel_is_verified': True,
6481 'playlist_count': 0,
6483 # Podcasts tab, with rich entry playlistRenderers
6484 'url': 'https://www.youtube.com/@99percentinvisiblepodcast/podcasts',
6486 'id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6487 'channel_id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6488 'uploader_url': 'https://www.youtube.com/@99percentinvisiblepodcast',
6489 'description': 'md5:3a0ed38f1ad42a68ef0428c04a15695c',
6490 'title': '99 Percent Invisible - Podcasts',
6491 'uploader': '99 Percent Invisible',
6492 'channel_follower_count': int,
6493 'channel_url': 'https://www.youtube.com/channel/UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6495 'channel': '99 Percent Invisible',
6496 'uploader_id': '@99percentinvisiblepodcast',
6498 'playlist_count': 0,
6500 # Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
6501 'url': 'https://www.youtube.com/@AHimitsu/releases',
6503 'id': 'UCgFwu-j5-xNJml2FtTrrB3A',
6504 'channel': 'A Himitsu',
6505 'uploader_url': 'https://www.youtube.com/@AHimitsu',
6506 'title': 'A Himitsu - Releases',
6507 'uploader_id': '@AHimitsu',
6508 'uploader': 'A Himitsu',
6509 'channel_id': 'UCgFwu-j5-xNJml2FtTrrB3A',
6511 'description': 'I make music',
6512 'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',
6513 'channel_follower_count': int,
6514 'channel_is_verified': True,
6516 'playlist_mincount': 10,
6518 # Playlist with only shorts, shown as reel renderers
6519 # FIXME: future: YouTube currently doesn't give continuation for this,
6521 'url': 'https://www.youtube.com/playlist?list=UUxqPAgubo4coVn9Lx1FuKcg',
6523 'id': 'UUxqPAgubo4coVn9Lx1FuKcg',
6524 'channel_url': 'https://www.youtube.com/channel/UCxqPAgubo4coVn9Lx1FuKcg',
6526 'uploader_id': '@BangyShorts',
6528 'uploader_url': 'https://www.youtube.com/@BangyShorts',
6529 'channel_id': 'UCxqPAgubo4coVn9Lx1FuKcg',
6530 'channel': 'Bangy Shorts',
6531 'uploader': 'Bangy Shorts',
6533 'availability': 'public',
6534 'modified_date': r
're:\d{8}',
6535 'title': 'Uploads from Bangy Shorts',
6537 'playlist_mincount': 100,
6538 'expected_warnings': [r
'[Uu]navailable videos (are|will be) hidden'],
6540 'note': 'Tags containing spaces',
6541 'url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
6542 'playlist_count': 3,
6544 'id': 'UC7_YxT-KID8kRbqZo7MyscQ',
6545 'channel': 'Markiplier',
6546 'channel_id': 'UC7_YxT-KID8kRbqZo7MyscQ',
6547 'title': 'Markiplier',
6548 'channel_follower_count': int,
6549 'description': 'md5:0c010910558658824402809750dc5d97',
6550 'uploader_id': '@markiplier',
6551 'uploader_url': 'https://www.youtube.com/@markiplier',
6552 'uploader': 'Markiplier',
6553 'channel_url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
6554 'channel_is_verified': True,
6555 'tags': ['markiplier', 'comedy', 'gaming', 'funny videos', 'funny moments',
6556 'sketch comedy', 'laughing', 'lets play', 'challenge videos', 'hilarious',
6557 'challenges', 'sketches', 'scary games', 'funny games', 'rage games',
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    Any URL that ``YoutubeIE`` accepts is rejected here so that the video
    extractor takes precedence; everything else is decided by the inherited
    ``suitable`` implementation.
    """
    # The method takes ``cls`` and sibling extractors call it directly on the
    # class, so it has to be bound as a classmethod.
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
# Splits a tab URL into the pieces `_get_url_mobj` hands back to callers.
# The adjacent literals below are concatenated at compile time.
_URL_RE = re.compile(
    # "pre": everything matched by `_VALID_URL` (including its own named groups)
    rf'(?P<pre>{_VALID_URL})'
    # "tab": an optional single path segment such as "/videos". The conditional
    # leaves it out whenever the `not_channel` group (referenced here, expected
    # to be defined inside `_VALID_URL`) took part in the match.
    r'(?(not_channel)|(?P<tab>/[^?#/]+))?'
    # "post": whatever remains (further path, query string, fragment)
    r'(?P<post>.*)$')
def _get_url_mobj(self, url):
    """Match *url* against ``self._URL_RE`` and return its named groups.

    Groups that did not participate in the match are reported as ``''``
    instead of ``None`` so callers can slice and concatenate them directly
    (e.g. ``mobj['tab'][1:]``).

    Raises ``AttributeError`` if *url* does not match ``_URL_RE`` at all,
    because ``match()`` then returns ``None``.
    """
    groups = self._URL_RE.match(url).groupdict()
    # Callers subscript the result, so the normalised dict must be returned.
    return {name: '' if value is None else value for name, value in groups.items()}
def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
    """Return ``(tab_id, tab_name)`` for a tab renderer dict.

    ``tab_name`` is the lower-cased ``title`` of the tab ('' if absent).
    ``tab_id`` is taken, in order of preference, from:

    1. the tab segment of the tab's endpoint URL (resolved against *base_url*),
    2. the renderer's ``tabIdentifier`` string,
    3. the tab name itself, as a last resort.
    """
    tab_name = (tab.get('title') or '').lower()
    tab_url = urljoin(base_url, traverse_obj(
        tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url')))

    # [1:] strips the leading "/" from the captured "/<tab>" segment
    tab_id = (tab_url and self._get_url_mobj(tab_url)['tab'][1:]
              or traverse_obj(tab, 'tabIdentifier', expected_type=str))
    if tab_id:
        # Translate internal identifiers to the id used in URLs
        return {
            'TAB_ID_SPONSORSHIPS': 'membership',
        }.get(tab_id, tab_id), tab_name

    # Fallback to tab name if we cannot get the tab id.
    # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
    # Note that in the case of translated tab name this may result in an empty string, which we don't want.
    if tab_name:
        self.write_debug(f'Falling back to selected tab name: {tab_name}')
    # NOTE(review): the alias entries below were restored from the upstream
    # extractor; they were not visible in the reviewed excerpt - confirm.
    return {
        'home': 'featured',
        'live': 'streams',
    }.get(tab_name, tab_name), tab_name
def _has_tab(self, tabs, tab_id):
    """Tell whether any renderer in *tabs* resolves to the id *tab_id*."""
    for renderer in tabs:
        resolved_id = self._extract_tab_id_and_name(renderer)[0]
        if resolved_id == tab_id:
            return True
    return False
def _empty_playlist(self, item_id, data):
    """Build a playlist result with no entries for *item_id*.

    The playlist still carries whatever metadata
    ``_extract_metadata_from_tabs`` derives from *data*.
    """
    metadata = self._extract_metadata_from_tabs(item_id, data)
    no_entries = []
    return self.playlist_result(no_entries, item_id, **metadata)
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Extract a channel tab, feed or playlist page.

    Outline:

    1. Force the host to ``www.youtube.com`` and split the URL with
       ``_get_url_mobj``.
    2. For YouTube Music channel URLs, hand off to the equivalent playlist or
       ``/channel/`` URL.
    3. Handle ``watch`` URLs: a missing video id is an error unless a playlist
       id is present; if ``_yes_playlist`` declines, delegate to ``YoutubeIE``.
    4. Download the page data, follow a conditional (regional) redirect if one
       is offered, and reconcile the requested tab with the selected one.
    5. Return the tab(s) as playlist(s); otherwise fall back to an inline
       watch-page playlist, then to the single video.

    Raises ``ExtractorError`` when the URL or page cannot be interpreted and
    ``UserNotLive`` when ``/live`` was requested but another tab was selected.
    """
    item_id = self._match_id(url)
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    mobj = self._get_url_mobj(url)
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel']
    if is_channel and smuggled_data.get('is_music_url'):
        if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
            return self.url_result(
                f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:])
        elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                get_all=False, expected_type=str)
            # Only fail when no canonical URL was found
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, YoutubeTabIE)
        elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            return self.url_result(
                f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)

    original_tab_id, display_id = tab[1:], f'{item_id}{tab}'
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        url = f'{pre}/videos{post}'
    if smuggled_data.get('is_music_url'):
        self.report_warning(f'YouTube Music is not directly supported. Redirecting to {url}')

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = (traverse_obj(qs, (key, 0)) for key in ('v', 'list'))
    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('A video URL was given without video ID', expected=True)
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)

    if not self._yes_playlist(playlist_id, video_id):
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    data, ytcfg = self._extract_data(url, display_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post))
        self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
        return self.url_result(redirect_url, YoutubeTabIE)

    tabs, extra_tabs = self._extract_tab_renderers(data), []
    if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url)  # NB: Name may be translated
        self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')

        # /about is no longer a tab
        if original_tab_id == 'about':
            return self._empty_playlist(item_id, data)

        if not original_tab_id and selected_tab_name:
            self.to_screen('Downloading all uploads of the channel. '
                           'To download only the videos in a specific tab, pass the tab\'s URL')
            if self._has_tab(tabs, 'streams'):
                extra_tabs.append(''.join((pre, '/streams', post)))
            if self._has_tab(tabs, 'shorts'):
                extra_tabs.append(''.join((pre, '/shorts', post)))
            # XXX: Members-only tab should also be extracted

            if not extra_tabs and selected_tab_id != 'videos':
                # Channel does not have streams, shorts or videos tabs
                if item_id[:2] != 'UC':
                    return self._empty_playlist(item_id, data)

                # Topic channels don't have /videos. Use the equivalent playlist instead
                pl_id = f'UU{item_id[2:]}'
                pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                try:
                    data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                except ExtractorError:
                    # `data` still holds the channel page here
                    return self._empty_playlist(item_id, data)
                else:
                    item_id, url = pl_id, pl_url
                    self.to_screen(
                        f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')

            elif extra_tabs and selected_tab_id != 'videos':
                # When there are shorts/live tabs but not videos tab
                url, data = f'{pre}{post}', None

        elif (original_tab_id or 'videos') != selected_tab_id:
            if original_tab_id == 'live':
                # Live tab should have redirected to the video
                # Except in the case the channel has an actual live tab
                # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
                raise UserNotLive(video_id=item_id)
            elif selected_tab_name:
                raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)

            # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
            url = f'{pre}{post}'

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    tabs, entries = self._extract_tab_renderers(data), []
    if tabs:
        entries = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
        # NOTE(review): the `update` wrapper and the 'webpage_url' entry were
        # restored from the upstream extractor; only the two extractor keys
        # were visible in the reviewed excerpt - confirm.
        entries[0].update({
            'extractor_key': YoutubeTabIE.ie_key(),
            'extractor': YoutubeTabIE.IE_NAME,
            'webpage_url': url,
        })
    if self.get_param('playlist_items') == '0':
        entries.extend(self.url_result(u, YoutubeTabIE) for u in extra_tabs)
    else:  # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
        entries.extend(map(self._real_extract, extra_tabs))

    if len(entries) == 1:
        return entries[0]
    elif entries:
        metadata = self._extract_metadata_from_tabs(item_id, data)
        uploads_url = 'the Uploads (UU) playlist URL'
        if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):
            uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
        self.to_screen(
            'Downloading as multiple playlists, separated by tabs. '
            f'To download as a single playlist instead, pass {uploads_url}')
        return self.playlist_result(entries, item_id, **metadata)

    # No tabs at all: look for a playlist embedded in a watch page
    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Last resort: the page points at a single video
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if tab != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    raise ExtractorError('Unable to recognize tab page')
6758 class YoutubePlaylistIE(InfoExtractor
):
6759 IE_DESC
= 'YouTube playlists'
6760 _VALID_URL
= r
'''(?x)(?:
6765 youtube(?:kids)?\.com|
6770 (?P<id>{playlist_id})
6772 playlist_id
=YoutubeBaseInfoExtractor
._PLAYLIST
_ID
_RE
,
6773 invidious
='|'.join(YoutubeBaseInfoExtractor
._INVIDIOUS
_SITES
),
6775 IE_NAME
= 'youtube:playlist'
6777 'note': 'issue #673',
6778 'url': 'PLBB231211A4F62143',
6780 'title': '[OLD]Team Fortress 2 (Class-based LP)',
6781 'id': 'PLBB231211A4F62143',
6782 'uploader': 'Wickman',
6783 'uploader_id': '@WickmanVT',
6784 'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
6786 'uploader_url': 'https://www.youtube.com/@WickmanVT',
6787 'modified_date': r
're:\d{8}',
6788 'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
6789 'channel': 'Wickman',
6791 'channel_url': 'https://www.youtube.com/channel/UCKSpbfbl5kRQpTdL7kMc-1Q',
6792 'availability': 'public',
6794 'playlist_mincount': 29,
6796 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
6798 'title': 'YDL_safe_search',
6799 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
6801 'playlist_count': 2,
6802 'skip': 'This playlist is private',
6805 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
6806 'playlist_count': 4,
6809 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
6810 'uploader': 'milan',
6811 'uploader_id': '@milan5503',
6813 'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
6815 'modified_date': '20140919',
6818 'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
6819 'uploader_url': 'https://www.youtube.com/@milan5503',
6820 'availability': 'public',
6822 'expected_warnings': [r
'[Uu]navailable videos? (is|are|will be) hidden', 'Retrying', 'Giving up'],
6824 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
6825 'playlist_mincount': 455,
6827 'title': '2018 Chinese New Singles (11/6 updated)',
6828 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
6830 'uploader_id': '@music_king',
6831 'description': 'md5:da521864744d60a198e3a88af4db0d9d',
6834 'channel_url': 'https://www.youtube.com/channel/UC21nz3_MesPLqtDqwdvnoxA',
6836 'uploader_url': 'https://www.youtube.com/@music_king',
6837 'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
6838 'modified_date': r
're:\d{8}',
6839 'availability': 'public',
6841 'expected_warnings': [r
'[Uu]navailable videos (are|will be) hidden'],
6843 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
6844 'only_matching': True,
6846 # music album playlist
6847 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
6848 'only_matching': True,
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    Rejects anything ``YoutubeTabIE`` already accepts, and any URL whose query
    string carries a non-empty ``v`` parameter; otherwise the inherited
    ``suitable`` decides.
    """
    if YoutubeTabIE.suitable(url):
        return False
    from ..utils import parse_qs
    qs = parse_qs(url)
    # The `[None]` default keeps the `[0]` lookup safe when `v` is absent
    if qs.get('v', [None])[0]:
        return False
    return super().suitable(url)
def _real_extract(self, url):
    """Rewrite a playlist URL to its canonical form and hand it to YoutubeTabIE.

    The original query string is kept when it is non-empty; otherwise a
    ``list=<playlist_id>`` query is used. The ``is_music_url`` hint is
    smuggled into the URL only when the incoming URL was detected as a
    music URL.
    """
    playlist_id = self._match_id(url)
    # Must be evaluated before ``url`` is rewritten below
    is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
    url = update_url_query(
        'https://www.youtube.com/playlist',
        parse_qs(url) or {'list': playlist_id})
    if is_music_url:
        url = smuggle_url(url, {'is_music_url': True})
    return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
6872 class YoutubeYtBeIE(InfoExtractor
):
6873 IE_DESC
= 'youtu.be'
6874 _VALID_URL
= rf
'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{{11}})/*?.*?\blist=(?P<playlist_id>{YoutubeBaseInfoExtractor._PLAYLIST_ID_RE})'
6876 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
6878 'id': 'yeWKywCrFtk',
6880 'title': 'Small Scale Baler and Braiding Rugs',
6881 'uploader': 'Backus-Page House Museum',
6882 'uploader_id': '@backuspagemuseum',
6883 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/@backuspagemuseum',
6884 'upload_date': '20161008',
6885 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
6886 'categories': ['Nonprofits & Activism'],
6890 'playable_in_embed': True,
6891 'thumbnail': r
're:^https?://.*\.webp',
6892 'channel': 'Backus-Page House Museum',
6893 'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
6894 'live_status': 'not_live',
6896 'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
6897 'availability': 'public',
6899 'comment_count': int,
6900 'channel_follower_count': int,
6904 'skip_download': True,
6907 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
6908 'only_matching': True,
def _real_extract(self, url):
    """Turn a youtu.be link that carries a playlist into a watch URL.

    Both the video id and the playlist id captured by ``_VALID_URL`` are put
    into the query of the resulting URL, which is delegated to YoutubeTabIE.
    """
    mobj = self._match_valid_url(url)
    video_id = mobj.group('id')
    playlist_id = mobj.group('playlist_id')
    return self.url_result(
        update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Redirect ``/embed/live_stream?channel=...`` URLs to the channel's ``/live`` page."""

    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url result for the ``/live`` page of the matched channel."""
        channel_id = self._match_id(url)
        return self.url_result(
            f'https://www.youtube.com/channel/{channel_id}/live',
            ie=YoutubeTabIE.ie_key(), video_id=channel_id)
class YoutubeYtUserIE(InfoExtractor):
    """Resolve the ``ytuser:<name>`` shorthand to the user's page URL."""

    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url result for ``/user/<id>``, handled by YoutubeTabIE."""
        user_id = self._match_id(url)
        return self.url_result(f'https://www.youtube.com/user/{user_id}', YoutubeTabIE, user_id)
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the ``:ytfav`` keyword family to the ``LL`` playlist URL."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        # Shortest keyword form, as advertised in IE_DESC
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url result for the ``LL`` playlist, handled by YoutubeTabIE."""
        return self.url_result(
            'https://www.youtube.com/playlist?list=LL',
            ie=YoutubeTabIE.ie_key())
6971 class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor
):
6972 IE_NAME
= 'youtube:notif'
6973 IE_DESC
= 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
6974 _VALID_URL
= r
':ytnotif(?:ication)?s?'
6975 _LOGIN_REQUIRED
= True
6978 'only_matching': True,
6980 'url': ':ytnotifications',
6981 'only_matching': True,
def _extract_notification_menu(self, response, continuation_list):
    """Yield the entries found in one notification-menu API *response*.

    The item list is looked up under the popup path first and under the
    ``appendContinuationItemsAction`` path second.

    *continuation_list* is a one-element list used as an out-parameter:
    slot 0 is reset to None and then set to the last
    ``continuationItemRenderer`` encountered, if any.
    """
    notification_list = traverse_obj(
        response,
        ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
        ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
        expected_type=list) or []
    continuation_list[0] = None
    for item in notification_list:
        entry = self._extract_notification_renderer(item.get('notificationRenderer'))
        if entry:
            yield entry
        continuation = item.get('continuationItemRenderer')
        if continuation:
            continuation_list[0] = continuation
def _extract_notification_renderer(self, notification):
    """Build a ``url`` entry from a single notification renderer.

    A notification that carries a watch endpoint becomes a watch URL. One
    without a video id is treated as a channel post and needs both a
    channel id and a post id.

    Returns None when neither a video nor a channel post can be identified.
    """
    video_id = traverse_obj(
        notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
    url = f'https://www.youtube.com/watch?v={video_id}'
    channel_id = None
    if not video_id:
        browse_ep = traverse_obj(
            notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
        channel_id = self.ucid_or_none(traverse_obj(browse_ep, 'browseId', expected_type=str))
        post_id = self._search_regex(
            r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
            'post id', default=None)
        if not channel_id or not post_id:
            return
        # The direct /post url redirects to this in the browser
        url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

    channel = traverse_obj(
        notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
        expected_type=str)
    notification_title = self._get_text(notification, 'shortMessage')
    if notification_title:
        notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
    # TODO: handle recommended videos
    title = self._search_regex(
        rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
        'video title', default=None)
    # The sent-time text is only parsed when the 'approximate_date' argument is set
    timestamp = (self._parse_time_text(self._get_text(notification, 'sentTimeText'))
                 if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE)
                 else None)
    return {
        '_type': 'url',
        'url': url,
        'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(),
        'video_id': video_id,
        'title': title,
        'channel_id': channel_id,
        'channel': channel,
        'uploader': channel,
        'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
        'timestamp': timestamp,
    }
def _notification_menu_entries(self, ytcfg):
    """Yield notification entries page by page.

    Each page is requested with the ``ctoken`` taken from the continuation
    stored by the previous page. Iteration stops once a page leaves no
    continuation behind.
    """
    # One-element list so _extract_notification_menu can hand back the continuation
    continuation_list = [None]
    # No response exists yet when the first page's headers are generated
    response = None
    for page in itertools.count(1):
        ctoken = traverse_obj(
            continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
        response = self._extract_response(
            item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,
            ep='notification/get_notification_menu', check_get_keys='actions',
            headers=self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response)))
        yield from self._extract_notification_menu(response, continuation_list)
        if not continuation_list[0]:
            break
def _real_extract(self, url):
    """Return the notifications as a playlist whose id and title are 'notifications'."""
    playlist_name = 'notifications'
    # The ytcfg download is skipped when skip_webpage is set
    if self.skip_webpage:
        ytcfg = {}
    else:
        ytcfg = self._download_ytcfg('web', playlist_name)
    self._report_playlist_authcheck(ytcfg)
    entries = self._notification_menu_entries(ytcfg)
    return self.playlist_result(entries, playlist_name, playlist_name)
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor for the ``ytsearch`` key; configured through class attributes only."""

    IE_DESC = 'YouTube search'
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAfABAQ=='  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'note': 'Suicide/self-harm search warning',
        'url': 'ytsearch1:i hate myself and i wanna die',
        'playlist_count': 1,
        'info_dict': {
            'id': 'i hate myself and i wanna die',
            'title': 'i hate myself and i wanna die',
        },
    }]
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor for the ``ytsearchdate`` key; configured through class attributes only."""

    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_PARAMS = 'CAISAhAB8AEB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]
7101 class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor
):
7102 IE_DESC
= 'YouTube search URLs with sorting and filter support'
7103 IE_NAME
= YoutubeSearchIE
.IE_NAME
+ '_url'
7104 _VALID_URL
= r
'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
7106 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
7107 'playlist_mincount': 5,
7109 'id': 'youtube-dl test video',
7110 'title': 'youtube-dl test video',
7113 'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
7114 'playlist_mincount': 5,
7120 'url': 'https://www.youtube.com/results?search_query=%23cats',
7121 'playlist_mincount': 1,
7125 # The test suite does not have support for nested playlists
7127 # 'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
7133 'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
7136 'title': 'kurzgesagt',
7141 'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
7142 'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
7143 'ie_key': 'YoutubeTab',
7144 'channel': 'Kurzgesagt – In a Nutshell',
7145 'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
7146 'title': 'Kurzgesagt – In a Nutshell',
7147 'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
7148 # No longer available for search as it is set to the handle.
7149 # 'playlist_count': int,
7150 'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
7152 'uploader_id': '@kurzgesagt',
7153 'uploader_url': 'https://www.youtube.com/@kurzgesagt',
7154 'uploader': 'Kurzgesagt – In a Nutshell',
7155 'channel_is_verified': True,
7156 'channel_follower_count': int,
7159 'params': {'extract_flat': True, 'playlist_items': '1'}
,
7160 'playlist_mincount': 1,
7162 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
7163 'only_matching': True,
def _real_extract(self, url):
    """Return the search results for a results/search URL as a playlist.

    The query is read from ``search_query`` (falling back to ``q``) and the
    optional ``sp`` value is forwarded to ``_search_results``. The query
    string doubles as the playlist id and title.
    """
    qs = parse_qs(url)
    query = (qs.get('search_query') or qs.get('q'))[0]
    return self.playlist_result(self._search_results(query, qs.get('sp', (None,))[0]), query, query)
7172 class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor
):
7173 IE_DESC
= 'YouTube music search URLs with selectable sections, e.g. #songs'
7174 IE_NAME
= 'youtube:music:search_url'
7175 _VALID_URL
= r
'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
7177 'url': 'https://music.youtube.com/search?q=royalty+free+music',
7178 'playlist_count': 16,
7180 'id': 'royalty free music',
7181 'title': 'royalty free music',
7184 'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
7185 'playlist_mincount': 30,
7187 'id': 'royalty free music - songs',
7188 'title': 'royalty free music - songs',
7190 'params': {'extract_flat': 'in_playlist'}
,
7192 'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
7193 'playlist_mincount': 30,
7195 'id': 'royalty free music - community playlists',
7196 'title': 'royalty free music - community playlists',
7198 'params': {'extract_flat': 'in_playlist'}
,
7202 'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
7203 'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
7204 'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
7205 'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
7206 'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
7207 'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
def _real_extract(self, url):
    """Return music search results as a playlist, optionally limited to one section.

    An explicit ``sp`` query value is used as the search params; its section
    name is looked up in ``_SECTIONS``, falling back to the raw value.
    Without ``sp`` the section name is taken from the URL fragment and
    mapped to params through ``_SECTIONS``; an unknown fragment selects no
    section. The playlist id and title are ``<query> - <section>``, or just
    the query when no section applies.
    """
    qs = parse_qs(url)
    query = (qs.get('search_query') or qs.get('q'))[0]
    params = qs.get('sp', (None,))[0]
    if params:
        section = next((k for k, v in self._SECTIONS.items() if v == params), params)
    else:
        # The appended '' keeps index 1 valid when the URL has no fragment
        section = urllib.parse.unquote_plus(([*url.split('#'), ''])[1]).lower()
        params = self._SECTIONS.get(section)
        if not params:
            section = None
    title = join_nonempty(query, section, delim=' - ')
    return self.playlist_result(self._search_results(query, params, default_client='web_music'), title, title)
7225 class YoutubeFeedsInfoExtractor(InfoExtractor
):
7227 Base class for feed extractors
7228 Subclasses must re-define the _FEED_NAME property.
7230 _LOGIN_REQUIRED
= True
7231 _FEED_NAME
= 'feeds'
def _real_initialize(self):
    """Run YoutubeBaseInfoExtractor's login-required check against this instance."""
    # Called through the class because this extractor derives from InfoExtractor,
    # not from YoutubeBaseInfoExtractor
    check_login = YoutubeBaseInfoExtractor._check_login_required
    check_login(self)
7238 return f
'youtube:{cls._FEED_NAME}'
def _real_extract(self, url):
    """Return a url result for ``/feed/<_FEED_NAME>``, handled by YoutubeTabIE."""
    feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
    return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the ``:ytwatchlater`` keyword to the ``WL`` playlist URL."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url result for the ``WL`` playlist, handled by YoutubeTabIE."""
        return self.url_result(
            'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``recommended`` feed; does not require login."""

    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    _TESTS = [{
        # Shortest keyword form, as advertised in IE_DESC
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``subscriptions`` feed."""

    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        # Keyword form advertised in IE_DESC
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``history`` feed."""

    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
7299 class YoutubeShortsAudioPivotIE(InfoExtractor
):
7300 IE_DESC
= 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
7301 IE_NAME
= 'youtube:shorts:pivot:audio'
7302 _VALID_URL
= r
'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
7304 'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
7305 'only_matching': True,
@staticmethod
def _generate_audio_pivot_params(video_id):
    """
    Generates sfv_audio_pivot browse params for this video id

    The video id is embedded three times in a fixed byte template, which is
    then base64-encoded and percent-quoted so it can be used as a URL query
    value.
    """
    pb_params = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % ((video_id.encode(),) * 3)
    return urllib.parse.quote(base64.b64encode(pb_params).decode())
def _real_extract(self, url):
    """Return a url result for the ``sfv_audio_pivot`` feed of the matched video id.

    The feed's ``bp`` query value is produced by
    ``_generate_audio_pivot_params``; the result is delegated to YoutubeTabIE.
    """
    video_id = self._match_id(url)
    return self.url_result(
        f'https://www.youtube.com/feed/sfv_audio_pivot?bp={self._generate_audio_pivot_params(video_id)}',
        ie=YoutubeTabIE)
7323 class YoutubeTruncatedURLIE(InfoExtractor
):
7324 IE_NAME
= 'youtube:truncated_url'
7325 IE_DESC
= False # Do not list
7326 _VALID_URL
= r
'''(?x)
7328 (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
7331 annotation_id=annotation_[^&]+|
7337 attribution_link\?a=[^&]+
7343 'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
7344 'only_matching': True,
7346 'url': 'https://www.youtube.com/watch?',
7347 'only_matching': True,
7349 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
7350 'only_matching': True,
7352 'url': 'https://www.youtube.com/watch?feature=foo',
7353 'only_matching': True,
7355 'url': 'https://www.youtube.com/watch?hl=en-GB',
7356 'only_matching': True,
7358 'url': 'https://www.youtube.com/watch?t=2372',
7359 'only_matching': True,
def _real_extract(self, url):
    """Always raise an expected ExtractorError telling the user to quote the URL.

    URLs matched by this extractor carry no usable video id, so there is
    nothing to extract.
    """
    raise ExtractorError(
        'Did you forget to quote the URL? Remember that & is a meta '
        'character in most shells, so you want to put the URL in quotes, '
        'like youtube-dl '
        '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
        ' or simply youtube-dl BaW_jenozKc .',
        expected=True)
7372 class YoutubeClipIE(YoutubeTabBaseInfoExtractor
):
7373 IE_NAME
= 'youtube:clip'
7374 _VALID_URL
= r
'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
7376 # FIXME: Other metadata should be extracted from the clip, not from the base video
7377 'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
7379 'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
7381 'section_start': 29.0,
7382 'section_end': 39.7,
7385 'availability': 'public',
7386 'categories': ['Gaming'],
7387 'channel': 'Scott The Woz',
7388 'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
7389 'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
7390 'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
7392 'playable_in_embed': True,
7394 'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
7395 'title': 'Mobile Games on Console - Scott The Woz',
7396 'upload_date': '20210920',
7397 'uploader': 'Scott The Woz',
7398 'uploader_id': '@ScottTheWoz',
7399 'uploader_url': 'https://www.youtube.com/@ScottTheWoz',
7401 'live_status': 'not_live',
7402 'channel_follower_count': int,
7403 'chapters': 'count:20',
7404 'comment_count': int,
7405 'heatmap': 'count:100',
def _real_extract(self, url):
    """Resolve a clip URL to its base video plus the clip's time range.

    Returns a ``url_transparent`` result that points at the watch URL of the
    underlying video, carries the clip id as ``id`` and exposes the loop
    command's start/end times, converted from milliseconds to seconds, as
    ``section_start``/``section_end``.

    Raises ExtractorError when the page data holds no video id.
    """
    clip_id = self._match_id(url)
    _, data = self._extract_webpage(url, clip_id)

    video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
    if not video_id:
        raise ExtractorError('Unable to find video ID')

    clip_data = traverse_obj(data, (
        'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
        'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
        'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
        'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)

    return {
        '_type': 'url_transparent',
        'url': f'https://www.youtube.com/watch?v={video_id}',
        'ie_key': YoutubeIE.ie_key(),
        'id': clip_id,
        'section_start': int(clip_data['startTimeMs']) / 1000,
        'section_end': int(clip_data['endTimeMs']) / 1000,
    }
7433 class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor
):
7434 IE_NAME
= 'youtube:consent'
7435 IE_DESC
= False # Do not list
7436 _VALID_URL
= r
'https?://consent\.youtube\.com/m\?'
7438 'url': 'https://consent.youtube.com/m?continue=https%3A%2F%2Fwww.youtube.com%2Flive%2FqVv6vCqciTM%3Fcbrd%3D1&gl=NL&m=0&pc=yt&hl=en&src=1',
7440 'id': 'qVv6vCqciTM',
7443 'uploader_id': '@sana_natori',
7444 'comment_count': int,
7445 'chapters': 'count:13',
7446 'upload_date': '20221223',
7447 'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
7448 'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
7449 'uploader_url': 'https://www.youtube.com/@sana_natori',
7451 'release_date': '20221223',
7452 'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
7453 'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
7455 'playable_in_embed': True,
7457 'availability': 'public',
7458 'channel_follower_count': int,
7459 'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
7460 'categories': ['Entertainment'],
7461 'live_status': 'was_live',
7462 'release_timestamp': 1671793345,
7463 'channel': 'さなちゃんねる',
7464 'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
7465 'uploader': 'さなちゃんねる',
7466 'channel_is_verified': True,
7467 'heatmap': 'count:100',
7469 'add_ie': ['Youtube'],
7470 'params': {'skip_download': 'Youtube'}
,
def _real_extract(self, url):
    """Follow the ``continue`` target of a consent redirect URL.

    The last ``continue`` query value is used. Raises an expected
    ExtractorError when it is missing or rejected by ``url_or_none``.
    """
    continue_values = parse_qs(url).get('continue', [None])
    target = url_or_none(continue_values[-1])
    if target:
        return self.url_result(target)
    raise ExtractorError('Invalid cookie consent redirect URL', expected=True)
7480 class YoutubeTruncatedIDIE(InfoExtractor
):
7481 IE_NAME
= 'youtube:truncated_id'
7482 IE_DESC
= False # Do not list
7483 _VALID_URL
= r
'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
7486 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
7487 'only_matching': True,
7490 def _real_extract(self
, url
):
7491 video_id
= self
._match
_id
(url
)
7492 raise ExtractorError(
7493 f
'Incomplete YouTube ID {video_id}. URL {url} looks truncated.',