]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/youtube.py
[extractor/youtube] Extract DRC formats
[yt-dlp.git] / yt_dlp / extractor / youtube.py
1 import base64
2 import calendar
3 import collections
4 import copy
5 import datetime
6 import enum
7 import hashlib
8 import itertools
9 import json
10 import math
11 import os.path
12 import random
13 import re
14 import sys
15 import threading
16 import time
17 import traceback
18 import urllib.error
19 import urllib.parse
20
21 from .common import InfoExtractor, SearchInfoExtractor
22 from .openload import PhantomJSwrapper
23 from ..compat import functools
24 from ..jsinterp import JSInterpreter
25 from ..utils import (
26 NO_DEFAULT,
27 ExtractorError,
28 LazyList,
29 UserNotLive,
30 bug_reports_message,
31 classproperty,
32 clean_html,
33 datetime_from_str,
34 dict_get,
35 filter_dict,
36 float_or_none,
37 format_field,
38 get_first,
39 int_or_none,
40 is_html,
41 join_nonempty,
42 js_to_json,
43 mimetype2ext,
44 network_exceptions,
45 orderedSet,
46 parse_codecs,
47 parse_count,
48 parse_duration,
49 parse_iso8601,
50 parse_qs,
51 qualities,
52 remove_start,
53 smuggle_url,
54 str_or_none,
55 str_to_int,
56 strftime_or_none,
57 traverse_obj,
58 try_get,
59 unescapeHTML,
60 unified_strdate,
61 unified_timestamp,
62 unsmuggle_url,
63 update_url_query,
64 url_or_none,
65 urljoin,
66 variadic,
67 )
68
# Per-client InnerTube configuration, keyed by the client name used in this module.
# Every entry carries the request context ('INNERTUBE_CONTEXT') and the value sent as the
# X-YouTube-Client-Name header ('INNERTUBE_CONTEXT_CLIENT_NAME').
# 'INNERTUBE_API_KEY', 'INNERTUBE_HOST' and 'REQUIRE_JS_PLAYER' may be omitted here:
# build_innertube_clients() fills in defaults for them and also adds 'priority'.
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20220731.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        # The only entry with an explicit host; the others get the default host
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20220727.01.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20220726.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '5.16.51',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '22.30.100',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    # No API key of its own: build_innertube_clients() supplies the default one
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '5.21',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    # No API key of its own: build_innertube_clients() supplies the default one
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '22.33.101',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85
    },
}
241
242
def _split_innertube_client(client_name):
    """
    Split a client name into a 3-tuple.

    A name containing a dot is cut at its last dot and yields
    (text before the dot, text after the dot, text before the dot).
    Any other name is cut at its first underscore and yields
    (the unchanged name, text before the underscore, text after the
    underscore), with None as the last item when there is no underscore.
    """
    head, dot, tail = client_name.rpartition('.')
    if dot:
        return head, tail, head
    base_name, underscore, suffix = client_name.partition('_')
    # An empty suffix ("web_") is still reported as '' rather than None
    return client_name, base_name, (suffix if underscore else None)
249
250
def build_innertube_clients():
    """
    Complete every INNERTUBE_CLIENTS entry in place and add derived clients.

    For each client present when the function starts:
      * missing 'INNERTUBE_API_KEY', 'INNERTUBE_HOST', 'REQUIRE_JS_PLAYER'
        and context 'hl' values are filled with defaults;
      * 'priority' is set to 10 times the rank of its base client, then
        lowered by 2 for the 'embedded' variant and by 3 for other variants;
      * a client without a variant additionally gets a '<name>_embedscreen'
        copy (EMBED client screen, third-party context, priority lowered by 3).
    """
    third_party_context = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_order = ('android', 'web', 'tv', 'ios', 'mweb')
    # NOTE(review): the tuple is reversed before ranking, presumably so that
    # earlier base clients end up with the higher priority - verify qualities()
    rank_of = qualities(base_order[::-1])
    top_level_defaults = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: *_embedscreen entries are inserted during the loop
    for name, cfg in list(INNERTUBE_CLIENTS.items()):
        for key, fallback in top_level_defaults:
            cfg.setdefault(key, fallback)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        base_name, flavour = _split_innertube_client(name)[1:]
        cfg['priority'] = 10 * rank_of(base_name)

        if flavour == 'embedded':
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party_context
            cfg['priority'] -= 2
        elif flavour:
            cfg['priority'] -= 3
        else:
            screen_cfg = copy.deepcopy(cfg)
            INNERTUBE_CLIENTS[f'{name}_embedscreen'] = screen_cfg
            screen_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            screen_cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party_context
            screen_cfg['priority'] -= 3


build_innertube_clients()
280
281
class BadgeType(enum.Enum):
    """Badge kinds that YoutubeBaseInfoExtractor._extract_badges can report."""
    # Availability-related badges
    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()
    # Live status badge
    LIVE_NOW = enum.auto()
289
290
291 class YoutubeBaseInfoExtractor(InfoExtractor):
292 """Provide base functions for Youtube extractors"""
293
# Regex alternation of reserved URL path names.
# NOTE(review): the code consuming this pattern is outside this section; presumably
# it keeps these paths from being treated as channel names - confirm at the call sites.
_RESERVED_NAMES = (
    r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
    r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
    r'browse|oembed|get_video_info|iframe_api|s/player|source|'
    r'storefront|oops|index|account|t/terms|about|upload|signin|logout')

# Playlist id pattern: a known prefix followed by at least 10 id characters,
# or one of the bare ids RDMM, WL, LL, LM
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

# _NETRC_MACHINE = 'youtube'

# If True it will raise an error if no login info is provided
# (enforced by _check_login_required, which looks at self._cookies_passed)
_LOGIN_REQUIRED = False
306
# Host-name regex fragments for Invidious and Piped front-ends.
# The code that consumes this tuple is outside this section.
_INVIDIOUS_SITES = (
    # invidious-redirect websites
    r'(?:www\.)?redirect\.invidious\.io',
    r'(?:(?:www|dev)\.)?invidio\.us',
    # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
    r'(?:www\.)?invidious\.pussthecat\.org',
    r'(?:www\.)?invidious\.zee\.li',
    r'(?:www\.)?invidious\.ethibox\.fr',
    r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
    r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
    r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
    # youtube-dl invidious instances list
    r'(?:(?:www|no)\.)?invidiou\.sh',
    r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
    r'(?:www\.)?invidious\.kabi\.tk',
    r'(?:www\.)?invidious\.mastodon\.host',
    r'(?:www\.)?invidious\.zapashcanon\.fr',
    r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
    r'(?:www\.)?invidious\.tinfoil-hat\.net',
    r'(?:www\.)?invidious\.himiko\.cloud',
    r'(?:www\.)?invidious\.reallyancient\.tech',
    r'(?:www\.)?invidious\.tube',
    r'(?:www\.)?invidiou\.site',
    r'(?:www\.)?invidious\.site',
    r'(?:www\.)?invidious\.xyz',
    r'(?:www\.)?invidious\.nixnet\.xyz',
    r'(?:www\.)?invidious\.048596\.xyz',
    r'(?:www\.)?invidious\.drycat\.fr',
    r'(?:www\.)?inv\.skyn3t\.in',
    r'(?:www\.)?tube\.poal\.co',
    r'(?:www\.)?tube\.connect\.cafe',
    r'(?:www\.)?vid\.wxzm\.sx',
    r'(?:www\.)?vid\.mint\.lgbt',
    r'(?:www\.)?vid\.puffyan\.us',
    r'(?:www\.)?yewtu\.be',
    r'(?:www\.)?yt\.elukerio\.org',
    r'(?:www\.)?yt\.lelux\.fi',
    r'(?:www\.)?invidious\.ggc-project\.de',
    r'(?:www\.)?yt\.maisputain\.ovh',
    r'(?:www\.)?ytprivate\.com',
    r'(?:www\.)?invidious\.13ad\.de',
    r'(?:www\.)?invidious\.toot\.koeln',
    r'(?:www\.)?invidious\.fdn\.fr',
    r'(?:www\.)?watch\.nettohikari\.com',
    r'(?:www\.)?invidious\.namazso\.eu',
    r'(?:www\.)?invidious\.silkky\.cloud',
    r'(?:www\.)?invidious\.exonip\.de',
    r'(?:www\.)?invidious\.riverside\.rocks',
    r'(?:www\.)?invidious\.blamefran\.net',
    r'(?:www\.)?invidious\.moomoo\.de',
    r'(?:www\.)?ytb\.trom\.tf',
    r'(?:www\.)?yt\.cyberhost\.uk',
    r'(?:www\.)?kgg2m7yk5aybusll\.onion',
    r'(?:www\.)?qklhadlycap4cnod\.onion',
    r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
    r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
    r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
    r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
    r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
    r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
    r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
    r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
    r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
    r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
    # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
    # NOTE(review): piped.kavin.rocks is already matched by the kavin.rocks entry above
    r'(?:www\.)?piped\.kavin\.rocks',
    r'(?:www\.)?piped\.tokhmi\.xyz',
    r'(?:www\.)?piped\.syncpundit\.io',
    r'(?:www\.)?piped\.mha\.fi',
    r'(?:www\.)?watch\.whatever\.social',
    r'(?:www\.)?piped\.garudalinux\.org',
    r'(?:www\.)?piped\.rivo\.lol',
    r'(?:www\.)?piped-libre\.kavin\.rocks',
    r'(?:www\.)?yt\.jae\.fi',
    r'(?:www\.)?piped\.mint\.lgbt',
    r'(?:www\.)?il\.ax',
    r'(?:www\.)?piped\.esmailelbob\.xyz',
    r'(?:www\.)?piped\.projectsegfau\.lt',
    r'(?:www\.)?piped\.privacydev\.net',
    r'(?:www\.)?piped\.palveluntarjoaja\.eu',
    r'(?:www\.)?piped\.smnz\.de',
    r'(?:www\.)?piped\.adminforge\.de',
    r'(?:www\.)?watch\.whatevertinfoil\.de',
    r'(?:www\.)?piped\.qdi\.fi',
    r'(?:www\.)?piped\.video',
    r'(?:www\.)?piped\.aeong\.one',
)
394
# extracted from account/account_menu ep
# XXX: These are the supported YouTube UI and API languages,
# which is slightly different from languages supported for translation in YouTube studio
# The "lang" extractor argument is validated against this list (case-sensitive)
_SUPPORTED_LANG_CODES = [
    'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
    'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
    'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
    'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
    'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
    'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
]

# Alert messages that _report_alerts() drops instead of reporting as warnings
_IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
408
@functools.cached_property
def _preferred_lang(self):
    """
    Language code requested through the "lang" extractor argument of the Youtube extractor.

    Evaluates to None when the argument is unset or empty.
    Raises an expected ExtractorError when the code is not in _SUPPORTED_LANG_CODES
    and emits a warning for every accepted code other than "en".
    """
    requested = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
    if not requested:
        return None
    if requested in self._SUPPORTED_LANG_CODES:
        if requested != 'en':
            self.report_warning(
                f'Preferring "{requested}" translated fields. Note that some metadata extraction may fail or be incorrect.')
        return requested
    raise ExtractorError(
        f'Unsupported language code: {requested}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
        expected=True)
426
def _initialize_consent(self):
    """
    Make sure an accepting CONSENT cookie is set for .youtube.com.

    Nothing is changed when a __Secure-3PSID cookie exists or the CONSENT
    cookie already contains "YES". The id of a "PENDING+<digits>" consent
    value is reused when present; otherwise a random id in 100..999 is used.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return
    pending_id = None
    consent_cookie = jar.get('CONSENT')
    if consent_cookie:
        if 'YES' in consent_cookie.value:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', consent_cookie.value, 'consent', default=None)
    pending_id = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', f'YES+cb.20210328-17-p0.en+FX+{pending_id}')
441
def _initialize_pref(self):
    """
    Rewrite the PREF cookie so that it carries the wanted language and UTC.

    Existing PREF values are kept when the cookie can be parsed; 'hl' is set
    to the preferred language (or 'en') and 'tz' to 'UTC'.
    """
    pref_cookie = self._get_cookies('https://www.youtube.com/').get('PREF')
    settings = {}
    if pref_cookie:
        try:
            settings = dict(urllib.parse.parse_qsl(pref_cookie.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
    settings['hl'] = self._preferred_lang or 'en'
    settings['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(settings))
453
def _real_initialize(self):
    """Set up the PREF and CONSENT cookies, then enforce the login requirement."""
    setup_steps = (
        self._initialize_pref,
        self._initialize_consent,
        self._check_login_required,
    )
    for step in setup_steps:
        step()
458
def _check_login_required(self):
    """Raise the login-required error when _LOGIN_REQUIRED is set but no cookies were passed."""
    if not self._LOGIN_REQUIRED or self._cookies_passed:
        return
    self.raise_login_required('Login details are needed to download this content', method='cookies')
462
# Matches the start of the ytInitialData assignment, written either as
# window["ytInitialData"] = or as ytInitialData = (used by extract_yt_initial_data)
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
# Matches the start of the ytInitialPlayerResponse assignment
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
465
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the INNERTUBE_CLIENTS entry for `client`, safe for the caller to modify."""
    builtin_cfg = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin_cfg)
468
def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' value configured for `client` in INNERTUBE_CLIENTS."""
    client_cfg = INNERTUBE_CLIENTS[client]
    return client_cfg['INNERTUBE_HOST']
471
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get() on `ytcfg`, falling back to the built-in config of `default_client`.

    The fallback is used whenever the value obtained from `ytcfg` is falsy.
    """
    found = try_get(ytcfg, getter, expected_type)
    if found:
        return found
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
476
def _extract_client_name(self, ytcfg, default_client='web'):
    """
    Return the client name string from `ytcfg`, or from the built-in config of `default_client`.

    'INNERTUBE_CLIENT_NAME' is tried before the client name inside 'INNERTUBE_CONTEXT'.
    """
    name_getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_NAME'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, name_getters, str, default_client)
481
def _extract_client_version(self, ytcfg, default_client='web'):
    """
    Return the client version string from `ytcfg`, or from the built-in config of `default_client`.

    'INNERTUBE_CLIENT_VERSION' is tried before the client version inside 'INNERTUBE_CONTEXT'.
    """
    version_getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, version_getters, str, default_client)
486
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """
    Choose the API host name.

    Precedence: the 'innertube_host' extractor argument, then `req_api_hostname`,
    then the host configured for `default_client` ('web' when not given).
    """
    configured = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured:
        return configured
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
490
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return 'INNERTUBE_API_KEY' from `ytcfg`, or from the built-in config of `default_client`."""
    def api_key_of(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, api_key_of, str, default_client)
493
def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the 'INNERTUBE_CONTEXT' dict of `ytcfg`, or of the built-in config of `default_client`.

    The client part of the context is updated with the preferred language
    (or 'en') and a UTC time zone before the context is returned.
    NOTE(review): get_first() presumably returns the object itself rather than
    a copy, in which case a context taken from `ytcfg` is modified in place.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_part = traverse_obj(context, 'client', expected_type=dict, default={})
    client_part['hl'] = self._preferred_lang or 'en'
    client_part['timeZone'] = 'UTC'
    client_part['utcOffsetMinutes'] = 0
    return context
501
# Cache used by _generate_sapisidhash_header: None means "not looked up yet",
# False means "no usable cookie found", any other value is the cookie value
_SAPISID = None
503
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """
    Build the "SAPISIDHASH <timestamp>_<sha1 hex>" authorization value for `origin`.

    The cookie value is looked up once and cached in self._SAPISID (False when
    no cookie with a value exists). When only __Secure-3PAPISID is present its
    value is also stored as a SAPISID cookie expiring in one hour.
    Returns None when no cookie value is available.
    """
    timestamp = round(time.time())
    if self._SAPISID is None:
        jar = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        cookie = dict_get(jar, ('__Secure-3PAPISID', 'SAPISID'))
        if not (cookie and cookie.value):
            self._SAPISID = False
        else:
            self._SAPISID = cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not jar.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=timestamp + 3600)
    if not self._SAPISID:
        return None
    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    hash_input = f'{timestamp} {self._SAPISID} {origin}'.encode()
    digest = hashlib.sha1(hash_input).hexdigest()
    return f'SAPISIDHASH {timestamp}_{digest}'
528
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    Call the endpoint `ep` under https://<host>/youtubei/v1/ and return the _download_json result.

    The JSON request body is `query` merged over {'context': ...}; the context
    is `context` when given, otherwise the one of `default_client`.
    `headers` are applied on top of the generated API headers.
    API key precedence: the 'innertube_key' extractor argument, then `api_key`,
    then the key of `default_client`.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = configured_key or api_key or self._extract_api_key(default_client=default_client)

    url = f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}'
    return self._download_json(
        url, video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})
546
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Return the JSON that _search_json finds after the ytInitialData assignment in `webpage`."""
    start_pattern = self._YT_INITIAL_DATA_RE
    return self._search_json(start_pattern, webpage, 'yt initial data', item_id, fatal=fatal)
549
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first 'SESSION_INDEX' among the given configs that converts
    to an int, or None when there is none.
    """
    indices = (
        int_or_none(try_get(cfg, lambda x: x['SESSION_INDEX']))
        for cfg in data)
    return next((index for index in indices if index is not None), None)
560
# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """
    Return the identity token from `ytcfg` ('ID_TOKEN') or, failing that, from `webpage`.

    Returns None when neither source yields a token.
    """
    cfg_token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if cfg_token:
        return cfg_token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
571
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg

    Returns None when no argument provides a sync id.
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated:
            return delegated
        datasync_id = try_get(source, datasync_getters, str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]
    return None
590
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_data_paths], expected_type=str)
600
@functools.cached_property
def is_authenticated(self):
    """Whether _generate_sapisidhash_header() yields an authorization value (computed once)."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
604
def extract_ytcfg(self, video_id, webpage):
    """
    Parse the object passed to the first "ytcfg.set(...)" call found in `webpage`.

    Returns {} when there is no webpage, no such call, or the object cannot be parsed.
    """
    if not webpage:
        return {}
    raw_cfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', default='{}')
    parsed = self._parse_json(raw_cfg, video_id, fatal=False)
    return parsed or {}
612
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an API request.

    Explicit keyword arguments take precedence over the values found in
    `ytcfg`; client name, version and user agent fall back to the built-in
    config of `default_client`. 'X-Goog-AuthUser' is added when a session
    index is known (or 0 when only `account_syncid` is given), and
    'Authorization'/'X-Origin' when a SAPISIDHASH can be generated.
    The result is passed through filter_dict() before being returned.
    """
    origin = 'https://' + self._select_api_hostname(api_hostname, default_client)
    client_id = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': str(client_id),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
        'User-Agent': self._ytcfg_get_safe(
            ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if session_index is not None:
        headers['X-Goog-AuthUser'] = session_index
    elif account_syncid:
        headers['X-Goog-AuthUser'] = 0

    authorization = self._generate_sapisidhash_header(origin)
    if authorization is not None:
        headers['Authorization'] = authorization
        headers['X-Origin'] = origin
    return filter_dict(headers)
638
def _download_ytcfg(self, client, video_id):
    """
    Download the page of `client` and return the ytcfg found in it.

    Only 'web', 'web_music' and 'web_embedded' have a page; {} is returned
    for any other client and when no config can be extracted.
    """
    if client == 'web':
        url = 'https://www.youtube.com'
    elif client == 'web_music':
        url = 'https://music.youtube.com'
    elif client == 'web_embedded':
        url = f'https://www.youtube.com/embed/{video_id}?html5=1'
    else:
        return {}
    readable_name = client.replace("_", " ").strip()
    webpage = self._download_webpage(
        url, video_id, fatal=False, note=f'Downloading {readable_name} client config')
    return self.extract_ytcfg(video_id, webpage) or {}
650
@staticmethod
def _build_api_continuation_query(continuation, ctp=None):
    """
    Return the query dict for a continuation request.

    'clickTracking' is only included when `ctp` (click tracking params) is truthy.
    """
    # TODO: Inconsistency with clickTrackingParams.
    # Currently we have a fixed ctp contained within context (from ytcfg)
    # and a ctp in root query for continuation.
    if not ctp:
        return {'continuation': continuation}
    return {
        'continuation': continuation,
        'clickTracking': {'clickTrackingParams': ctp},
    }
662
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the 'nextContinuationData' or
    'reloadContinuationData' of `renderer`.

    Returns None when neither is present or it carries no continuation token.
    """
    data_getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    continuation_data = try_get(renderer, data_getters, dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))
675
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.

    Returns None when `continuation_ep` is not a dict or has no
    continuationCommand token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))
685
@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for `renderer`.

    The next/reload continuation data is preferred; otherwise the
    continuationItemRenderer entries under 'contents', 'items' or 'rows'
    are searched for an endpoint or button command.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query

    endpoint_path = (
        ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
        ('continuationEndpoint', ('button', 'buttonRenderer', 'command')),
    )
    return traverse_obj(
        renderer, endpoint_path, get_all=False,
        expected_type=cls._extract_continuation_ep_data)
696
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert type, message) pairs for the alerts listed under data['alerts'].

    Entries that are not dicts, alerts without a 'type' and alerts without
    any message text are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # The response is untrusted: guard the renderer like its container,
            # otherwise a non-dict value raises AttributeError on .get()
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
709
def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """
    Report (type, message) alert pairs as warnings and raise on errors.

    With `fatal`, alerts of type "error" (case-insensitive) are errors: the
    last one is raised as ExtractorError and the earlier ones are reported
    as warnings. All other alerts are reported as warnings unless their
    message is listed in _IGNORED_WARNINGS.
    """
    fatal_alerts = []
    to_warn = []
    for kind, text in alerts:
        if kind.lower() == 'error' and fatal:
            fatal_alerts.append((kind, text))
        elif text not in self._IGNORED_WARNINGS:
            to_warn.append((kind, text))

    # Only the last error is raised; the ones before it are shown as warnings
    to_warn.extend(fatal_alerts[:-1])
    for kind, text in to_warn:
        self.report_warning(f'YouTube said: {kind} - {text}', only_once=only_once)
    if fatal_alerts:
        raise ExtractorError('YouTube said: %s' % fatal_alerts[-1][1], expected=expected)
722
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts of `data` and hand them to _report_alerts with the remaining arguments."""
    found_alerts = self._extract_alerts(data)
    return self._report_alerts(found_alerts, *args, **kwargs)
725
def _extract_badges(self, renderer: dict):
    """
    Return a list of {'type': BadgeType} dicts for the badges of `renderer`.

    Each metadataBadgeRenderer is classified by its icon type, then by its
    style, and finally by matching its label text against known English
    labels. Badges that match none of these are left out.
    """
    privacy_icon_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM
    }

    badges = []
    for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer'), default=[]):
        badge_type = (
            privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
            # expected_type=str keeps an unhashable style value from raising in .get()
            or badge_style_map.get(traverse_obj(badge, 'style', expected_type=str))
        )
        if badge_type:
            badges.append({'type': badge_type})
            continue

        # fallback, won't work in some languages
        label = traverse_obj(badge, 'label', expected_type=str, default='').lower()
        for match, label_badge_type in label_map.items():
            if match in label:
                # Record the type found through the label (badge_type is empty here)
                # and stop at the first match so a badge is reported only once
                badges.append({'type': label_badge_type})
                break

    return badges
765
@staticmethod
def _has_badge(badges, badge_type):
    """Whether any entry of `badges` has the given `badge_type` under its 'type' key."""
    matching = traverse_obj(badges, lambda _, badge: badge['type'] == badge_type)
    return bool(matching)
769
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found at any of the given paths of `data`.

    Without paths, `data` itself is examined. A text object is read from its
    'simpleText' string or, failing that, by joining the 'text' of its 'runs'
    (a plain list is treated as the runs). With `max_runs`, only that many
    leading runs are joined. Returns None when no text is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # NOTE(review): a path with `...` or list/tuple keys presumably makes
            # traverse_obj return a list of matches; any other result is wrapped
            branching = any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                candidates = [candidates]
        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text
            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                runs = candidate
            if max_runs:
                runs = runs[:max_runs]
            joined = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
    return None
791
def _get_count(self, data, *path_list):
    """
    Return the count contained in the text found at the given paths.

    parse_count() is tried first; when it yields None, the leading run of
    digits and commas (after removing whitespace) is converted instead.
    """
    raw_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(raw_text)
    if parsed is not None:
        return parsed
    leading_digits = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', raw_text), 'count', default=None)
    return str_to_int(leading_digits)
799
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key

    Returns a list of {'url', 'height', 'width'} dicts; entries without a
    valid URL are skipped.
    """
    collected = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in entries:
            url = url_or_none(entry.get('url'))
            if not url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in url:
                # keep only the part before the query string
                url = url.partition('?')[0]
            collected.append({
                'url': url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return collected
823
@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

    Returns None when the text holds no relative time or it cannot be converted.
    """
    found = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if not found:
        return None
    keyword = found.group('start')
    if keyword:
        return datetime_from_str(keyword)
    try:
        return datetime_from_str('now-%s%s' % (found.group('time'), found.group('unit')))
    except ValueError:
        return None
839
def _parse_time_text(self, text):
    """
    Convert a time text to a timestamp.

    Relative times ("6 days ago") are tried first, then unified_timestamp()
    on the whole text and on a date-like part of it. A warning is emitted
    when nothing can be parsed and the preferred language is unset or "en".
    Returns None for empty or unparsable text.
    """
    if not text:
        return None

    relative = self.extract_relative_time(text)
    if isinstance(relative, datetime.datetime):
        timestamp = calendar.timegm(relative.timetuple())
    else:
        date_patterns = (
            r'([a-z]+\s*\d{1,2},?\s*20\d{2})',
            r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)',
        )
        timestamp = unified_timestamp(text) or unified_timestamp(
            self._search_regex(date_patterns, text.lower(), 'time text', default=None))

    if timestamp is None and self._preferred_lang in (None, 'en'):
        self.report_warning(
            f'Cannot parse localized time text "{text}"', only_once=True)
    return timestamp
859
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep`, retrying on errors that look transient.

    One attempt is made per item yielded by self.RetryManager(). An attempt is
    retried (by setting retry.error and continuing) when:
      - the request fails with a network error that is not an HTTPError
      - the request fails with an HTTPError whose code is neither 403 nor 429
      - the alerts in the response raise an error mentioning 'unknown error'
      - traversing the response with check_get_keys yields nothing
    Any other ExtractorError is handed to self._error_or_warning() together
    with `fatal`, and its result is returned.

    @param item_id:         id passed to the API call as video_id and used when parsing error bodies
    @param query:           query passed through to self._call_api()
    @param check_get_keys:  path(s) handed to traverse_obj() to decide whether the
                            response is complete
    @param fatal:           forwarded to self._error_or_warning(); the API call itself
                            is always made with fatal=True
    @returns: the API response
    NOTE(review): what happens once the attempts run out depends on RetryManager,
    which is not visible here; if the loop simply ends, None is returned.
    """
    for retry in self.RetryManager():
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as err:
            cause = err.cause
            if not isinstance(cause, network_exceptions):
                return self._error_or_warning(err, fatal=fatal)
            if not isinstance(cause, urllib.error.HTTPError):
                retry.error = err
                continue

            # Peek at the error body; a non-HTML body may be JSON carrying an error message
            body_start = cause.read(512)
            if not is_html(body_start):
                error_body = self._webpage_read_content(cause, None, item_id, prefix=body_start) or '{}'
                api_message = try_get(
                    self._parse_json(error_body, item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if api_message:
                    self._report_alerts([('ERROR', api_message)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if cause.code in (403, 429):
                return self._error_or_warning(err, fatal=fatal)
            retry.error = err
            continue

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as err:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' not in err.msg.lower():
                return self._error_or_warning(err, fatal=fatal)
            retry.error = err
            continue

        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if traverse_obj(response, *variadic(check_get_keys)):
            return response
        retry.error = ExtractorError('Incomplete data received', expected=True)
911
912 @staticmethod
913 def is_music_url(url):
914 return re.match(r'https?://music\.youtube\.com/', url) is not None
915
def _extract_video(self, renderer):
    """
    Build a 'url' type result for YoutubeIE from a video renderer.

    Fields missing from the renderer itself fall back to the reel player
    header renderer nested under its navigationEndpoint, where one exists.

    @param renderer: renderer dict describing a single video
    @returns: dict with '_type': 'url' pointing at the watch (or shorts) URL of
              the video, plus the metadata that could be read from the renderer.
              The view count is stored under 'concurrent_view_count' for live
              and upcoming videos and under 'view_count' otherwise.
    """
    video_id = renderer.get('videoId')

    reel_header = traverse_obj(renderer, (
        'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
        'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))

    title = self._get_text(renderer, 'title', 'headline') or self._get_text(reel_header, 'reelTitleText')
    description = self._get_text(renderer, 'descriptionSnippet')

    # Duration: explicit seconds, then the length text / overlay, then the accessibility label
    duration = int_or_none(renderer.get('lengthSeconds'))
    if duration is None:
        length_text = self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
        duration = parse_duration(length_text)
    if duration is None:
        # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab)
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False) or traverse_obj(
        reel_header, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId'))

    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)

    navigation_path = traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)
    navigation_url = urljoin('https://www.youtube.com/', navigation_path) or ''
    is_short = overlay_style == 'SHORTS' or '/shorts/' in navigation_url
    url = (f'https://www.youtube.com/shorts/{video_id}' if is_short
           else f'https://www.youtube.com/watch?v={video_id}')

    time_text = (self._get_text(renderer, 'publishedTimeText', 'videoInfo')
                 or self._get_text(reel_header, 'timestampText') or '')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW):
        live_status = 'is_live'
    else:
        live_status = None

    # videoInfo is a string like '50K views • 10 years ago'.
    view_count_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
    if 'no views' in view_count_text.lower():
        view_count = 0
    else:
        view_count = self._get_count({'simpleText': view_count_text})
    if live_status in ('is_live', 'is_upcoming'):
        view_count_field = 'concurrent_view_count'
    else:
        view_count_field = 'view_count'

    channel = (self._get_text(renderer, 'ownerText', 'shortBylineText')
               or self._get_text(reel_header, 'channelTitleText'))
    channel_url = f'https://www.youtube.com/channel/{channel_id}' if channel_id else None
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    # The timestamp is only filled in when the 'approximate_date' extractor argument is given
    timestamp = None
    if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
        timestamp = self._parse_time_text(time_text)

    if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC):
        availability = 'public'
    else:
        # A missing badge is passed as None rather than False
        availability = self._availability(
            is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
            needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
            needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
            is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'channel_id': channel_id,
        'channel': channel,
        'channel_url': channel_url,
        'thumbnails': thumbnails,
        'timestamp': timestamp,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
        view_count_field: view_count,
        'live_status': live_status,
    }
998
999
1000 class YoutubeIE(YoutubeBaseInfoExtractor):
1001 IE_DESC = 'YouTube'
1002 _VALID_URL = r"""(?x)^
1003 (
1004 (?:https?://|//) # http(s):// or protocol-independent URL
1005 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
1006 (?:www\.)?deturl\.com/www\.youtube\.com|
1007 (?:www\.)?pwnyoutube\.com|
1008 (?:www\.)?hooktube\.com|
1009 (?:www\.)?yourepeat\.com|
1010 tube\.majestyc\.net|
1011 %(invidious)s|
1012 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
1013 (?:.*?\#/)? # handle anchor (#/) redirect urls
1014 (?: # the various things that can precede the ID:
1015 (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
1016 |(?: # or the v= param in all its forms
1017 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
1018 (?:\?|\#!?) # the params delimiter ? or # or #!
1019 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
1020 v=
1021 )
1022 ))
1023 |(?:
1024 youtu\.be| # just youtu.be/xxxx
1025 vid\.plus| # or vid.plus/xxxx
1026 zwearz\.com/watch| # or zwearz.com/watch/xxxx
1027 %(invidious)s
1028 )/
1029 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
1030 )
1031 )? # all until now is optional -> you can pass the naked ID
1032 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
1033 (?(1).+)? # if we found the ID, everything can follow
1034 (?:\#|$)""" % {
1035 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
1036 }
1037 _EMBED_REGEX = [
1038 r'''(?x)
1039 (?:
1040 <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
1041 data-video-url=|
1042 <embed[^>]+?src=|
1043 embedSWF\(?:\s*|
1044 <object[^>]+data=|
1045 new\s+SWFObject\(
1046 )
1047 (["\'])
1048 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1049 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1050 \1''',
1051 # https://wordpress.org/plugins/lazy-load-for-videos/
1052 r'''(?xs)
1053 <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
1054 \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
1055 ]
1056 _RETURN_TYPE = 'video' # XXX: How to handle multifeed?
1057
1058 _PLAYER_INFO_RE = (
1059 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
1060 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
1061 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
1062 )
1063 _formats = {
1064 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1065 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1066 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
1067 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
1068 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
1069 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1070 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1071 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1072 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
1073 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1074 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1075 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1076 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1077 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1078 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1079 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1080 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1081 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1082
1083
1084 # 3D videos
1085 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1086 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1087 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1088 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1089 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1090 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1091 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1092
1093 # Apple HTTP Live Streaming
1094 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1095 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1096 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1097 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1098 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1099 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1100 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1101 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
1102
1103 # DASH mp4 video
1104 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1105 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1106 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1107 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1108 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
1109 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
1110 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1111 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1112 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1113 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1114 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1115 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
1116
1117 # Dash mp4 audio
1118 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1119 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1120 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1121 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1122 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1123 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1124 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
1125
1126 # Dash webm
1127 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1128 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1129 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1130 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1131 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1132 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1133 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1134 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1135 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1136 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1137 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1138 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1139 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1140 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1141 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1142 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1143 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1144 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1145 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1146 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1147 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1148 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1149
1150 # Dash webm audio
1151 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1152 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1153
1154 # Dash webm audio with opus inside
1155 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1156 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1157 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1158
1159 # RTMP (unnamed)
1160 '_rtmp': {'protocol': 'rtmp'},
1161
1162 # av01 video only formats sometimes served with "unknown" codecs
1163 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1164 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1165 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1166 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1167 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1168 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1169 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1170 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1171 }
1172 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
1173
1174 _GEO_BYPASS = False
1175
1176 IE_NAME = 'youtube'
1177 _TESTS = [
1178 {
1179 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1180 'info_dict': {
1181 'id': 'BaW_jenozKc',
1182 'ext': 'mp4',
1183 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1184 'uploader': 'Philipp Hagemeister',
1185 'uploader_id': 'phihag',
1186 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1187 'channel': 'Philipp Hagemeister',
1188 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1189 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1190 'upload_date': '20121002',
1191 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1192 'categories': ['Science & Technology'],
1193 'tags': ['youtube-dl'],
1194 'duration': 10,
1195 'view_count': int,
1196 'like_count': int,
1197 'availability': 'public',
1198 'playable_in_embed': True,
1199 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1200 'live_status': 'not_live',
1201 'age_limit': 0,
1202 'start_time': 1,
1203 'end_time': 9,
1204 'comment_count': int,
1205 'channel_follower_count': int
1206 }
1207 },
1208 {
1209 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1210 'note': 'Embed-only video (#1746)',
1211 'info_dict': {
1212 'id': 'yZIXLfi8CZQ',
1213 'ext': 'mp4',
1214 'upload_date': '20120608',
1215 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1216 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1217 'uploader': 'SET India',
1218 'uploader_id': 'setindia',
1219 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1220 'age_limit': 18,
1221 },
1222 'skip': 'Private video',
1223 },
1224 {
1225 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1226 'note': 'Use the first video ID in the URL',
1227 'info_dict': {
1228 'id': 'BaW_jenozKc',
1229 'ext': 'mp4',
1230 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1231 'uploader': 'Philipp Hagemeister',
1232 'uploader_id': 'phihag',
1233 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1234 'channel': 'Philipp Hagemeister',
1235 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1236 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1237 'upload_date': '20121002',
1238 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1239 'categories': ['Science & Technology'],
1240 'tags': ['youtube-dl'],
1241 'duration': 10,
1242 'view_count': int,
1243 'like_count': int,
1244 'availability': 'public',
1245 'playable_in_embed': True,
1246 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1247 'live_status': 'not_live',
1248 'age_limit': 0,
1249 'comment_count': int,
1250 'channel_follower_count': int
1251 },
1252 'params': {
1253 'skip_download': True,
1254 },
1255 },
1256 {
1257 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1258 'note': '256k DASH audio (format 141) via DASH manifest',
1259 'info_dict': {
1260 'id': 'a9LDPn-MO4I',
1261 'ext': 'm4a',
1262 'upload_date': '20121002',
1263 'uploader_id': '8KVIDEO',
1264 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1265 'description': '',
1266 'uploader': '8KVIDEO',
1267 'title': 'UHDTV TEST 8K VIDEO.mp4'
1268 },
1269 'params': {
1270 'youtube_include_dash_manifest': True,
1271 'format': '141',
1272 },
1273 'skip': 'format 141 not served anymore',
1274 },
1275 # DASH manifest with encrypted signature
1276 {
1277 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1278 'info_dict': {
1279 'id': 'IB3lcPjvWLA',
1280 'ext': 'm4a',
1281 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1282 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1283 'duration': 244,
1284 'uploader': 'AfrojackVEVO',
1285 'uploader_id': 'AfrojackVEVO',
1286 'upload_date': '20131011',
1287 'abr': 129.495,
1288 'like_count': int,
1289 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1290 'playable_in_embed': True,
1291 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1292 'view_count': int,
1293 'track': 'The Spark',
1294 'live_status': 'not_live',
1295 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1296 'channel': 'Afrojack',
1297 'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
1298 'tags': 'count:19',
1299 'availability': 'public',
1300 'categories': ['Music'],
1301 'age_limit': 0,
1302 'alt_title': 'The Spark',
1303 'channel_follower_count': int
1304 },
1305 'params': {
1306 'youtube_include_dash_manifest': True,
1307 'format': '141/bestaudio[ext=m4a]',
1308 },
1309 },
1310 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1311 {
1312 'note': 'Embed allowed age-gate video',
1313 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1314 'info_dict': {
1315 'id': 'HtVdAasjOgU',
1316 'ext': 'mp4',
1317 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1318 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1319 'duration': 142,
1320 'uploader': 'The Witcher',
1321 'uploader_id': 'WitcherGame',
1322 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1323 'upload_date': '20140605',
1324 'age_limit': 18,
1325 'categories': ['Gaming'],
1326 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1327 'availability': 'needs_auth',
1328 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1329 'like_count': int,
1330 'channel': 'The Witcher',
1331 'live_status': 'not_live',
1332 'tags': 'count:17',
1333 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1334 'playable_in_embed': True,
1335 'view_count': int,
1336 'channel_follower_count': int
1337 },
1338 },
1339 {
1340 'note': 'Age-gate video with embed allowed in public site',
1341 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1342 'info_dict': {
1343 'id': 'HsUATh_Nc2U',
1344 'ext': 'mp4',
1345 'title': 'Godzilla 2 (Official Video)',
1346 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1347 'upload_date': '20200408',
1348 'uploader_id': 'FlyingKitty900',
1349 'uploader': 'FlyingKitty',
1350 'age_limit': 18,
1351 'availability': 'needs_auth',
1352 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1353 'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
1354 'channel': 'FlyingKitty',
1355 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1356 'view_count': int,
1357 'categories': ['Entertainment'],
1358 'live_status': 'not_live',
1359 'tags': ['Flyingkitty', 'godzilla 2'],
1360 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1361 'like_count': int,
1362 'duration': 177,
1363 'playable_in_embed': True,
1364 'channel_follower_count': int
1365 },
1366 },
1367 {
1368 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1369 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1370 'info_dict': {
1371 'id': 'Tq92D6wQ1mg',
1372 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1373 'ext': 'mp4',
1374 'upload_date': '20191228',
1375 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1376 'uploader': 'Projekt Melody',
1377 'description': 'md5:17eccca93a786d51bc67646756894066',
1378 'age_limit': 18,
1379 'like_count': int,
1380 'availability': 'needs_auth',
1381 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1382 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1383 'view_count': int,
1384 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1385 'channel': 'Projekt Melody',
1386 'live_status': 'not_live',
1387 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1388 'playable_in_embed': True,
1389 'categories': ['Entertainment'],
1390 'duration': 106,
1391 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1392 'comment_count': int,
1393 'channel_follower_count': int
1394 },
1395 },
1396 {
1397 'note': 'Non-Agegated non-embeddable video',
1398 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1399 'info_dict': {
1400 'id': 'MeJVWBSsPAY',
1401 'ext': 'mp4',
1402 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1403 'uploader': 'Herr Lurik',
1404 'uploader_id': 'st3in234',
1405 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1406 'upload_date': '20130730',
1407 'track': 'Such mich find mich',
1408 'age_limit': 0,
1409 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1410 'like_count': int,
1411 'playable_in_embed': False,
1412 'creator': 'OOMPH!',
1413 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1414 'view_count': int,
1415 'alt_title': 'Such mich find mich',
1416 'duration': 210,
1417 'channel': 'Herr Lurik',
1418 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1419 'categories': ['Music'],
1420 'availability': 'public',
1421 'uploader_url': 'http://www.youtube.com/user/st3in234',
1422 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1423 'live_status': 'not_live',
1424 'artist': 'OOMPH!',
1425 'channel_follower_count': int
1426 },
1427 },
1428 {
1429 'note': 'Non-bypassable age-gated video',
1430 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1431 'only_matching': True,
1432 },
1433 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1434 # YouTube Red ad is not captured for creator
1435 {
1436 'url': '__2ABJjxzNo',
1437 'info_dict': {
1438 'id': '__2ABJjxzNo',
1439 'ext': 'mp4',
1440 'duration': 266,
1441 'upload_date': '20100430',
1442 'uploader_id': 'deadmau5',
1443 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1444 'creator': 'deadmau5',
1445 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1446 'uploader': 'deadmau5',
1447 'title': 'Deadmau5 - Some Chords (HD)',
1448 'alt_title': 'Some Chords',
1449 'availability': 'public',
1450 'tags': 'count:14',
1451 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1452 'view_count': int,
1453 'live_status': 'not_live',
1454 'channel': 'deadmau5',
1455 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1456 'like_count': int,
1457 'track': 'Some Chords',
1458 'artist': 'deadmau5',
1459 'playable_in_embed': True,
1460 'age_limit': 0,
1461 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1462 'categories': ['Music'],
1463 'album': 'Some Chords',
1464 'channel_follower_count': int
1465 },
1466 'expected_warnings': [
1467 'DASH manifest missing',
1468 ]
1469 },
1470 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1471 {
1472 'url': 'lqQg6PlCWgI',
1473 'info_dict': {
1474 'id': 'lqQg6PlCWgI',
1475 'ext': 'mp4',
1476 'duration': 6085,
1477 'upload_date': '20150827',
1478 'uploader_id': 'olympic',
1479 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1480 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
1481 'uploader': 'Olympics',
1482 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1483 'like_count': int,
1484 'release_timestamp': 1343767800,
1485 'playable_in_embed': True,
1486 'categories': ['Sports'],
1487 'release_date': '20120731',
1488 'channel': 'Olympics',
1489 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1490 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1491 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1492 'age_limit': 0,
1493 'availability': 'public',
1494 'live_status': 'was_live',
1495 'view_count': int,
1496 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
1497 'channel_follower_count': int
1498 },
1499 'params': {
1500 'skip_download': 'requires avconv',
1501 }
1502 },
1503 # Non-square pixels
1504 {
1505 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1506 'info_dict': {
1507 'id': '_b-2C3KPAM0',
1508 'ext': 'mp4',
1509 'stretched_ratio': 16 / 9.,
1510 'duration': 85,
1511 'upload_date': '20110310',
1512 'uploader_id': 'AllenMeow',
1513 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1514 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1515 'uploader': '孫ᄋᄅ',
1516 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1517 'playable_in_embed': True,
1518 'channel': '孫ᄋᄅ',
1519 'age_limit': 0,
1520 'tags': 'count:11',
1521 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1522 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1523 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1524 'view_count': int,
1525 'categories': ['People & Blogs'],
1526 'like_count': int,
1527 'live_status': 'not_live',
1528 'availability': 'unlisted',
1529 'comment_count': int,
1530 'channel_follower_count': int
1531 },
1532 },
1533 # url_encoded_fmt_stream_map is empty string
1534 {
1535 'url': 'qEJwOuvDf7I',
1536 'info_dict': {
1537 'id': 'qEJwOuvDf7I',
1538 'ext': 'webm',
1539 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1540 'description': '',
1541 'upload_date': '20150404',
1542 'uploader_id': 'spbelect',
1543 'uploader': 'Наблюдатели Петербурга',
1544 },
1545 'params': {
1546 'skip_download': 'requires avconv',
1547 },
1548 'skip': 'This live event has ended.',
1549 },
1550 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1551 {
1552 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1553 'info_dict': {
1554 'id': 'FIl7x6_3R5Y',
1555 'ext': 'webm',
1556 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1557 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1558 'duration': 220,
1559 'upload_date': '20150625',
1560 'uploader_id': 'dorappi2000',
1561 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1562 'uploader': 'dorappi2000',
1563 'formats': 'mincount:31',
1564 },
1565 'skip': 'not actual anymore',
1566 },
1567 # DASH manifest with segment_list
1568 {
1569 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1570 'md5': '8ce563a1d667b599d21064e982ab9e31',
1571 'info_dict': {
1572 'id': 'CsmdDsKjzN8',
1573 'ext': 'mp4',
1574 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1575 'uploader': 'Airtek',
1576 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1577 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1578 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1579 },
1580 'params': {
1581 'youtube_include_dash_manifest': True,
1582 'format': '135', # bestvideo
1583 },
1584 'skip': 'This live event has ended.',
1585 },
1586 {
1587 # Multifeed videos (multiple cameras), URL can be of any Camera
1588 'url': 'https://www.youtube.com/watch?v=zaPI8MvL8pg',
1589 'info_dict': {
1590 'id': 'zaPI8MvL8pg',
1591 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04',
1592 'description': 'md5:563ccbc698b39298481ca3c571169519',
1593 },
1594 'playlist': [{
1595 'info_dict': {
1596 'id': 'j5yGuxZ8lLU',
1597 'ext': 'mp4',
1598 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Chris)',
1599 'uploader': 'WiiLikeToPlay',
1600 'description': 'md5:563ccbc698b39298481ca3c571169519',
1601 'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',
1602 'duration': 10120,
1603 'channel_follower_count': int,
1604 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1605 'availability': 'public',
1606 'playable_in_embed': True,
1607 'upload_date': '20131105',
1608 'uploader_id': 'WiiRikeToPray',
1609 'categories': ['Gaming'],
1610 'live_status': 'was_live',
1611 'tags': 'count:24',
1612 'release_timestamp': 1383701910,
1613 'thumbnail': 'https://i.ytimg.com/vi/j5yGuxZ8lLU/maxresdefault.jpg',
1614 'comment_count': int,
1615 'age_limit': 0,
1616 'like_count': int,
1617 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1618 'channel': 'WiiLikeToPlay',
1619 'view_count': int,
1620 'release_date': '20131106',
1621 },
1622 }, {
1623 'info_dict': {
1624 'id': 'zaPI8MvL8pg',
1625 'ext': 'mp4',
1626 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Tyson)',
1627 'uploader_id': 'WiiRikeToPray',
1628 'availability': 'public',
1629 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1630 'channel': 'WiiLikeToPlay',
1631 'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',
1632 'channel_follower_count': int,
1633 'description': 'md5:563ccbc698b39298481ca3c571169519',
1634 'duration': 10108,
1635 'age_limit': 0,
1636 'like_count': int,
1637 'tags': 'count:24',
1638 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1639 'uploader': 'WiiLikeToPlay',
1640 'release_timestamp': 1383701915,
1641 'comment_count': int,
1642 'upload_date': '20131105',
1643 'thumbnail': 'https://i.ytimg.com/vi/zaPI8MvL8pg/maxresdefault.jpg',
1644 'release_date': '20131106',
1645 'playable_in_embed': True,
1646 'live_status': 'was_live',
1647 'categories': ['Gaming'],
1648 'view_count': int,
1649 },
1650 }, {
1651 'info_dict': {
1652 'id': 'R7r3vfO7Hao',
1653 'ext': 'mp4',
1654 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Spencer)',
1655 'thumbnail': 'https://i.ytimg.com/vi/R7r3vfO7Hao/maxresdefault.jpg',
1656 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1657 'like_count': int,
1658 'availability': 'public',
1659 'playable_in_embed': True,
1660 'upload_date': '20131105',
1661 'description': 'md5:563ccbc698b39298481ca3c571169519',
1662 'uploader_id': 'WiiRikeToPray',
1663 'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',
1664 'channel_follower_count': int,
1665 'tags': 'count:24',
1666 'release_date': '20131106',
1667 'uploader': 'WiiLikeToPlay',
1668 'comment_count': int,
1669 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1670 'channel': 'WiiLikeToPlay',
1671 'categories': ['Gaming'],
1672 'release_timestamp': 1383701914,
1673 'live_status': 'was_live',
1674 'age_limit': 0,
1675 'duration': 10128,
1676 'view_count': int,
1677 },
1678 }],
1679 'params': {'skip_download': True},
1680 },
1681 {
1682 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1683 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1684 'info_dict': {
1685 'id': 'gVfLd0zydlo',
1686 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1687 },
1688 'playlist_count': 2,
1689 'skip': 'Not multifeed anymore',
1690 },
1691 {
1692 'url': 'https://vid.plus/FlRa-iH7PGw',
1693 'only_matching': True,
1694 },
1695 {
1696 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1697 'only_matching': True,
1698 },
1699 {
1700 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1701 # Also tests cut-off URL expansion in video description (see
1702 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1703 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1704 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1705 'info_dict': {
1706 'id': 'lsguqyKfVQg',
1707 'ext': 'mp4',
1708 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1709 'alt_title': 'Dark Walk',
1710 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1711 'duration': 133,
1712 'upload_date': '20151119',
1713 'uploader_id': 'IronSoulElf',
1714 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1715 'uploader': 'IronSoulElf',
1716 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1717 'track': 'Dark Walk',
1718 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1719 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1720 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1721 'categories': ['Film & Animation'],
1722 'view_count': int,
1723 'live_status': 'not_live',
1724 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1725 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1726 'tags': 'count:13',
1727 'availability': 'public',
1728 'channel': 'IronSoulElf',
1729 'playable_in_embed': True,
1730 'like_count': int,
1731 'age_limit': 0,
1732 'channel_follower_count': int
1733 },
1734 'params': {
1735 'skip_download': True,
1736 },
1737 },
1738 {
1739 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1740 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1741 'only_matching': True,
1742 },
1743 {
1744 # Video with yt:stretch=17:0
1745 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1746 'info_dict': {
1747 'id': 'Q39EVAstoRM',
1748 'ext': 'mp4',
1749 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1750 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1751 'upload_date': '20151107',
1752 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1753 'uploader': 'CH GAMER DROID',
1754 },
1755 'params': {
1756 'skip_download': True,
1757 },
1758 'skip': 'This video does not exist.',
1759 },
1760 {
1761 # Video with incomplete 'yt:stretch=16:'
1762 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1763 'only_matching': True,
1764 },
1765 {
1766 # Video licensed under Creative Commons
1767 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1768 'info_dict': {
1769 'id': 'M4gD1WSo5mA',
1770 'ext': 'mp4',
1771 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1772 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1773 'duration': 721,
1774 'upload_date': '20150128',
1775 'uploader_id': 'BerkmanCenter',
1776 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1777 'uploader': 'The Berkman Klein Center for Internet & Society',
1778 'license': 'Creative Commons Attribution license (reuse allowed)',
1779 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1780 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1781 'like_count': int,
1782 'age_limit': 0,
1783 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1784 'channel': 'The Berkman Klein Center for Internet & Society',
1785 'availability': 'public',
1786 'view_count': int,
1787 'categories': ['Education'],
1788 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1789 'live_status': 'not_live',
1790 'playable_in_embed': True,
1791 'comment_count': int,
1792 'channel_follower_count': int,
1793 'chapters': list,
1794 },
1795 'params': {
1796 'skip_download': True,
1797 },
1798 },
1799 {
1800 # Channel-like uploader_url
1801 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1802 'info_dict': {
1803 'id': 'eQcmzGIKrzg',
1804 'ext': 'mp4',
1805 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1806 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1807 'duration': 4060,
1808 'upload_date': '20151120',
1809 'uploader': 'Bernie Sanders',
1810 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1811 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1812 'license': 'Creative Commons Attribution license (reuse allowed)',
1813 'playable_in_embed': True,
1814 'tags': 'count:12',
1815 'like_count': int,
1816 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1817 'age_limit': 0,
1818 'availability': 'public',
1819 'categories': ['News & Politics'],
1820 'channel': 'Bernie Sanders',
1821 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1822 'view_count': int,
1823 'live_status': 'not_live',
1824 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1825 'comment_count': int,
1826 'channel_follower_count': int,
1827 'chapters': list,
1828 },
1829 'params': {
1830 'skip_download': True,
1831 },
1832 },
1833 {
1834 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1835 'only_matching': True,
1836 },
1837 {
1838 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1839 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1840 'only_matching': True,
1841 },
1842 {
1843 # Rental video preview
1844 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1845 'info_dict': {
1846 'id': 'uGpuVWrhIzE',
1847 'ext': 'mp4',
1848 'title': 'Piku - Trailer',
1849 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1850 'upload_date': '20150811',
1851 'uploader': 'FlixMatrix',
1852 'uploader_id': 'FlixMatrixKaravan',
1853 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1854 'license': 'Standard YouTube License',
1855 },
1856 'params': {
1857 'skip_download': True,
1858 },
1859 'skip': 'This video is not available.',
1860 },
1861 {
1862 # YouTube Red video with episode data
1863 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1864 'info_dict': {
1865 'id': 'iqKdEhx-dD4',
1866 'ext': 'mp4',
1867 'title': 'Isolation - Mind Field (Ep 1)',
1868 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1869 'duration': 2085,
1870 'upload_date': '20170118',
1871 'uploader': 'Vsauce',
1872 'uploader_id': 'Vsauce',
1873 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1874 'series': 'Mind Field',
1875 'season_number': 1,
1876 'episode_number': 1,
1877 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1878 'tags': 'count:12',
1879 'view_count': int,
1880 'availability': 'public',
1881 'age_limit': 0,
1882 'channel': 'Vsauce',
1883 'episode': 'Episode 1',
1884 'categories': ['Entertainment'],
1885 'season': 'Season 1',
1886 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1887 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1888 'like_count': int,
1889 'playable_in_embed': True,
1890 'live_status': 'not_live',
1891 'channel_follower_count': int
1892 },
1893 'params': {
1894 'skip_download': True,
1895 },
1896 'expected_warnings': [
1897 'Skipping DASH manifest',
1898 ],
1899 },
1900 {
1901 # The following content has been identified by the YouTube community
1902 # as inappropriate or offensive to some audiences.
1903 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1904 'info_dict': {
1905 'id': '6SJNVb0GnPI',
1906 'ext': 'mp4',
1907 'title': 'Race Differences in Intelligence',
1908 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1909 'duration': 965,
1910 'upload_date': '20140124',
1911 'uploader': 'New Century Foundation',
1912 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1913 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1914 },
1915 'params': {
1916 'skip_download': True,
1917 },
1918 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1919 },
1920 {
1921 # itag 212
1922 'url': '1t24XAntNCY',
1923 'only_matching': True,
1924 },
1925 {
1926 # geo restricted to JP
1927 'url': 'sJL6WA-aGkQ',
1928 'only_matching': True,
1929 },
1930 {
1931 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1932 'only_matching': True,
1933 },
1934 {
1935 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1936 'only_matching': True,
1937 },
1938 {
1939 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1940 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1941 'only_matching': True,
1942 },
1943 {
1944 # DRM protected
1945 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1946 'only_matching': True,
1947 },
1948 {
1949 # Video with unsupported adaptive stream type formats
1950 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1951 'info_dict': {
1952 'id': 'Z4Vy8R84T1U',
1953 'ext': 'mp4',
1954 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1955 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1956 'duration': 433,
1957 'upload_date': '20130923',
1958 'uploader': 'Amelia Putri Harwita',
1959 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1960 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1961 'formats': 'maxcount:10',
1962 },
1963 'params': {
1964 'skip_download': True,
1965 'youtube_include_dash_manifest': False,
1966 },
1967 'skip': 'not actual anymore',
1968 },
1969 {
1970 # YouTube Music auto-generated description
1971 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1972 'info_dict': {
1973 'id': 'MgNrAu2pzNs',
1974 'ext': 'mp4',
1975 'title': 'Voyeur Girl',
1976 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1977 'upload_date': '20190312',
1978 'uploader': 'Stephen - Topic',
1979 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1980 'artist': 'Stephen',
1981 'track': 'Voyeur Girl',
1982 'album': 'it\'s too much love to know my dear',
1983 'release_date': '20190313',
1984 'release_year': 2019,
1985 'alt_title': 'Voyeur Girl',
1986 'view_count': int,
1987 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1988 'playable_in_embed': True,
1989 'like_count': int,
1990 'categories': ['Music'],
1991 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1992 'channel': 'Stephen',
1993 'availability': 'public',
1994 'creator': 'Stephen',
1995 'duration': 169,
1996 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
1997 'age_limit': 0,
1998 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1999 'tags': 'count:11',
2000 'live_status': 'not_live',
2001 'channel_follower_count': int
2002 },
2003 'params': {
2004 'skip_download': True,
2005 },
2006 },
2007 {
2008 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
2009 'only_matching': True,
2010 },
2011 {
2012 # invalid -> valid video id redirection
2013 'url': 'DJztXj2GPfl',
2014 'info_dict': {
2015 'id': 'DJztXj2GPfk',
2016 'ext': 'mp4',
2017 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
2018 'description': 'md5:bf577a41da97918e94fa9798d9228825',
2019 'upload_date': '20090125',
2020 'uploader': 'Prochorowka',
2021 'uploader_id': 'Prochorowka',
2022 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
2023 'artist': 'Panjabi MC',
2024 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
2025 'album': 'Beware of the Boys (Mundian To Bach Ke)',
2026 },
2027 'params': {
2028 'skip_download': True,
2029 },
2030 'skip': 'Video unavailable',
2031 },
2032 {
2033 # empty description results in an empty string
2034 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
2035 'info_dict': {
2036 'id': 'x41yOUIvK2k',
2037 'ext': 'mp4',
2038 'title': 'IMG 3456',
2039 'description': '',
2040 'upload_date': '20170613',
2041 'uploader_id': 'ElevageOrVert',
2042 'uploader': 'ElevageOrVert',
2043 'view_count': int,
2044 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
2045 'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
2046 'like_count': int,
2047 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
2048 'tags': [],
2049 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
2050 'availability': 'public',
2051 'age_limit': 0,
2052 'categories': ['Pets & Animals'],
2053 'duration': 7,
2054 'playable_in_embed': True,
2055 'live_status': 'not_live',
2056 'channel': 'ElevageOrVert',
2057 'channel_follower_count': int
2058 },
2059 'params': {
2060 'skip_download': True,
2061 },
2062 },
2063 {
2064 # with '};' inside yt initial data (see [1])
2065 # see [2] for an example with '};' inside ytInitialPlayerResponse
2066 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
2067 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
2068 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
2069 'info_dict': {
2070 'id': 'CHqg6qOn4no',
2071 'ext': 'mp4',
2072 'title': 'Part 77 Sort a list of simple types in c#',
2073 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
2074 'upload_date': '20130831',
2075 'uploader_id': 'kudvenkat',
2076 'uploader': 'kudvenkat',
2077 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
2078 'like_count': int,
2079 'uploader_url': 'http://www.youtube.com/user/kudvenkat',
2080 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
2081 'live_status': 'not_live',
2082 'categories': ['Education'],
2083 'availability': 'public',
2084 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
2085 'tags': 'count:12',
2086 'playable_in_embed': True,
2087 'age_limit': 0,
2088 'view_count': int,
2089 'duration': 522,
2090 'channel': 'kudvenkat',
2091 'comment_count': int,
2092 'channel_follower_count': int,
2093 'chapters': list,
2094 },
2095 'params': {
2096 'skip_download': True,
2097 },
2098 },
2099 {
2100 # another example of '};' in ytInitialData
2101 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
2102 'only_matching': True,
2103 },
2104 {
2105 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
2106 'only_matching': True,
2107 },
2108 {
2109 # https://github.com/ytdl-org/youtube-dl/pull/28094
2110 'url': 'OtqTfy26tG0',
2111 'info_dict': {
2112 'id': 'OtqTfy26tG0',
2113 'ext': 'mp4',
2114 'title': 'Burn Out',
2115 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
2116 'upload_date': '20141120',
2117 'uploader': 'The Cinematic Orchestra - Topic',
2118 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2119 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2120 'artist': 'The Cinematic Orchestra',
2121 'track': 'Burn Out',
2122 'album': 'Every Day',
2123 'like_count': int,
2124 'live_status': 'not_live',
2125 'alt_title': 'Burn Out',
2126 'duration': 614,
2127 'age_limit': 0,
2128 'view_count': int,
2129 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2130 'creator': 'The Cinematic Orchestra',
2131 'channel': 'The Cinematic Orchestra',
2132 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
2133 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2134 'availability': 'public',
2135 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
2136 'categories': ['Music'],
2137 'playable_in_embed': True,
2138 'channel_follower_count': int
2139 },
2140 'params': {
2141 'skip_download': True,
2142 },
2143 },
2144 {
2145 # controversial video, only works with bpctr when authenticated with cookies
2146 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
2147 'only_matching': True,
2148 },
2149 {
2150 # controversial video, requires bpctr/contentCheckOk
2151 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
2152 'info_dict': {
2153 'id': 'SZJvDhaSDnc',
2154 'ext': 'mp4',
2155 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2156 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
2157 'uploader': 'CBS Mornings',
2158 'uploader_id': 'CBSThisMorning',
2159 'upload_date': '20140716',
2160 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2161 'duration': 170,
2162 'categories': ['News & Politics'],
2163 'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
2164 'view_count': int,
2165 'channel': 'CBS Mornings',
2166 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2167 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2168 'age_limit': 18,
2169 'availability': 'needs_auth',
2170 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2171 'like_count': int,
2172 'live_status': 'not_live',
2173 'playable_in_embed': True,
2174 'channel_follower_count': int
2175 }
2176 },
2177 {
2178 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2179 'url': 'cBvYw8_A0vQ',
2180 'info_dict': {
2181 'id': 'cBvYw8_A0vQ',
2182 'ext': 'mp4',
2183 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2184 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2185 'upload_date': '20201120',
2186 'uploader': 'Walk around Japan',
2187 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2188 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2189 'duration': 1456,
2190 'categories': ['Travel & Events'],
2191 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2192 'view_count': int,
2193 'channel': 'Walk around Japan',
2194 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2195 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2196 'age_limit': 0,
2197 'availability': 'public',
2198 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2199 'live_status': 'not_live',
2200 'playable_in_embed': True,
2201 'channel_follower_count': int
2202 },
2203 'params': {
2204 'skip_download': True,
2205 },
2206 }, {
2207 # Has multiple audio streams
2208 'url': 'WaOKSUlf4TM',
2209 'only_matching': True
2210 }, {
2211 # Requires Premium: has format 141 when requested using YTM url
2212 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2213 'only_matching': True
2214 }, {
2215 # multiple subtitles with same lang_code
2216 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2217 'only_matching': True,
2218 }, {
2219 # Force use android client fallback
2220 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2221 'info_dict': {
2222 'id': 'YOelRv7fMxY',
2223 'title': 'DIGGING A SECRET TUNNEL Part 1',
2224 'ext': '3gp',
2225 'upload_date': '20210624',
2226 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2227 'uploader': 'colinfurze',
2228 'uploader_id': 'colinfurze',
2229 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
2230 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2231 'duration': 596,
2232 'categories': ['Entertainment'],
2233 'uploader_url': 'http://www.youtube.com/user/colinfurze',
2234 'view_count': int,
2235 'channel': 'colinfurze',
2236 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2237 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2238 'age_limit': 0,
2239 'availability': 'public',
2240 'like_count': int,
2241 'live_status': 'not_live',
2242 'playable_in_embed': True,
2243 'channel_follower_count': int,
2244 'chapters': list,
2245 },
2246 'params': {
2247 'format': '17', # 3gp format available on android
2248 'extractor_args': {'youtube': {'player_client': ['android']}},
2249 },
2250 },
2251 {
2252 # Skip download of additional client configs (remix client config in this case)
2253 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2254 'only_matching': True,
2255 'params': {
2256 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2257 },
2258 }, {
2259 # shorts
2260 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2261 'only_matching': True,
2262 }, {
2263 'note': 'Storyboards',
2264 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2265 'info_dict': {
2266 'id': '5KLPxDtMqe8',
2267 'ext': 'mhtml',
2268 'format_id': 'sb0',
2269 'title': 'Your Brain is Plastic',
2270 'uploader_id': 'scishow',
2271 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2272 'upload_date': '20140324',
2273 'uploader': 'SciShow',
2274 'like_count': int,
2275 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2276 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2277 'view_count': int,
2278 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2279 'playable_in_embed': True,
2280 'tags': 'count:12',
2281 'uploader_url': 'http://www.youtube.com/user/scishow',
2282 'availability': 'public',
2283 'channel': 'SciShow',
2284 'live_status': 'not_live',
2285 'duration': 248,
2286 'categories': ['Education'],
2287 'age_limit': 0,
2288 'channel_follower_count': int,
2289 'chapters': list,
2290 }, 'params': {'format': 'mhtml', 'skip_download': True}
2291 }, {
2292 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2293 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2294 'info_dict': {
2295 'id': '2NUZ8W2llS4',
2296 'ext': 'mp4',
2297 'title': 'The NP that test your phone performance 🙂',
2298 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2299 'uploader': 'Leon Nguyen',
2300 'uploader_id': 'VNSXIII',
2301 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2302 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2303 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2304 'duration': 21,
2305 'view_count': int,
2306 'age_limit': 0,
2307 'categories': ['Gaming'],
2308 'tags': 'count:23',
2309 'playable_in_embed': True,
2310 'live_status': 'not_live',
2311 'upload_date': '20220103',
2312 'like_count': int,
2313 'availability': 'public',
2314 'channel': 'Leon Nguyen',
2315 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2316 'comment_count': int,
2317 'channel_follower_count': int
2318 }
2319 }, {
2320 # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
2321 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2322 'info_dict': {
2323 'id': '2NUZ8W2llS4',
2324 'ext': 'mp4',
2325 'title': 'The NP that test your phone performance 🙂',
2326 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2327 'uploader': 'Leon Nguyen',
2328 'uploader_id': 'VNSXIII',
2329 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2330 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2331 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2332 'duration': 21,
2333 'view_count': int,
2334 'age_limit': 0,
2335 'categories': ['Gaming'],
2336 'tags': 'count:23',
2337 'playable_in_embed': True,
2338 'live_status': 'not_live',
2339 'upload_date': '20220102',
2340 'like_count': int,
2341 'availability': 'public',
2342 'channel': 'Leon Nguyen',
2343 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2344 'comment_count': int,
2345 'channel_follower_count': int
2346 },
2347 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
2348 }, {
2349 # date text is for a premiered video; ensure upload date is in UTC (published 1641172509)
2350 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2351 'info_dict': {
2352 'id': 'mzZzzBU6lrM',
2353 'ext': 'mp4',
2354 'title': 'I Met GeorgeNotFound In Real Life...',
2355 'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
2356 'uploader': 'Quackity',
2357 'uploader_id': 'QuackityHQ',
2358 'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
2359 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2360 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2361 'duration': 955,
2362 'view_count': int,
2363 'age_limit': 0,
2364 'categories': ['Entertainment'],
2365 'tags': 'count:26',
2366 'playable_in_embed': True,
2367 'live_status': 'not_live',
2368 'release_timestamp': 1641172509,
2369 'release_date': '20220103',
2370 'upload_date': '20220103',
2371 'like_count': int,
2372 'availability': 'public',
2373 'channel': 'Quackity',
2374 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2375 'channel_follower_count': int
2376 }
2377 },
2378 { # continuous livestream. Microformat upload date should be preferred.
2379 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2380 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2381 'info_dict': {
2382 'id': 'kgx4WGK0oNU',
2383 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2384 'ext': 'mp4',
2385 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2386 'availability': 'public',
2387 'age_limit': 0,
2388 'release_timestamp': 1637975704,
2389 'upload_date': '20210619',
2390 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2391 'live_status': 'is_live',
2392 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
2393 'uploader': '阿鲍Abao',
2394 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2395 'channel': 'Abao in Tokyo',
2396 'channel_follower_count': int,
2397 'release_date': '20211127',
2398 'tags': 'count:39',
2399 'categories': ['People & Blogs'],
2400 'like_count': int,
2401 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2402 'view_count': int,
2403 'playable_in_embed': True,
2404 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
2405 'concurrent_view_count': int,
2406 },
2407 'params': {'skip_download': True}
2408 }, {
2409 # Story. Requires specific player params to work.
2410 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
2411 'info_dict': {
2412 'id': 'vv8qTUWmulI',
2413 'ext': 'mp4',
2414 'availability': 'unlisted',
2415 'view_count': int,
2416 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
2417 'upload_date': '20220526',
2418 'categories': ['Education'],
2419 'title': 'Story',
2420 'channel': 'IT\'S HISTORY',
2421 'description': '',
2422 'uploader_id': 'BlastfromthePast',
2423 'duration': 12,
2424 'uploader': 'IT\'S HISTORY',
2425 'playable_in_embed': True,
2426 'age_limit': 0,
2427 'live_status': 'not_live',
2428 'tags': [],
2429 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
2430 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
2431 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
2432 },
2433 'skip': 'stories get removed after some period of time',
2434 }, {
2435 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2436 'info_dict': {
2437 'id': 'tjjjtzRLHvA',
2438 'ext': 'mp4',
2439 'title': 'ハッシュタグ無し };if window.ytcsi',
2440 'upload_date': '20220323',
2441 'like_count': int,
2442 'availability': 'unlisted',
2443 'channel': 'nao20010128nao',
2444 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
2445 'age_limit': 0,
2446 'uploader': 'nao20010128nao',
2447 'uploader_id': 'nao20010128nao',
2448 'categories': ['Music'],
2449 'view_count': int,
2450 'description': '',
2451 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2452 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2453 'live_status': 'not_live',
2454 'playable_in_embed': True,
2455 'channel_follower_count': int,
2456 'duration': 6,
2457 'tags': [],
2458 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
2459 }
2460 }, {
2461 # Prefer primary title+description language metadata by default
2462 # Do not prefer translated description if primary is empty
2463 'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
2464 'info_dict': {
2465 'id': 'el3E4MbxRqQ',
2466 'ext': 'mp4',
2467 'title': 'dlp test video 2 - primary sv no desc',
2468 'description': '',
2469 'channel': 'cole-dlp-test-acc',
2470 'tags': [],
2471 'view_count': int,
2472 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2473 'like_count': int,
2474 'playable_in_embed': True,
2475 'availability': 'unlisted',
2476 'thumbnail': 'https://i.ytimg.com/vi_webp/el3E4MbxRqQ/maxresdefault.webp',
2477 'age_limit': 0,
2478 'duration': 5,
2479 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
2480 'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2481 'live_status': 'not_live',
2482 'upload_date': '20220908',
2483 'categories': ['People & Blogs'],
2484 'uploader': 'cole-dlp-test-acc',
2485 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2486 },
2487 'params': {'skip_download': True}
2488 }, {
2489 # Extractor argument: prefer translated title+description
2490 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
2491 'info_dict': {
2492 'id': 'gHKT4uU8Zng',
2493 'ext': 'mp4',
2494 'channel': 'cole-dlp-test-acc',
2495 'tags': [],
2496 'duration': 5,
2497 'live_status': 'not_live',
2498 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2499 'upload_date': '20220728',
2500 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
2501 'view_count': int,
2502 'categories': ['People & Blogs'],
2503 'thumbnail': 'https://i.ytimg.com/vi_webp/gHKT4uU8Zng/maxresdefault.webp',
2504 'title': 'dlp test video title translated (fr)',
2505 'availability': 'public',
2506 'uploader': 'cole-dlp-test-acc',
2507 'age_limit': 0,
2508 'description': 'dlp test video description translated (fr)',
2509 'playable_in_embed': True,
2510 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2511 'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2512 },
2513 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
2514 'expected_warnings': [r'Preferring "fr" translated fields'],
2515 }, {
2516 'note': '6 channel audio',
2517 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2518 'only_matching': True,
2519 }, {
2520 'note': 'Multiple HLS formats with same itag',
2521 'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',
2522 'info_dict': {
2523 'id': 'kX3nB4PpJko',
2524 'ext': 'mp4',
2525 'categories': ['Entertainment'],
2526 'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',
2527 'uploader_url': 'http://www.youtube.com/user/MrBeast6000',
2528 'live_status': 'not_live',
2529 'duration': 937,
2530 'channel_follower_count': int,
2531 'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',
2532 'title': 'Last To Take Hand Off Jet, Keeps It!',
2533 'channel': 'MrBeast',
2534 'playable_in_embed': True,
2535 'view_count': int,
2536 'upload_date': '20221112',
2537 'uploader': 'MrBeast',
2538 'uploader_id': 'MrBeast6000',
2539 'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',
2540 'age_limit': 0,
2541 'availability': 'public',
2542 'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',
2543 'like_count': int,
2544 'tags': [],
2545 },
2546 'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
2547 }, {
2548 'note': 'Audio formats with Dynamic Range Compression',
2549 'url': 'https://www.youtube.com/watch?v=Tq92D6wQ1mg',
2550 'info_dict': {
2551 'id': 'Tq92D6wQ1mg',
2552 'ext': 'weba',
2553 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
2554 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
2555 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
2556 'channel_follower_count': int,
2557 'description': 'md5:17eccca93a786d51bc67646756894066',
2558 'upload_date': '20191228',
2559 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
2560 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
2561 'playable_in_embed': True,
2562 'like_count': int,
2563 'categories': ['Entertainment'],
2564 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
2565 'age_limit': 18,
2566 'channel': 'Projekt Melody',
2567 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
2568 'view_count': int,
2569 'availability': 'needs_auth',
2570 'comment_count': int,
2571 'live_status': 'not_live',
2572 'uploader': 'Projekt Melody',
2573 'duration': 106,
2574 },
2575 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},
2576 }
2577 ]
2578
_WEBPAGE_TESTS = [{
    # Third-party page that embeds the video through a YouTube <object> tag
    'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
    'md5': '873c81d308b979f0e23ee7e620b312a3',
    'params': {'skip_download': True},
    'info_dict': {
        # Identity and descriptive metadata
        'id': 'msN87y-iEx0',
        'ext': 'mp4',
        'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
        'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
        'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
        'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
        'categories': ['Science & Technology'],
        'duration': 195,
        'upload_date': '20080526',
        # Uploader and channel
        'uploader': 'Christopher Sykes',
        'uploader_id': 'ChristopherJSykes',
        'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
        'channel': 'Christopher Sykes',
        'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
        'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
        'channel_follower_count': int,
        # Status flags
        'age_limit': 0,
        'availability': 'public',
        'live_status': 'not_live',
        'playable_in_embed': True,
        # Counters (expected value is given as a type)
        'view_count': int,
        'like_count': int,
        'comment_count': int,
    },
}]
2614
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle `url`.

    A URL whose query string carries a non-empty ``list`` value is rejected;
    every other URL is decided by the parent class's ``suitable``.
    """
    # NOTE(review): imported at function scope although the module header
    # imports parse_qs as well; presumably deliberate - confirm before hoisting
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    return False if list_values[0] else super().suitable(url)
2623
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor, then start with empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # _player_cache is filled by _cached(); _code_cache is filled by _load_player()
    self._player_cache = {}
    self._code_cache = {}
2628
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
    """Attach fragment generators to every format flagged ``is_from_start``.

    The selected format dicts are modified in place: ``is_live`` and
    ``fragments`` are set, ``protocol`` is set while the video is live, and
    the ``is_from_start`` marker is removed. Nothing is returned.
    """
    manifest_lock = threading.Lock()
    start_time = time.time()
    formats = [fmt for fmt in formats if fmt.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        """Re-download the player responses once `delay` seconds have passed since the last fetch."""
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        video_details = traverse_obj(
            prs, (..., 'videoDetails'), expected_type=dict, default=[])
        _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        is_live = live_status == 'is_live'
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        # Only one caller at a time may trigger a refetch
        with manifest_lock:
            refetch_manifest(format_id, delay)

        for candidate in formats:
            if candidate['format_id'] == format_id:
                return candidate['manifest_url'], candidate['manifest_stream_number'], is_live

        if is_live:
            self.report_warning(
                f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
        else:
            self.to_screen(f'{video_id}: Video is no longer live')
        return None

    for fmt in formats:
        fmt['is_live'] = is_live
        # When the video is not live, a snapshot of the format is handed to the generator
        orig_fmt = False if is_live else fmt.copy()
        gen = functools.partial(
            self._live_dash_fragments, video_id, fmt['format_id'],
            live_start_time, mpd_feed, orig_fmt)
        if is_live:
            fmt.update(fragments=gen, protocol='http_dash_segments_generator')
        else:
            fmt['fragments'] = LazyList(gen({}))
        del fmt['is_from_start']
2676
def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
    """Generate fragment dicts (``url``, ``fragment_count``) for a live DASH format.

    @param mpd_feed                 callable(format_id, delay) returning
                                    (manifest_url, manifest_stream_number, is_live) or None
    @param manifestless_orig_fmt    format dict used instead of downloading the MPD,
                                    or a falsy value to download it
    @param ctx                      dict; ``start`` and ``last_error`` are read from it

    The loop keeps polling while the stream is reported live and gives up
    once the "no fragment" score exceeds 30.
    """
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # Must be bound before _extract_sequence_from_mpd can run: the helper
    # returns this value on its early-exit paths, and the very first poll
    # may take one of them (e.g. when the manifest yields no formats)
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        """@returns (should_continue, last sequence number)"""
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        if manifestless_orig_fmt:
            fmt_info = manifestless_orig_fmt
        else:
            try:
                fmts, _ = self._extract_mpd_formats_and_subtitles(
                    mpd_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                fmts = None
            if not fmts:
                no_fragment_score += 2
                return False, last_seq
            fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
    while is_live:
        fetch_time = time.time()
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                    'fragment_count': last_seq,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        if manifestless_orig_fmt:
            # Stop at the first iteration if running for post-live manifestless;
            # fragment count no longer increase since it starts
            break

        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2787
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in the given ytcfg dicts, or None.

    `webpage` is accepted but not used here.
    """
    candidate_paths = (
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
    )
    relative_url = traverse_obj(ytcfgs, *candidate_paths, get_all=False, expected_type=str)
    if relative_url:
        return urljoin('https://www.youtube.com', relative_url)
    return None
2795
def _download_player_url(self, video_id, fatal=False):
    """Build the player JS URL from the version found in the iframe API script.

    Returns None when the script cannot be downloaded or no version is found.
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None

    player_version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not player_version:
        return None
    return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2805
def _signature_cache_id(self, example_sig):
    """ Return the dot-joined lengths of the dot-separated parts of a signature """
    part_lengths = [len(part) for part in example_sig.split('.')]
    return '.'.join(map(str, part_lengths))
2809
@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first ``_PLAYER_INFO_RE`` pattern matching `player_url`.

    Raises ExtractorError when none of the patterns match.
    """
    # Lazy generator: patterns after the first hit are never tried
    attempts = (re.search(player_re, player_url) for player_re in cls._PLAYER_INFO_RE)
    id_m = next(filter(None, attempts), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')
2819
def _load_player(self, video_id, player_url, fatal=True):
    """Return the player JS code for `player_url`, downloading it at most once per player id.

    Only a truthy download result is stored in ``self._code_cache``; None is
    returned when nothing is cached and the download produced nothing.
    """
    player_id = self._extract_player_info(player_url)
    code_cache = self._code_cache
    if player_id in code_cache:
        return code_cache.get(player_id)

    code = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    if code:
        code_cache[player_id] = code
    return code_cache.get(player_id)
2830
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that reorders the characters of a signature.

    The reordering is an index list loaded from the ``youtube-sigfuncs``
    cache. On a cache miss it is derived by running the player's signature
    function on a probe string and then stored back in the cache.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
    assert os.path.basename(func_id) == func_id

    self.write_debug(f'Extracting signature function {func_id}')
    cache_spec = self.cache.load('youtube-sigfuncs', func_id)

    if not cache_spec:
        code = self._load_player(video_id, player_url)
        if code:
            sig_func = self._parse_sig_js(code)
            # Probe string whose i-th character has code point i, so the
            # output reveals which input index ends up at each position
            probe = ''.join(chr(code_point) for code_point in range(len(example_sig)))
            cache_spec = list(map(ord, sig_func(probe)))
            self.cache.store('youtube-sigfuncs', func_id, cache_spec)

    def apply_spec(s):
        return ''.join(s[i] for i in cache_spec)

    return apply_spec
2850
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function `func`.

    Does nothing unless the ``youtube_print_sig_code`` param is set. `func`
    is probed with a string as long as `example_sig`, and the resulting
    index order is rendered as a sum of slices and single-index lookups.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return f's[{starts}{ends}{steps}]'

        if not idxs:
            # Empty signature: there is nothing to describe
            return

        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        # Seed with the first index so that a one-element spec, for which
        # the pairwise loop below never runs, still yields its only element
        i = idxs[0]
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
2892
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and wrap it as a Python callable.

    The function name is found with the first matching pattern below; the
    returned callable takes the signature string and passes it to the
    interpreted JS function as its single argument.
    """
    funcname_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        # This pattern used to be listed twice; the second copy could never
        # match where the first had not, so only one is kept
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        funcname_patterns, jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    return lambda s: initial_function([s])
2916
def _cached(self, func, *cache_id):
    """Wrap `func` so its outcome is memoised in ``self._player_cache`` under `cache_id`.

    A failure is memoised too (as an ExtractorError) and re-raised on every
    later call with the same `cache_id`.
    """
    def inner(*args, **kwargs):
        store = self._player_cache
        if cache_id not in store:
            try:
                outcome = func(*args, **kwargs)
            except ExtractorError as err:
                outcome = err
            except Exception as err:
                outcome = ExtractorError(traceback.format_exc(), cause=err)
            store[cache_id] = outcome

        cached = store[cache_id]
        if isinstance(cached, Exception):
            raise cached
        return cached
    return inner
2932
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    # The extracted function is memoised per player URL and signature layout
    sig_layout = self._signature_cache_id(s)
    cached_extractor = self._cached(
        self._extract_signature_function, 'sig', player_url, sig_layout)
    decrypt = cached_extractor(video_id, player_url, s)
    self._print_sig_code(decrypt, s)
    return decrypt(s)
2940
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature

    The native JS interpreter is tried first. If it raises
    JSInterpreter.Exception, the extracted function is run through PhantomJS
    instead; when the PhantomJS wrapper cannot be created, the interpreter's
    exception is re-raised.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as extract_err:
        raise ExtractorError('Unable to extract nsig function code', cause=extract_err)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        native_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        decrypted = native_nsig(jsi, func_code)(s)
    except JSInterpreter.Exception as native_err:
        try:
            phantom = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            raise native_err
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(native_err, only_once=True)

        arg_names, func_body = func_code
        script = f'console.log(function({", ".join(arg_names)}) {{ {func_body} }}({s!r}));'
        decrypted = phantom.execute(
            script, video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {decrypted}')
    return decrypted
2974
def _extract_n_function_name(self, jscode):
    """Return the name of the player's "n" function.

    When the call site references the function as ``name[idx]``, the array
    literal assigned to ``name`` is parsed and its ``idx``-th entry returned.
    """
    funcname, idx = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
        jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    if not idx:
        return funcname

    array_literal = self._search_regex(
        rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
        f'Initial JS player n function list ({funcname}.{idx})')
    candidates = json.loads(js_to_json(array_literal))
    return candidates[int(idx)]
2985
def _extract_n_function_code(self, video_id, player_url):
    """Return ``(jsi, player_id, func_code)`` for the player's "n" function.

    ``func_code`` is taken from the ``youtube-nsig`` cache when available.
    Otherwise it is extracted from the player JS (by regex first, then by
    the JS interpreter) and stored in the cache.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # The name is escaped because JS identifiers may contain "$", which would
    # otherwise act as a regex anchor and keep this pattern from ever matching
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
            # NB: The end of the regex is intentionally kept strict
            {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % re.escape(func_name),
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
3011
def _extract_n_function_from_code(self, jsi, func_code):
    """Build a Python callable from `func_code` that decrypts a single n value.

    Any failure of the callable surfaces as JSInterpreter.Exception, including
    a result that starts with ``enhanced_except_``.
    """
    js_func = jsi.extract_function_from_code(*func_code)

    def extract_nsig(s):
        try:
            result = js_func([s])
        except JSInterpreter.Exception:
            raise
        except Exception as err:
            raise JSInterpreter.Exception(traceback.format_exc(), cause=err)
        else:
            if result.startswith('enhanced_except_'):
                raise JSInterpreter.Exception('Signature function returned an exception')
            return result

    return extract_nsig
3028
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The ``STS`` entry of `ytcfg` is preferred; the player JS is only
    consulted when that yields nothing.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
3052
def _mark_watched(self, video_id, player_responses):
    """Request the playback and watch-time tracking URLs found in `player_responses`.

    Stops with a warning as soon as one of the two tracking URLs is missing.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')
    # is_full is 0 for the playback URL and 1 for the watch-time URL
    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        base_url = get_first(
            player_responses, ('playbackTracking', key, 'baseUrl'),
            expected_type=url_or_none)
        if not base_url:
            self.report_warning(f'Unable to mark {label}watched')
            return

        parsed_url = urllib.parse.urlparse(base_url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

        # more consistent results setting it to right before the end
        reported_len = (qs.get('len') or ['1.5'])[0]
        video_length = [str(float(reported_len) - 1)]

        qs['ver'] = ['2']
        qs['cpn'] = [cpn]
        qs['cmt'] = video_length
        qs['el'] = 'detailpage'  # otherwise defaults to "shorts"

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs['st'] = 0
            qs['et'] = video_length

        ping_url = urllib.parse.urlunparse(
            parsed_url._replace(query=urllib.parse.urlencode(qs, True)))

        self._download_webpage(
            ping_url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
3093
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield URL results for YouTube videos referenced by `webpage`.

    A ``<link rel="alternate">`` pointing at a YouTube watch URL is yielded
    on its own, after which ``cls.StopExtraction`` is raised. Otherwise the
    parent class's results are yielded, followed by lazyYT and
    "YouTube Video Importer" embeds.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate is not None:
        yield cls.url_result(alternate.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    for lazy_match in re.finditer(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        raw_id = lazy_match.group(1)
        yield cls.url_result(unescapeHTML(raw_id), cls, raw_id)

    # Wordpress "YouTube Video Importer" plugin
    for plugin_match in re.finditer(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage):
        # Group 3 is the unnamed capture holding the video id
        imported_id = plugin_match.group(3)
        yield cls.url_result(imported_id, cls, imported_id)
3117
@classmethod
def extract_id(cls, url):
    """Return the video id that ``cls.get_temp_id`` derives from `url`.

    Raises ExtractorError when no id is obtained.
    """
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
3124
def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the chaptered player bar found in `data`."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters',
    )

    def start_seconds(chapter):
        # timeRangeStartMillis is divided by 1000 (scale=1000)
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def title_text(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    chapter_list = traverse_obj(data, chapters_path, expected_type=list)
    return self._extract_chapters(
        chapter_list,
        chapter_time=start_seconds,
        chapter_title=title_text,
        duration=duration)
3139
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the chapters of the first macro-markers list that yields any, else an empty list."""
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    # Panels are tried in order; later ones are not examined after a hit
    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
3152
def _extract_chapters_from_description(self, description, duration):
    """Parse chapter lines out of a video description.

    Lines of the form "<time> <title>" are tried first; only when they give
    no chapters are lines of the form "<title> <time>" tried.
    """
    duration_re = r'(?:\d+:)?\d{1,2}:\d{2}'
    sep_re = r'(?m)^\s*(%s)\b\W*\s(%s)\s*$'
    text = description or ''

    time_first = self._extract_chapters(
        re.findall(sep_re % (duration_re, r'.+?'), text),
        chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1],
        duration=duration, strict=False)
    if time_first:
        return time_first

    return self._extract_chapters(
        re.findall(sep_re % (r'.+?', duration_re), text),
        chapter_time=lambda x: parse_duration(x[1]), chapter_title=lambda x: x[0],
        duration=duration, strict=False)
3163
def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
    """Build a validated chapter list from raw chapter entries.

    @param chapter_list     iterable of raw entries (may be None)
    @param chapter_time     callable(entry) -> start time, or None if unknown
    @param chapter_title    callable(entry) -> title
    @param duration         video duration; nothing is extracted when it is falsy
    @param strict           when False, entries are sorted by start time first
    @returns                list of {'start_time', 'title'} dicts, or None without duration

    Entries without a start time, entries starting before the previously
    accepted chapter and entries starting after `duration` are dropped
    with a warning.
    """
    if not duration:
        return
    chapter_list = [{
        'start_time': chapter_time(chapter),
        'title': chapter_title(chapter),
    } for chapter in chapter_list or []]
    if not strict:
        chapter_list.sort(key=lambda c: c['start_time'] or 0)

    # Sentinel so the first real chapter is compared against time 0
    chapters = [{'start_time': 0}]
    for idx, chapter in enumerate(chapter_list):
        if chapter['start_time'] is None:
            self.report_warning(f'Incomplete chapter {idx}')
        elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
            chapters.append(chapter)
        elif chapter not in chapters:
            # Report whichever bound was actually violated
            if chapter['start_time'] > duration:
                issue = f'{chapter["start_time"]} > {duration}'
            else:
                issue = f'{chapter["start_time"]} < {chapters[-1]["start_time"]}'
            self.report_warning(
                f'Invalid start time ({issue}) for chapter "{chapter["title"]}"')
    return chapters[1:]
3184
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no ``commentId``. ``parent`` in the
    result is the given parent id, or ``'root'`` when none was given.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer, 'contentText')

    # Timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    timestamp = self._parse_time_text(time_text)

    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(
        comment_renderer, lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)

    # voteCount.simpleText is tried before likeCount
    vote_getters = (
        lambda x: x['voteCount']['simpleText'],
        lambda x: x['likeCount'],
    )
    like_count = parse_count(try_get(comment_renderer, vote_getters, str)) or 0

    author_thumbnail = try_get(
        comment_renderer, lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)
    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)

    action_buttons = try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}
    is_favorited = 'creatorHeart' in action_buttons

    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': like_count,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
3221
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """
    Yield the comments of a comment section (or of one reply thread), page by page.

    root_continuation_data: renderer data the first continuation is extracted from
        (for replies this is the `commentRepliesRenderer` of the parent thread).
    ytcfg: passed on to `generate_api_headers` and `_extract_response`.
    video_id: id of the video the comments belong to.
    parent: id of the parent comment when extracting replies, None for the top level.
    tracker: dict of counters shared across the recursive calls; created on the first call.

    Yields the dicts returned by `_extract_comment`.
    Raises `self.CommentsDisabled` if no comment was extracted for the top level
    and `root_continuation_data` carries a message.
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        """Read the comments header: record the estimated total and return the
        continuation of the requested sort order (None if there is none)."""
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        """Yield the comments found in `contents`, each followed by its replies.
        A bare `yield` (None) tells the caller that the parent limit was reached."""
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # The consumer below stops on the first falsy entry
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Respect both the per-thread limit and what is left of the global reply limit
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # The argument list is padded with empty strings so that four values can always be
    # unpacked; int_or_none is given sys.maxsize as the default for each of them
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        try:
            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        except ExtractorError as e:
            # Ignore incomplete data error for replies if retries didn't work.
            # This is to allow any other parent comments and comment threads to be downloaded.
            # See: https://github.com/yt-dlp/yt-dlp/issues/4669
            if 'incomplete data' in str(e).lower() and parent and self.get_param('ignoreerrors') is True:
                self.report_warning(
                    'Received incomplete data for a comment reply thread and retrying did not help. '
                    'Ignoring to let other comments be downloaded.')
                # `response` still holds the previous page at this point. Falling through
                # would yield its replies again and re-request the same continuation,
                # so give up on this reply thread instead.
                return
            raise
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first top-level page only provides the header (count and sort menu)
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    # extract_thread signalled that the parent comment limit was reached
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
        raise self.CommentsDisabled
3378
3379 @staticmethod
3380 def _generate_comment_continuation(video_id):
3381 """
3382 Generates initial comment section continuation token from given video id
3383 """
3384 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3385 return base64.b64encode(token.encode()).decode()
3386
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """
    Entry point for comment extraction.

    Returns a lazy iterator over the comments of the 'comment-item-section'
    found in `contents`, cut off after the first value of the `max_comments`
    extractor argument (no cut-off if that value is not a number).
    """
    def iter_section_comments():
        # Locate the item section that holds the comments (None if there is none)
        section = None
        for candidate in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if candidate.get('sectionIdentifier') == 'comment-item-section':
                section = candidate
                break
        yield from self._comment_entries(section, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(iter_section_comments(), limit)
3397
3398 @staticmethod
3399 def _get_checkok_params():
3400 return {'contentCheckOk': True, 'racyCheckOk': True}
3401
3402 @classmethod
3403 def _generate_player_context(cls, sts=None):
3404 context = {
3405 'html5Preference': 'HTML5_PREF_WANTS',
3406 }
3407 if sts is not None:
3408 context['signatureTimestamp'] = sts
3409 return {
3410 'playbackContext': {
3411 'contentPlaybackContext': context
3412 },
3413 **cls._get_checkok_params()
3414 }
3415
@staticmethod
def _is_agegated(player_response):
    """
    Tell whether a player response indicates an age-gated video.

    True if the playability status has a `desktopLegacyAgeGateReason`, or if its
    `status` / `reason` contains one of the known age-gate markers.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # The markers are lower-case while status values appear to be upper-case
    # (cf. 'UNPLAYABLE'), so compare case-insensitively. Non-string values are skipped.
    return any(
        expected in reason.lower()
        for reason in reasons if isinstance(reason, str)
        for expected in AGE_GATE_REASONS)
3427
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of a player response is 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
3431
# Value sent as `params` in the player API query (for stories and for the android
# client) and as the `pp` query parameter when downloading the webpage of a story
_STORY_PLAYER_PARAMS = '8AEB'
3433
3434 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
3435
3436 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
3437 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
3438 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
3439 headers = self.generate_api_headers(
3440 ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
3441
3442 yt_query = {
3443 'videoId': video_id,
3444 }
3445 if smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android':
3446 yt_query['params'] = self._STORY_PLAYER_PARAMS
3447
3448 yt_query.update(self._generate_player_context(sts))
3449 return self._extract_response(
3450 item_id=video_id, ep='player', query=yt_query,
3451 ytcfg=player_ytcfg, headers=headers, fatal=True,
3452 default_client=client,
3453 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
3454 ) or None
3455
def _get_requested_clients(self, url, smuggled_data):
    """
    Work out which innertube clients to query for a video.

    The `player_client` extractor argument may name individual clients, 'default'
    or 'all'; unsupported names are reported and skipped. If nothing usable was
    requested, the default clients are used. For music URLs the `*_music` variant
    of every requested client is added when such a client exists.

    Returns the client names with duplicates removed, in request order.
    """
    requested_clients = []
    default = ['android', 'web']
    # Clients starting with `_` cannot be requested explicitly
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the music variants first: extending the list from a generator that
        # iterates the same list would walk over the newly appended entries as well
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
3479
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """
    Collect the player responses of `video_id` from the given clients.

    clients: client names to query; fallback clients may be added on the fly
        for unplayable or age-gated responses.
    webpage: the downloaded watch page (may be None); its embedded initial
        player response is reused for the 'web' client.

    Returns a tuple `(player_responses, player_url)`.
    Raises the last ExtractorError if no player response could be obtained at all.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed copy used as a stack: pop() takes the clients in the requested order
    # and a fallback pushed by append_client is tried right after the current one
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    clients.append(client_name)
                    all_clients.add(actual_client)
                return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        # The webpage's ytcfg only applies to the web client; other clients start
        # empty and download their own config unless 'configs' is skipped
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        # Once found, the player URL is reused for the remaining clients
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The player URL download is attempted at most once per call
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            # For the web client, reuse the response embedded in the webpage when available
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            # Only the most recent error is kept; an earlier one is downgraded to a warning
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        # The last error is fatal only if not a single player response was collected
        if not len(prs):
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
3561
3562 def _needs_live_processing(self, live_status, duration):
3563 if (live_status == 'is_live' and self.get_param('live_from_start')
3564 or live_status == 'post_live' and (duration or 0) > 4 * 3600):
3565 return live_status
3566
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
    """
    Generator over the formats of a video, followed by its manifest subtitles.

    Yields one format dict per usable entry of the `formats` / `adaptiveFormats`
    lists in `streaming_data`, then the formats of the HLS and DASH manifests
    (unless skipped), and finally - as the very last item - the subtitles dict
    merged from those manifests.

    streaming_data: list of `streamingData` dicts, one per player response.
    player_url: URL of the JS player; needed to decrypt signatures and the `n` parameter.
    """
    # itags: itag -> set of (protocol, language) pairs already emitted
    # stream_ids: (itag, audio track id, isDrc) triples already emitted
    itags, stream_ids = collections.defaultdict(set), []
    # Qualities seen per itag and per height; used to rate manifest formats below
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        # The same stream may be offered by several clients; emit it only once
        stream_id = (itag, audio_track.get('id'), fmt.get('isDrc'))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No direct URL: the URL and its encrypted signature come in `signatureCipher`
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                # `sp` names the query parameter that carries the decrypted signature
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            # Replace the `n` parameter with its decrypted value; if that fails
            # the format is kept but marked as throttled
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                        f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
                        video_id=video_id, only_once=True)
                throttled = True

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        # 10 for the default audio track, -10 for descriptive audio, -1 otherwise
        language_preference = (
            10 if audio_track.get('audioIsDefault') and 10
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        # NOTE(review): `quality` looks like it can still be None here when the format has
        # neither `quality` nor `audioQuality`; the `quality.replace` below is then only
        # safe if `qualityLabel` is set - confirm
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
                'DRC' if fmt.get('isDrc') else None,
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            # A DRC variant is rated half a step below the same quality without DRC
            'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        # Split the mime type into the container type and the optional codecs list
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        # For single-stream formats the total bitrate is the video/audio bitrate
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'

        if itag:
            itags[itag].add(('https', dct.get('language')))
            stream_ids.append(stream_id)
        yield dct

    needs_live_processing = self._needs_live_processing(live_status, duration)
    skip_bad_formats = not self._configuration_arg('include_incomplete_formats')

    # Manifest types ('hls' / 'dash') that must not be downloaded
    skip_manifests = set(self._configuration_arg('skip'))
    if (not self.get_param('youtube_include_hls_manifest', True)
            or needs_live_processing == 'is_live'  # These will be filtered out by YoutubeDL anyway
            or needs_live_processing and skip_bad_formats):
        skip_manifests.add('hls')

    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    if self._configuration_arg('include_live_dash'):
        self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
                                            'Use include_incomplete_formats extractor argument instead')
    elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
        skip_manifests.add('dash')

    def process_manifest_format(f, proto, itag):
        """Set format_id and quality of a manifest format in place.
        Returns False if the same (protocol, language) was already emitted for this itag."""
        key = (proto, f.get('language'))
        if key in itags[itag]:
            return False
        itags[itag].add(key)

        # Add the protocol to the id only if the itag is also available via another protocol
        if any(p != proto for p, _ in itags[itag]):
            f['format_id'] = f'{itag}-{proto}'
        elif itag:
            f['format_id'] = itag

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        # Unknown itag: fall back to the quality recorded for the closest known height
        if f['quality'] == -1 and f.get('height'):
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        return True

    subtitles = {}
    for sd in streaming_data:
        hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                # For HLS formats the itag is taken from the format URL
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', f['format_id']):
                    # The file size is taken from the `clen` component of the URL, if present
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if needs_live_processing:
                        f['is_from_start'] = True

                    yield f
    # The subtitles are always the last item yielded
    yield subtitles
3773
def _extract_storyboard(self, player_responses, duration):
    """
    Yield one 'mhtml' storyboard format per level of the player storyboard spec.

    The spec is a '|'-separated string. Its first field is the URL template
    (resolved against https://i.ytimg.com/); every following field describes one
    storyboard level as 8 '#'-separated values, of which width, height, frame
    count, columns and rows (values 0-4) and the `$N` replacement and `sigh`
    signature (values 6-7) are used here.

    player_responses: player responses searched for the storyboard spec.
    duration: duration of the video; may be None if unknown, in which case
        nothing is yielded.
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The list is reversed, so pop() removes the first field of the spec: the URL template
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    if not duration:
        # fps and the fragment durations are computed from the duration. Without it the
        # divisions below would raise and abort the whole extraction, so skip storyboards.
        self.write_debug('Skipping storyboards since the video duration is unknown')
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        # All five counts must be non-zero numbers; they are used as divisors below
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        # `spec` is iterated in reverse, so `L - i` is the level's index in the original order
        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one image holding a grid of cols x rows frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may be only partially filled and thus shorter
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
3811
3812 def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
3813 webpage = None
3814 if 'webpage' not in self._configuration_arg('player_skip'):
3815 query = {'bpctr': '9999999999', 'has_verified': '1'}
3816 if smuggled_data.get('is_story'):
3817 query['pp'] = self._STORY_PLAYER_PARAMS
3818 webpage = self._download_webpage(
3819 webpage_url, video_id, fatal=False, query=query)
3820
3821 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
3822
3823 player_responses, player_url = self._extract_player_responses(
3824 self._get_requested_clients(url, smuggled_data),
3825 video_id, webpage, master_ytcfg, smuggled_data)
3826
3827 return webpage, master_ytcfg, player_responses, player_url
3828
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """
    Determine the live status of a video and extract its formats.

    Returns a tuple
    `(live_broadcast_details, live_status, streaming_data, formats, subtitles)`.
    `live_status` is one of 'post_live', 'is_live', 'is_upcoming', 'was_live',
    'not_live' or None when it cannot be determined.
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    post_live = get_first(video_details, 'isPostLiveDvr')

    # The first matching state wins
    if post_live:
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif is_upcoming:
        live_status = 'is_upcoming'
    elif live_content:
        live_status = 'was_live'
    elif False in (is_live, live_content):
        live_status = 'not_live'
    else:
        live_status = None

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    extracted = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)
    # The generator yields the formats first and the subtitles dict last
    *formats, subtitles = extracted

    return live_broadcast_details, live_status, streaming_data, formats, subtitles
3847
3848 def _real_extract(self, url):
3849 url, smuggled_data = unsmuggle_url(url, {})
3850 video_id = self._match_id(url)
3851
3852 base_url = self.http_scheme() + '//www.youtube.com/'
3853 webpage_url = base_url + 'watch?v=' + video_id
3854
3855 webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
3856
3857 playability_statuses = traverse_obj(
3858 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
3859
3860 trailer_video_id = get_first(
3861 playability_statuses,
3862 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
3863 expected_type=str)
3864 if trailer_video_id:
3865 return self.url_result(
3866 trailer_video_id, self.ie_key(), trailer_video_id)
3867
3868 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
3869 if webpage else (lambda x: None))
3870
3871 video_details = traverse_obj(
3872 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
3873 microformats = traverse_obj(
3874 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
3875 expected_type=dict, default=[])
3876
3877 translated_title = self._get_text(microformats, (..., 'title'))
3878 video_title = (self._preferred_lang and translated_title
3879 or get_first(video_details, 'title') # primary
3880 or translated_title
3881 or search_meta(['og:title', 'twitter:title', 'title']))
3882 translated_description = self._get_text(microformats, (..., 'description'))
3883 original_description = get_first(video_details, 'shortDescription')
3884 video_description = (
3885 self._preferred_lang and translated_description
3886 # If original description is blank, it will be an empty string.
3887 # Do not prefer translated description in this case.
3888 or original_description if original_description is not None else translated_description)
3889
3890 multifeed_metadata_list = get_first(
3891 player_responses,
3892 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
3893 expected_type=str)
3894 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
3895 if self.get_param('noplaylist'):
3896 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
3897 else:
3898 entries = []
3899 feed_ids = []
3900 for feed in multifeed_metadata_list.split(','):
3901 # Unquote should take place before split on comma (,) since textual
3902 # fields may contain comma as well (see
3903 # https://github.com/ytdl-org/youtube-dl/issues/8536)
3904 feed_data = urllib.parse.parse_qs(
3905 urllib.parse.unquote_plus(feed))
3906
3907 def feed_entry(name):
3908 return try_get(
3909 feed_data, lambda x: x[name][0], str)
3910
3911 feed_id = feed_entry('id')
3912 if not feed_id:
3913 continue
3914 feed_title = feed_entry('title')
3915 title = video_title
3916 if feed_title:
3917 title += ' (%s)' % feed_title
3918 entries.append({
3919 '_type': 'url_transparent',
3920 'ie_key': 'Youtube',
3921 'url': smuggle_url(
3922 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
3923 {'force_singlefeed': True}),
3924 'title': title,
3925 })
3926 feed_ids.append(feed_id)
3927 self.to_screen(
3928 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
3929 % (', '.join(feed_ids), video_id))
3930 return self.playlist_result(
3931 entries, video_id, video_title, video_description)
3932
3933 duration = (int_or_none(get_first(video_details, 'lengthSeconds'))
3934 or int_or_none(get_first(microformats, 'lengthSeconds'))
3935 or parse_duration(search_meta('duration')) or None)
3936
3937 live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \
3938 self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)
3939 if live_status == 'post_live':
3940 self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')
3941
3942 if not formats:
3943 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
3944 self.report_drm(video_id)
3945 pemr = get_first(
3946 playability_statuses,
3947 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
3948 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
3949 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
3950 if subreason:
3951 if subreason == 'The uploader has not made this video available in your country.':
3952 countries = get_first(microformats, 'availableCountries')
3953 if not countries:
3954 regions_allowed = search_meta('regionsAllowed')
3955 countries = regions_allowed.split(',') if regions_allowed else None
3956 self.raise_geo_restricted(subreason, countries, metadata_available=True)
3957 reason += f'. {subreason}'
3958 if reason:
3959 self.raise_no_formats(reason, expected=True)
3960
3961 keywords = get_first(video_details, 'keywords', expected_type=list) or []
3962 if not keywords and webpage:
3963 keywords = [
3964 unescapeHTML(m.group('content'))
3965 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
3966 for keyword in keywords:
3967 if keyword.startswith('yt:stretch='):
3968 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
3969 if mobj:
3970 # NB: float is intentional for forcing float division
3971 w, h = (float(v) for v in mobj.groups())
3972 if w > 0 and h > 0:
3973 ratio = w / h
3974 for f in formats:
3975 if f.get('vcodec') != 'none':
3976 f['stretched_ratio'] = ratio
3977 break
3978 thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
3979 thumbnail_url = search_meta(['og:image', 'twitter:image'])
3980 if thumbnail_url:
3981 thumbnails.append({
3982 'url': thumbnail_url,
3983 })
3984 original_thumbnails = thumbnails.copy()
3985
3986 # The best resolution thumbnails sometimes does not appear in the webpage
3987 # See: https://github.com/yt-dlp/yt-dlp/issues/340
3988 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
3989 thumbnail_names = [
3990 # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
3991 # in resolution, these are not the custom thumbnail. So de-prioritize them
3992 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
3993 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
3994 ]
3995 n_thumbnail_names = len(thumbnail_names)
3996 thumbnails.extend({
3997 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
3998 video_id=video_id, name=name, ext=ext,
3999 webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),
4000 } for name in thumbnail_names for ext in ('webp', 'jpg'))
4001 for thumb in thumbnails:
4002 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
4003 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
4004 self._remove_duplicate_formats(thumbnails)
4005 self._downloader._sort_thumbnails(original_thumbnails)
4006
4007 category = get_first(microformats, 'category') or search_meta('genre')
4008 channel_id = str_or_none(
4009 get_first(video_details, 'channelId')
4010 or get_first(microformats, 'externalChannelId')
4011 or search_meta('channelId'))
4012 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
4013
4014 live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
4015 live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
4016 if not duration and live_end_time and live_start_time:
4017 duration = live_end_time - live_start_time
4018
4019 needs_live_processing = self._needs_live_processing(live_status, duration)
4020
4021 def is_bad_format(fmt):
4022 if needs_live_processing and not fmt.get('is_from_start'):
4023 return True
4024 elif (live_status == 'is_live' and needs_live_processing != 'is_live'
4025 and fmt.get('protocol') == 'http_dash_segments'):
4026 return True
4027
4028 for fmt in filter(is_bad_format, formats):
4029 fmt['preference'] = (fmt.get('preference') or -1) - 10
4030 fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ')
4031
4032 if needs_live_processing:
4033 self._prepare_live_from_start_formats(
4034 formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')
4035
4036 formats.extend(self._extract_storyboard(player_responses, duration))
4037
4038 info = {
4039 'id': video_id,
4040 'title': video_title,
4041 'formats': formats,
4042 'thumbnails': thumbnails,
4043 # The best thumbnail that we are sure exists. Prevents unnecessary
4044 # URL checking if user don't care about getting the best possible thumbnail
4045 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
4046 'description': video_description,
4047 'uploader': get_first(video_details, 'author'),
4048 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
4049 'uploader_url': owner_profile_url,
4050 'channel_id': channel_id,
4051 'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),
4052 'duration': duration,
4053 'view_count': int_or_none(
4054 get_first((video_details, microformats), (..., 'viewCount'))
4055 or search_meta('interactionCount')),
4056 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
4057 'age_limit': 18 if (
4058 get_first(microformats, 'isFamilySafe') is False
4059 or search_meta('isFamilyFriendly') == 'false'
4060 or search_meta('og:restrictions:age') == '18+') else 0,
4061 'webpage_url': webpage_url,
4062 'categories': [category] if category else None,
4063 'tags': keywords,
4064 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
4065 'live_status': live_status,
4066 'release_timestamp': live_start_time,
4067 '_format_sort_fields': ( # source_preference is lower for throttled/potentially damaged formats
4068 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto')
4069 }
4070
4071 subtitles = {}
4072 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
4073 if pctr:
4074 def get_lang_code(track):
4075 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
4076 or track.get('languageCode'))
4077
4078 # Converted into dicts to remove duplicates
4079 captions = {
4080 get_lang_code(sub): sub
4081 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
4082 translation_languages = {
4083 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
4084 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
4085
4086 def process_language(container, base_url, lang_code, sub_name, query):
4087 lang_subs = container.setdefault(lang_code, [])
4088 for fmt in self._SUBTITLE_FORMATS:
4089 query.update({
4090 'fmt': fmt,
4091 })
4092 lang_subs.append({
4093 'ext': fmt,
4094 'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
4095 'name': sub_name,
4096 })
4097
4098 # NB: Constructing the full subtitle dictionary is slow
4099 get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
4100 self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
4101 for lang_code, caption_track in captions.items():
4102 base_url = caption_track.get('baseUrl')
4103 orig_lang = parse_qs(base_url).get('lang', [None])[-1]
4104 if not base_url:
4105 continue
4106 lang_name = self._get_text(caption_track, 'name', max_runs=1)
4107 if caption_track.get('kind') != 'asr':
4108 if not lang_code:
4109 continue
4110 process_language(
4111 subtitles, base_url, lang_code, lang_name, {})
4112 if not caption_track.get('isTranslatable'):
4113 continue
4114 for trans_code, trans_name in translation_languages.items():
4115 if not trans_code:
4116 continue
4117 orig_trans_code = trans_code
4118 if caption_track.get('kind') != 'asr' and trans_code != 'und':
4119 if not get_translated_subs:
4120 continue
4121 trans_code += f'-{lang_code}'
4122 trans_name += format_field(lang_name, None, ' from %s')
4123 # Add an "-orig" label to the original language so that it can be distinguished.
4124 # The subs are returned without "-orig" as well for compatibility
4125 if lang_code == f'a-{orig_trans_code}':
4126 process_language(
4127 automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
4128 # Setting tlang=lang returns damaged subtitles.
4129 process_language(automatic_captions, base_url, trans_code, trans_name,
4130 {} if orig_lang == orig_trans_code else {'tlang': trans_code})
4131
4132 info['automatic_captions'] = automatic_captions
4133 info['subtitles'] = subtitles
4134
4135 parsed_url = urllib.parse.urlparse(url)
4136 for component in [parsed_url.fragment, parsed_url.query]:
4137 query = urllib.parse.parse_qs(component)
4138 for k, v in query.items():
4139 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
4140 d_k += '_time'
4141 if d_k not in info and k in s_ks:
4142 info[d_k] = parse_duration(query[k][0])
4143
4144 # Youtube Music Auto-generated description
4145 if video_description:
4146 mobj = re.search(
4147 r'''(?xs)
4148 (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
4149 (?P<album>[^\n]+)
4150 (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
4151 (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
4152 (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
4153 .+\nAuto-generated\ by\ YouTube\.\s*$
4154 ''', video_description)
4155 if mobj:
4156 release_year = mobj.group('release_year')
4157 release_date = mobj.group('release_date')
4158 if release_date:
4159 release_date = release_date.replace('-', '')
4160 if not release_year:
4161 release_year = release_date[:4]
4162 info.update({
4163 'album': mobj.group('album'.strip()),
4164 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
4165 'track': mobj.group('track').strip(),
4166 'release_date': release_date,
4167 'release_year': int_or_none(release_year),
4168 })
4169
4170 initial_data = None
4171 if webpage:
4172 initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
4173 if not initial_data:
4174 query = {'videoId': video_id}
4175 query.update(self._get_checkok_params())
4176 initial_data = self._extract_response(
4177 item_id=video_id, ep='next', fatal=False,
4178 ytcfg=master_ytcfg, query=query,
4179 headers=self.generate_api_headers(ytcfg=master_ytcfg),
4180 note='Downloading initial data API JSON')
4181
4182 info['comment_count'] = traverse_obj(initial_data, (
4183 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
4184 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'
4185 ), (
4186 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
4187 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'
4188 ), expected_type=int_or_none, get_all=False)
4189
4190 try: # This will error if there is no livechat
4191 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
4192 except (KeyError, IndexError, TypeError):
4193 pass
4194 else:
4195 info.setdefault('subtitles', {})['live_chat'] = [{
4196 # url is needed to set cookies
4197 'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
4198 'video_id': video_id,
4199 'ext': 'json',
4200 'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')
4201 else 'youtube_live_chat_replay'),
4202 }]
4203
4204 if initial_data:
4205 info['chapters'] = (
4206 self._extract_chapters_from_json(initial_data, duration)
4207 or self._extract_chapters_from_engagement_panel(initial_data, duration)
4208 or self._extract_chapters_from_description(video_description, duration)
4209 or None)
4210
4211 contents = traverse_obj(
4212 initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
4213 expected_type=list, default=[])
4214
4215 vpir = get_first(contents, 'videoPrimaryInfoRenderer')
4216 if vpir:
4217 stl = vpir.get('superTitleLink')
4218 if stl:
4219 stl = self._get_text(stl)
4220 if try_get(
4221 vpir,
4222 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
4223 info['location'] = stl
4224 else:
4225 mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
4226 if mobj:
4227 info.update({
4228 'series': mobj.group(1),
4229 'season_number': int(mobj.group(2)),
4230 'episode_number': int(mobj.group(3)),
4231 })
4232 for tlb in (try_get(
4233 vpir,
4234 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
4235 list) or []):
4236 tbrs = variadic(
4237 traverse_obj(
4238 tlb, 'toggleButtonRenderer',
4239 ('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer'),
4240 default=[]))
4241 for tbr in tbrs:
4242 for getter, regex in [(
4243 lambda x: x['defaultText']['accessibility']['accessibilityData'],
4244 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
4245 lambda x: x['accessibility'],
4246 lambda x: x['accessibilityData']['accessibilityData'],
4247 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
4248 label = (try_get(tbr, getter, dict) or {}).get('label')
4249 if label:
4250 mobj = re.match(regex, label)
4251 if mobj:
4252 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
4253 break
4254 sbr_tooltip = try_get(
4255 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
4256 if sbr_tooltip:
4257 like_count, dislike_count = sbr_tooltip.split(' / ')
4258 info.update({
4259 'like_count': str_to_int(like_count),
4260 'dislike_count': str_to_int(dislike_count),
4261 })
4262 vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
4263 if vcr:
4264 vc = self._get_count(vcr, 'viewCount')
4265 # Upcoming premieres with waiting count are treated as live here
4266 if vcr.get('isLive'):
4267 info['concurrent_view_count'] = vc
4268 elif info.get('view_count') is None:
4269 info['view_count'] = vc
4270
4271 vsir = get_first(contents, 'videoSecondaryInfoRenderer')
4272 if vsir:
4273 vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
4274 info.update({
4275 'channel': self._get_text(vor, 'title'),
4276 'channel_follower_count': self._get_count(vor, 'subscriberCountText')})
4277
4278 rows = try_get(
4279 vsir,
4280 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
4281 list) or []
4282 multiple_songs = False
4283 for row in rows:
4284 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
4285 multiple_songs = True
4286 break
4287 for row in rows:
4288 mrr = row.get('metadataRowRenderer') or {}
4289 mrr_title = mrr.get('title')
4290 if not mrr_title:
4291 continue
4292 mrr_title = self._get_text(mrr, 'title')
4293 mrr_contents_text = self._get_text(mrr, ('contents', 0))
4294 if mrr_title == 'License':
4295 info['license'] = mrr_contents_text
4296 elif not multiple_songs:
4297 if mrr_title == 'Album':
4298 info['album'] = mrr_contents_text
4299 elif mrr_title == 'Artist':
4300 info['artist'] = mrr_contents_text
4301 elif mrr_title == 'Song':
4302 info['track'] = mrr_contents_text
4303
4304 fallbacks = {
4305 'channel': 'uploader',
4306 'channel_id': 'uploader_id',
4307 'channel_url': 'uploader_url',
4308 }
4309
4310 # The upload date for scheduled, live and past live streams / premieres in microformats
4311 # may be different from the stream date. Although not in UTC, we will prefer it in this case.
4312 # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
4313 upload_date = (
4314 unified_strdate(get_first(microformats, 'uploadDate'))
4315 or unified_strdate(search_meta('uploadDate')))
4316 if not upload_date or (
4317 live_status in ('not_live', None)
4318 and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
4319 ):
4320 upload_date = strftime_or_none(
4321 self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date
4322 info['upload_date'] = upload_date
4323
4324 for to, frm in fallbacks.items():
4325 if not info.get(to):
4326 info[to] = info.get(frm)
4327
4328 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
4329 v = info.get(s_k)
4330 if v:
4331 info[d_k] = v
4332
4333 badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))
4334
4335 is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
4336 or get_first(video_details, 'isPrivate', expected_type=bool))
4337
4338 info['availability'] = (
4339 'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
4340 else self._availability(
4341 is_private=is_private,
4342 needs_premium=(
4343 self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)
4344 or False if initial_data and is_private is not None else None),
4345 needs_subscription=(
4346 self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)
4347 or False if initial_data and is_private is not None else None),
4348 needs_auth=info['age_limit'] >= 18,
4349 is_unlisted=None if is_private is None else (
4350 self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
4351 or get_first(microformats, 'isUnlisted', expected_type=bool))))
4352
4353 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
4354
4355 self.mark_watched(video_id, player_responses)
4356
4357 return info
4358
4359
4360 class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
@staticmethod
def passthrough_smuggled_data(func):
    """Decorator that carries smuggled data over to the URL results a method returns.

    The decorated method is invoked as ``func(self, url, smuggled_data)``:
    the incoming URL is unsmuggled first, and ``is_music_url`` is set in the
    smuggled data whenever ``self.is_music_url(url)`` is truthy.

    Afterwards, if any smuggled data is present, it is re-attached to the
    returned info dict and (lazily) to each of its ``entries`` - but only for
    results whose ``_type`` is ``url`` or ``url_transparent``.
    """

    def _smuggle(info, smuggled_data):
        # Only URL-type results are handed to another extractor
        if info.get('_type') not in ('url', 'url_transparent'):
            return info

        if smuggled_data.get('is_music_url'):
            parts = urllib.parse.urlparse(info['url'])
            if parts.netloc in ('www.youtube.com', 'music.youtube.com'):
                # The music host itself now carries the information,
                # so the flag does not need to be smuggled any further
                smuggled_data.pop('is_music_url')
                info['url'] = urllib.parse.urlunparse(parts._replace(netloc='music.youtube.com'))

        if smuggled_data:
            info['url'] = smuggle_url(info['url'], smuggled_data)
        return info

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True

        result = func(self, url, smuggled_data)
        if not smuggled_data:
            return result

        _smuggle(result, smuggled_data)
        if result.get('entries'):
            # Each entry gets its own copy since _smuggle may pop keys from the dict
            result['entries'] = (
                _smuggle(entry, smuggled_data.copy()) for entry in result['entries'])
        return result

    return wrapper
4387
4388 def _extract_channel_id(self, webpage):
4389 channel_id = self._html_search_meta(
4390 'channelId', webpage, 'channel id', default=None)
4391 if channel_id:
4392 return channel_id
4393 channel_url = self._html_search_meta(
4394 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
4395 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
4396 'twitter:app:url:googleplay'), webpage, 'channel url')
4397 return self._search_regex(
4398 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
4399 channel_url, 'channel id')
4400
4401 @staticmethod
4402 def _extract_basic_item_renderer(item):
4403 # Modified from _extract_grid_item_renderer
4404 known_basic_renderers = (
4405 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
4406 )
4407 for key, renderer in item.items():
4408 if not isinstance(renderer, dict):
4409 continue
4410 elif key in known_basic_renderers:
4411 return renderer
4412 elif key.startswith('grid') and key.endswith('Renderer'):
4413 return renderer
4414
def _extract_channel_renderer(self, renderer):
    """Build a `url`-type result for the channel described by `renderer`.

    `renderer` must contain `channelId` (a KeyError is raised otherwise).
    The remaining fields are read through the text/count/thumbnail helpers.
    """
    channel_id = renderer['channelId']
    title = self._get_text(renderer, 'title')
    channel_url = f'https://www.youtube.com/channel/{channel_id}'

    # What the result points to and which extractor should handle it
    result = {
        '_type': 'url',
        'url': channel_url,
        'id': channel_id,
        'ie_key': YoutubeTabIE.ie_key(),
    }
    # Channel metadata available without visiting the channel page
    result.update({
        'channel': title,
        'channel_id': channel_id,
        'channel_url': channel_url,
        'title': title,
        'channel_follower_count': self._get_count(renderer, 'subscriberCountText'),
        'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
        'playlist_count': self._get_count(renderer, 'videoCountText'),
        'description': self._get_text(renderer, 'descriptionSnippet'),
    })
    return result
4433
def _grid_entries(self, grid_renderer):
    """Yield one result per recognised item of ``grid_renderer['items']``.

    Each item is classified by the first of these that applies: playlist
    (`playlistId`), video (`videoId`), channel (`channelId`), or a generic
    navigation endpoint URL that one of the YouTube extractors accepts.
    Items that are not dicts or have no basic renderer are skipped.
    """
    for item in grid_renderer['items']:
        renderer = self._extract_basic_item_renderer(item) if isinstance(item, dict) else None
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        playlist_id = renderer.get('playlistId')
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif renderer.get('videoId'):
            yield self._extract_video(renderer)
        elif renderer.get('channelId'):
            yield self._extract_channel_renderer(renderer)
        else:
            # generic endpoint URL support
            endpoint_url = urljoin('https://www.youtube.com/', try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                str))
            if not endpoint_url:
                continue
            # The first extractor (in order of priority) that accepts the URL wins
            extractor = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(endpoint_url)),
                None)
            if extractor is not None:
                yield self.url_result(
                    endpoint_url, ie=extractor.ie_key(),
                    video_id=extractor._match_id(endpoint_url), video_title=title)
4471
def _music_reponsive_list_entry(self, renderer):
    """Turn a music list item renderer into a URL result on music.youtube.com.

    Checked in order: the item's own video, the playlist of its watch
    endpoint (with or without a starting video), and its browse endpoint.
    Returns None if none of them is present.
    """
    item_video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if item_video_id:
        return self.url_result(f'https://music.youtube.com/watch?v={item_video_id}',
                               ie=YoutubeIE.ie_key(), video_id=item_video_id)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        if video_id:
            playlist_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
                           ie=YoutubeTabIE.ie_key(), video_id=browse_id)
4489
4490 def _shelf_entries_from_content(self, shelf_renderer):
4491 content = shelf_renderer.get('content')
4492 if not isinstance(content, dict):
4493 return
4494 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
4495 if renderer:
4496 # TODO: add support for nested playlists so each shelf is processed
4497 # as separate playlist
4498 # TODO: this includes only first N items
4499 yield from self._grid_entries(renderer)
4500 renderer = content.get('horizontalListRenderer')
4501 if renderer:
4502 # TODO
4503 pass
4504
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf itself, followed by its inline entries.

    With `skip_channels`, a shelf whose URL contains '/channels?' yields
    nothing at all (neither the URL result nor its inline content).
    """
    shelf_url = urljoin('https://www.youtube.com', try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str))

    # Skip links to other channels. Note that checking
    # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
    # will not work
    if shelf_url and skip_channels and '/channels?' in shelf_url:
        return

    if shelf_url:
        yield self.url_result(shelf_url, video_title=self._get_text(shelf_renderer, 'title'))

    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
4520
4521 def _playlist_entries(self, video_list_renderer):
4522 for content in video_list_renderer['contents']:
4523 if not isinstance(content, dict):
4524 continue
4525 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
4526 if not isinstance(renderer, dict):
4527 continue
4528 video_id = renderer.get('videoId')
4529 if not video_id:
4530 continue
4531 yield self._extract_video(renderer)
4532
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in a rich item's `content`, if it has a `videoId`.

    The video is looked up under `videoRenderer` and `reelItemRenderer`.
    """
    video_renderer = traverse_obj(
        rich_grid_renderer, ('content', ('videoRenderer', 'reelItemRenderer')), get_all=False) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
4540
4541 def _video_entry(self, video_renderer):
4542 video_id = video_renderer.get('videoId')
4543 if video_id:
4544 return self._extract_video(video_renderer)
4545
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a URL result for the page a hashtag tile links to.

    Returns None when the tile has no usable tap-command URL.
    """
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    hashtag_url = urljoin('https://youtube.com', tap_url)
    if not hashtag_url:
        return None
    return self.url_result(
        hashtag_url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4552
def _post_thread_entries(self, post_thread_renderer):
    """Yield everything playable that a community post refers to.

    In order: the attached video, the attached playlist, and every link in
    the post text that YoutubeIE accepts. A link to the attached video
    itself is not yielded a second time.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    attached_video_id = attached_video.get('videoId')
    if attached_video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_video_id = YoutubeIE._match_id(link)
        if linked_video_id != attached_video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
4588
4589 def _post_thread_continuation_entries(self, post_thread_continuation):
4590 contents = post_thread_continuation.get('contents')
4591 if not isinstance(contents, list):
4592 return
4593 for content in contents:
4594 renderer = content.get('backstagePostThreadRenderer')
4595 if isinstance(renderer, dict):
4596 yield from self._post_thread_entries(renderer)
4597 continue
4598 renderer = content.get('videoRenderer')
4599 if isinstance(renderer, dict):
4600 yield self._video_entry(renderer)
4601
4602 r''' # unused
4603 def _rich_grid_entries(self, contents):
4604 for content in contents:
4605 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4606 if video_renderer:
4607 entry = self._video_entry(video_renderer)
4608 if entry:
4609 yield entry
4610 '''
4611
def _report_history_entries(self, renderer):
    """Yield a YoutubeIE URL result for every link found in a report history table."""
    # Path from the table renderer down to the URL of each linked text run
    link_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for relative_url in traverse_obj(renderer, link_path):
        yield self.url_result(urljoin('https://www.youtube.com', relative_url), YoutubeIE)
4618
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found in ``parent_renderer['contents']``.

    This is a generator, so nothing below runs until it is iterated.

    @param parent_renderer    Renderer whose `contents` list is walked
    @param continuation_list  One-element list used as an output parameter.
                              It is reset to [None] at the start and its
                              first item is overwritten with whatever
                              self._extract_continuation returns for the
                              renderers that were processed
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]
    contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
    for content in contents:
        if not isinstance(content, dict):
            continue
        # The first of these keys that holds a dict is used as the section renderer
        is_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            # Content that is not wrapped in a section: only rich items and
            # report history tables are understood here; anything else is skipped
            if content.get('richItemRenderer'):
                for entry in self._rich_entries(content['richItemRenderer']):
                    yield entry
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
        for isr_content in isr_contents:
            if not isinstance(isr_content, dict):
                continue

            # Maps a renderer key to a callable returning an iterable of entries.
            # Handlers that produce a single entry wrap it in a list
            known_renderers = {
                'playlistVideoListRenderer': self._playlist_entries,
                'gridRenderer': self._grid_entries,
                'reelShelfRenderer': self._grid_entries,
                'shelfRenderer': self._shelf_entries,
                'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
                'backstagePostThreadRenderer': self._post_thread_entries,
                'videoRenderer': lambda x: [self._video_entry(x)],
                'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
                'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
                'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
            }
            # Only the first known renderer of each item is processed (note the `break`)
            for key, renderer in isr_content.items():
                if key not in known_renderers:
                    continue
                for entry in known_renderers[key](renderer):
                    # Single-entry handlers may return None; drop those
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

        # Fall back to the section renderer if no continuation has been recorded so far
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    # Last resort: the continuation of the parent renderer itself
    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
4671
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of a tab, following continuations page by page.

    The entries embedded in ``tab['content']`` are yielded first. After that,
    continuation responses are requested for as long as a continuation is
    available and the response contains a renderer that is known here.

    @param tab             Tab renderer; nothing is yielded unless its
                           `content` is a non-empty dict
    @param item_id         Used (with the page number) as the item_id of each request
    @param ytcfg           Passed on to the header generation and to the requests
    @param account_syncid  Passed on to the header generation
    @param visitor_data    Initial visitor data; replaced by the value extracted
                           from each response when there is one
    """
    continuation_list = [None]
    # NB: The lambda looks up `continuation_list` when it is called, so it also
    # sees the fresh list that the loop below rebinds the name to
    extract_entries = lambda x: self._extract_entries(x, continuation_list)
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    # Only valid now that the generator above has been exhausted
    continuation = continuation_list[0]

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        # Maps the key of the first continuation item to (handler, parent_key).
        # With a parent_key, the handler receives {parent_key: continuation_items};
        # with None, it receives continuation_items as-is
        known_renderers = {
            'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'gridChannelRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
            'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
            'playlistVideoListContinuation': (self._playlist_entries, None),
            'gridContinuation': (self._grid_entries, None),
            'itemSectionContinuation': (self._post_thread_continuation_entries, None),
            'sectionListContinuation': (extract_entries, None),  # for feeds
        }

        # Prefer the appended continuation items; otherwise use `continuationContents`
        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems'
        ), 'continuationContents', get_all=False)
        # The renderer type is determined from the first item (if the items
        # form a list) or from the object itself
        continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for key in continuation_item.keys():
            if key not in known_renderers:
                continue
            func, parent_key = known_renderers[key]
            video_items_renderer = {parent_key: continuation_items} if parent_key else continuation_items
            # Fresh list so that a continuation from the previous page is never reused
            continuation_list = [None]
            yield from func(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        # Nothing in the response could be handled, so stop paging
        if not video_items_renderer:
            break
4734
4735 @staticmethod
4736 def _extract_selected_tab(tabs, fatal=True):
4737 for tab_renderer in tabs:
4738 if tab_renderer.get('selected'):
4739 return tab_renderer
4740 if fatal:
4741 raise ExtractorError('Unable to find selected tab')
4742
@staticmethod
def _extract_tab_renderers(response):
    """Collect the (expandable) tab renderer dicts of a two-column browse response."""
    tab_renderer_path = (
        'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ...,
        ('tabRenderer', 'expandableTabRenderer'))
    return traverse_obj(response, tab_renderer_path, expected_type=dict)
4747
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for the selected tab in `tabs`.

    The playlist metadata comes from `data`; the selected tab's `title` and
    `expandedText` (when present) are appended to the playlist title.
    A selected tab is required, as it is looked up with the default fatal behaviour.
    """
    metadata = self._extract_metadata_from_tabs(item_id, data)
    selected_tab = self._extract_selected_tab(tabs)

    for tab_field in ('title', 'expandedText'):
        metadata['title'] += format_field(selected_tab, tab_field, ' - %s')

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    entries = self._entries(selected_tab, metadata['id'], ytcfg, account_syncid, visitor_data)
    return self.playlist_result(entries, **metadata)
4761
def _extract_metadata_from_tabs(self, item_id, data):
    """
    Collects the playlist/channel level metadata of a tab response.
    @param item_id: id to fall back on when the response carries no channel id
    @param data: response
    @returns: dict of info fields for the resulting playlist
    """
    info = {'id': item_id}

    metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
    if not metadata_renderer:
        metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)
    else:
        info.update({
            'uploader': metadata_renderer.get('title'),
            'uploader_id': metadata_renderer.get('externalId'),
            'uploader_url': metadata_renderer.get('channelUrl'),
        })
        # A channel id found in the response replaces the id we were given
        if info['uploader_id']:
            info['id'] = info['uploader_id']

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        base_url = (url or '').split('=')[0]
        return url_or_none(base_url + '=s0')

    avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
    uncropped_avatar = _get_uncropped(avatar_thumbnails[0]['url']) if avatar_thumbnails else None
    if uncropped_avatar:
        avatar_thumbnails.append({
            'url': uncropped_avatar,
            'id': 'avatar_uncropped',
            'preference': 1
        })

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
    for cropped_banner in channel_banners:
        cropped_banner['preference'] = -10

    uncropped_banner = _get_uncropped(channel_banners[0]['url']) if channel_banners else None
    if uncropped_banner:
        channel_banners.append({
            'url': uncropped_banner,
            'id': 'banner_uncropped',
            'preference': -5
        })

    # Deprecated - remove primary_sidebar_renderer when layout discontinued
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer,
        ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
    playlist_thumbnails = self._extract_thumbnails(
        playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))

    # Title preference: metadata renderer, then hashtag header, then the id itself
    title = (
        traverse_obj(metadata_renderer, 'title')
        or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
        or info['id'])
    info.update({
        'title': title,
        'availability': self._extract_availability(data),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
        'description': try_get(metadata_renderer, lambda renderer: renderer.get('description', '')),
        'tags': try_get(metadata_renderer or {}, lambda renderer: renderer.get('keywords', '').split()),
        'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners,
    })

    # Playlist stats is a text runs array containing [video count, view count, last updated].
    # last updated or (view count and last updated) may be missing.
    playlist_stats = get_first(
        (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), ))

    last_updated_text = (
        self._get_text(playlist_stats, 2)  # deprecated, remove when old layout discontinued
        or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
    last_updated_unix = self._parse_time_text(last_updated_text)
    info['modified_date'] = strftime_or_none(last_updated_unix, '%Y%m%d')

    view_count = self._get_count(playlist_stats, 1)
    if view_count is None:  # 0 is allowed
        view_count = self._get_count(playlist_header_renderer, 'viewCountText')
    info['view_count'] = view_count

    playlist_count = self._get_count(playlist_stats, 0)
    if playlist_count is None:  # 0 is allowed
        playlist_count = self._get_count(
            playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))
    info['playlist_count'] = playlist_count

    if not info.get('uploader_id'):
        # No channel metadata: take the uploader from the playlist owner instead
        owner = (
            traverse_obj(playlist_header_renderer, 'ownerText')
            or traverse_obj(  # Deprecated
                self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer'),
                ('videoOwner', 'videoOwnerRenderer', 'title')))
        owner_text = self._get_text(owner)
        browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
        info.update({
            'uploader': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
            'uploader_id': browse_ep.get('browseId'),
            'uploader_url': urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl'))
        })

    # The channel fields mirror the uploader fields
    info['channel'] = info['uploader']
    info['channel_id'] = info['uploader_id']
    info['channel_url'] = info['uploader_url']
    return info
4864
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the entries of an inline playlist panel, page by page.

    After each page the following one is requested from the 'next' endpoint,
    seeded with the watch endpoint of the last item in the panel. Iteration
    ends when a page has no entries, has nothing after the last yielded video,
    or the response carries no playlist panel.
    @param playlist: playlist panel dict (its 'contents' list is read)
    @param playlist_id: id sent as 'playlistId' and used in the page labels
    @param data: response, used for the account sync id and visitor data
    @param ytcfg: ytcfg forwarded to the header generation and API request
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # A page may start with videos that were already yielded; resume right
        # after the last yielded one (or at 0 when it does not occur in the page)
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The last panel item need not provide a watch endpoint; use an empty
        # mapping then so that the fallbacks in the query below take effect
        # instead of failing on `None.get`
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist is None:
            # Nothing to paginate into; stop instead of parsing a missing panel
            return
4895
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Return the result for a playlist panel: either a redirect to the playlist's
    own URL or the inline entries of the panel.
    @param item_id: fallback playlist id
    @param url: URL being extracted; also the base for the playlist URL
    @param data: response
    @param playlist: playlist panel dict
    @param ytcfg: ytcfg forwarded to the inline extraction
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    playlist_url = urljoin(url, relative_url)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    should_delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if not should_delegate:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
4918
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    @returns: 'public' when any source says so, otherwise whatever
              self._availability derives from the collected flags
    """
    sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
    player_header_privacy = playlist_header_renderer.get('privacy')

    badges = self._extract_badges(sidebar_renderer)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_setting_icon = get_first(
        (playlist_header_renderer, sidebar_renderer),
        ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
         lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        expected_type=str)

    microformats_is_unlisted = traverse_obj(
        data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)

    if (self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or player_header_privacy == 'PUBLIC'
            or privacy_setting_icon == 'PRIVACY_PUBLIC'):
        return 'public'

    # NOTE: a conditional expression binds looser than `or`. The fallback chains
    # are parenthesised so that a sidebar badge always counts, also when the
    # header carries no privacy value; each flag stays None if no source decides.
    return self._availability(
        is_private=(
            self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
            or (player_header_privacy == 'PRIVATE' if player_header_privacy is not None
                else privacy_setting_icon == 'PRIVACY_PRIVATE' if privacy_setting_icon is not None
                else None)),
        is_unlisted=(
            self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
            or (player_header_privacy == 'UNLISTED' if player_header_privacy is not None
                else privacy_setting_icon == 'PRIVACY_UNLISTED' if privacy_setting_icon is not None
                else microformats_is_unlisted if microformats_is_unlisted is not None
                else None)),
        needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
        needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
        needs_auth=False)
4959
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` entry among the playlist sidebar items.
    @param data: response
    @param info_renderer: key to look up in each sidebar item
    @param expected_type: passed to try_get for each lookup
    @returns: the renderer, or None if no item provides one
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next(filter(None, candidates), None)
4968
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Reload playlists with unavailable videos (e.g. private videos, region blocked, etc.)
    Returns None without any request if `data` has neither playlist metadata nor a playlist header.
    """
    playlist_renderer = traverse_obj(
        data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer'))
    if not playlist_renderer:
        return

    api_headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    browse_query = {
        'params': 'wgYCCAA=',
        'browseId': f'VL{item_id}'
    }
    # Non-fatal: a failed reload is left for the caller to handle
    return self._extract_response(
        item_id=item_id, headers=api_headers, query=browse_query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Redownloading playlist API JSON with unavailable videos')
4988
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the 'skip' configuration arg looked up under YoutubeTabIE's key"""
    skip_args = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skip_args
4992
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the page at `url` and extract its initial data, looping over self.RetryManager.
    @param url: page to download
    @param item_id: id handed to the download and extraction helpers
    @param fatal: forwarded to RetryManager, extract_yt_initial_data and _error_or_warning
    @returns: (webpage, data) as last assigned; both start out as None
    """
    webpage, data = None, None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            # Network failures are handed to the retry manager, except for
            # HTTP 403/429 which fall through and are reported right away
            if isinstance(e.cause, network_exceptions):
                if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429):
                    retry.error = e
                    continue
            self._error_or_warning(e, fatal=fatal)
            break

        # Alerts in the response end the loop; they are not retried
        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
            # NOTE(review): setting retry.error presumably makes RetryManager
            # schedule another attempt - confirm against its definition
            retry.error = ExtractorError('Incomplete yt initial data received')
            continue

    return webpage, data
5020
def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """Use if failed to extract ytcfg (and data) from initial webpage"""
    # Only relevant when there is no ytcfg while being authenticated
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    skip_args = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if fatal and 'authcheck' not in skip_args:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    # Either non-fatal or the check was skipped explicitly: warn only
    self.report_warning(msg, only_once=True)
5032
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Get the response data for `url`, from the webpage unless it is skipped and
    from the tab endpoint when the webpage gave no data.
    @returns: (data, ytcfg) tuple
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
        # Reject webpage data if redirected to home page without explicitly requesting
        selected_tab = self._extract_selected_tab(self._extract_tab_renderers(data), fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        # No usable webpage data: resolve the URL through the API instead
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
5051
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` via 'navigation/resolve_url' and request the endpoint it points to.
    @returns: response of the 'browse' or 'next' request; None if the URL
              resolved to neither endpoint and `fatal` is false
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # Resolved endpoint key -> API endpoint to query, in order of preference
    for ep_key, ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return
    raise ExtractorError(err_note, expected=True)
5069
# Search 'params' value that _search_results falls back to when called without
# explicit params; a falsy value means no 'params' entry is sent
_SEARCH_PARAMS = None
5071
def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Generator over the entries of a search, requesting further pages for as
    long as a continuation is reported.
    @param query: search query
    @param params: search 'params' value; defaults to self._SEARCH_PARAMS
    @param default_client: client used for ytcfg, headers and requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # The distinct top-level keys of the paths above
    check_get_keys = tuple({path[0] for path in content_keys})
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # _extract_entries reports the continuation of the next page through this list
    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        data.update(continuation_list[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        page_renderer = {'contents': list(variadic(slr_contents))}
        yield from self._extract_entries(page_renderer, continuation_list)
        if not continuation_list[0]:
            return
5104
5105
5106 class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
5107 IE_DESC = 'YouTube Tabs'
5108 _VALID_URL = r'''(?x:
5109 https?://
5110 (?:\w+\.)?
5111 (?:
5112 youtube(?:kids)?\.com|
5113 %(invidious)s
5114 )/
5115 (?:
5116 (?P<channel_type>channel|c|user|browse)/|
5117 (?P<not_channel>
5118 feed/|hashtag/|
5119 (?:playlist|watch)\?.*?\blist=
5120 )|
5121 (?!(?:%(reserved_names)s)\b) # Direct URLs
5122 )
5123 (?P<id>[^/?\#&]+)
5124 )''' % {
5125 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
5126 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
5127 }
5128 IE_NAME = 'youtube:tab'
5129
5130 _TESTS = [{
5131 'note': 'playlists, multipage',
5132 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
5133 'playlist_mincount': 94,
5134 'info_dict': {
5135 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
5136 'title': 'Igor Kleiner - Playlists',
5137 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
5138 'uploader': 'Igor Kleiner',
5139 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5140 'channel': 'Igor Kleiner',
5141 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5142 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
5143 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5144 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5145 'channel_follower_count': int
5146 },
5147 }, {
5148 'note': 'playlists, multipage, different order',
5149 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
5150 'playlist_mincount': 94,
5151 'info_dict': {
5152 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
5153 'title': 'Igor Kleiner - Playlists',
5154 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
5155 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5156 'uploader': 'Igor Kleiner',
5157 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5158 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
5159 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5160 'channel': 'Igor Kleiner',
5161 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5162 'channel_follower_count': int
5163 },
5164 }, {
5165 'note': 'playlists, series',
5166 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
5167 'playlist_mincount': 5,
5168 'info_dict': {
5169 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5170 'title': '3Blue1Brown - Playlists',
5171 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
5172 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
5173 'uploader': '3Blue1Brown',
5174 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5175 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5176 'channel': '3Blue1Brown',
5177 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5178 'tags': ['Mathematics'],
5179 'channel_follower_count': int
5180 },
5181 }, {
5182 'note': 'playlists, singlepage',
5183 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
5184 'playlist_mincount': 4,
5185 'info_dict': {
5186 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5187 'title': 'ThirstForScience - Playlists',
5188 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
5189 'uploader': 'ThirstForScience',
5190 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5191 'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
5192 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
5193 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5194 'tags': 'count:13',
5195 'channel': 'ThirstForScience',
5196 'channel_follower_count': int
5197 }
5198 }, {
5199 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
5200 'only_matching': True,
5201 }, {
5202 'note': 'basic, single video playlist',
5203 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5204 'info_dict': {
5205 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5206 'uploader': 'Sergey M.',
5207 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5208 'title': 'youtube-dl public playlist',
5209 'description': '',
5210 'tags': [],
5211 'view_count': int,
5212 'modified_date': '20201130',
5213 'channel': 'Sergey M.',
5214 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5215 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5216 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5217 'availability': 'public',
5218 },
5219 'playlist_count': 1,
5220 }, {
5221 'note': 'empty playlist',
5222 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5223 'info_dict': {
5224 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5225 'uploader': 'Sergey M.',
5226 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5227 'title': 'youtube-dl empty playlist',
5228 'tags': [],
5229 'channel': 'Sergey M.',
5230 'description': '',
5231 'modified_date': '20160902',
5232 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5233 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5234 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5235 'availability': 'public',
5236 },
5237 'playlist_count': 0,
5238 }, {
5239 'note': 'Home tab',
5240 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
5241 'info_dict': {
5242 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5243 'title': 'lex will - Home',
5244 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5245 'uploader': 'lex will',
5246 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5247 'channel': 'lex will',
5248 'tags': ['bible', 'history', 'prophesy'],
5249 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5250 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5251 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5252 'channel_follower_count': int
5253 },
5254 'playlist_mincount': 2,
5255 }, {
5256 'note': 'Videos tab',
5257 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
5258 'info_dict': {
5259 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5260 'title': 'lex will - Videos',
5261 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5262 'uploader': 'lex will',
5263 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5264 'tags': ['bible', 'history', 'prophesy'],
5265 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5266 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5267 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5268 'channel': 'lex will',
5269 'channel_follower_count': int
5270 },
5271 'playlist_mincount': 975,
5272 }, {
5273 'note': 'Videos tab, sorted by popular',
5274 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
5275 'info_dict': {
5276 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5277 'title': 'lex will - Videos',
5278 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5279 'uploader': 'lex will',
5280 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5281 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5282 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5283 'channel': 'lex will',
5284 'tags': ['bible', 'history', 'prophesy'],
5285 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5286 'channel_follower_count': int
5287 },
5288 'playlist_mincount': 199,
5289 }, {
5290 'note': 'Playlists tab',
5291 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
5292 'info_dict': {
5293 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5294 'title': 'lex will - Playlists',
5295 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5296 'uploader': 'lex will',
5297 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5298 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5299 'channel': 'lex will',
5300 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5301 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5302 'tags': ['bible', 'history', 'prophesy'],
5303 'channel_follower_count': int
5304 },
5305 'playlist_mincount': 17,
5306 }, {
5307 'note': 'Community tab',
5308 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
5309 'info_dict': {
5310 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5311 'title': 'lex will - Community',
5312 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5313 'uploader': 'lex will',
5314 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5315 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5316 'channel': 'lex will',
5317 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5318 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5319 'tags': ['bible', 'history', 'prophesy'],
5320 'channel_follower_count': int
5321 },
5322 'playlist_mincount': 18,
5323 }, {
5324 'note': 'Channels tab',
5325 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
5326 'info_dict': {
5327 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5328 'title': 'lex will - Channels',
5329 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5330 'uploader': 'lex will',
5331 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5332 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5333 'channel': 'lex will',
5334 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5335 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5336 'tags': ['bible', 'history', 'prophesy'],
5337 'channel_follower_count': int
5338 },
5339 'playlist_mincount': 12,
5340 }, {
5341 'note': 'Search tab',
5342 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
5343 'playlist_mincount': 40,
5344 'info_dict': {
5345 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5346 'title': '3Blue1Brown - Search - linear algebra',
5347 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
5348 'uploader': '3Blue1Brown',
5349 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
5350 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5351 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5352 'tags': ['Mathematics'],
5353 'channel': '3Blue1Brown',
5354 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5355 'channel_follower_count': int
5356 },
5357 }, {
5358 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5359 'only_matching': True,
5360 }, {
5361 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5362 'only_matching': True,
5363 }, {
5364 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5365 'only_matching': True,
5366 }, {
5367 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
5368 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5369 'info_dict': {
5370 'title': '29C3: Not my department',
5371 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5372 'uploader': 'Christiaan008',
5373 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5374 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
5375 'tags': [],
5376 'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
5377 'view_count': int,
5378 'modified_date': '20150605',
5379 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5380 'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
5381 'channel': 'Christiaan008',
5382 'availability': 'public',
5383 },
5384 'playlist_count': 96,
5385 }, {
5386 'note': 'Large playlist',
5387 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
5388 'info_dict': {
5389 'title': 'Uploads from Cauchemar',
5390 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
5391 'uploader': 'Cauchemar',
5392 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
5393 'channel_url': 'https://www.youtube.com/c/Cauchemar89',
5394 'tags': [],
5395 'modified_date': r're:\d{8}',
5396 'channel': 'Cauchemar',
5397 'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
5398 'view_count': int,
5399 'description': '',
5400 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
5401 'availability': 'public',
5402 },
5403 'playlist_mincount': 1123,
5404 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5405 }, {
5406 'note': 'even larger playlist, 8832 videos',
5407 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
5408 'only_matching': True,
5409 }, {
5410 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
5411 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
5412 'info_dict': {
5413 'title': 'Uploads from Interstellar Movie',
5414 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
5415 'uploader': 'Interstellar Movie',
5416 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
5417 'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
5418 'tags': [],
5419 'view_count': int,
5420 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
5421 'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
5422 'channel': 'Interstellar Movie',
5423 'description': '',
5424 'modified_date': r're:\d{8}',
5425 'availability': 'public',
5426 },
5427 'playlist_mincount': 21,
5428 }, {
5429 'note': 'Playlist with "show unavailable videos" button',
5430 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
5431 'info_dict': {
5432 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
5433 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
5434 'uploader': 'Phim Siêu Nhân Nhật Bản',
5435 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5436 'view_count': int,
5437 'channel': 'Phim Siêu Nhân Nhật Bản',
5438 'tags': [],
5439 'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5440 'description': '',
5441 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5442 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5443 'modified_date': r're:\d{8}',
5444 'availability': 'public',
5445 },
5446 'playlist_mincount': 200,
5447 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5448 }, {
5449 'note': 'Playlist with unavailable videos in page 7',
5450 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5451 'info_dict': {
5452 'title': 'Uploads from BlankTV',
5453 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
5454 'uploader': 'BlankTV',
5455 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5456 'channel': 'BlankTV',
5457 'channel_url': 'https://www.youtube.com/c/blanktv',
5458 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5459 'view_count': int,
5460 'tags': [],
5461 'uploader_url': 'https://www.youtube.com/c/blanktv',
5462 'modified_date': r're:\d{8}',
5463 'description': '',
5464 'availability': 'public',
5465 },
5466 'playlist_mincount': 1000,
5467 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5468 }, {
5469 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5470 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5471 'info_dict': {
5472 'title': 'Data Analysis with Dr Mike Pound',
5473 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5474 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5475 'uploader': 'Computerphile',
5476 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
5477 'uploader_url': 'https://www.youtube.com/user/Computerphile',
5478 'tags': [],
5479 'view_count': int,
5480 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5481 'channel_url': 'https://www.youtube.com/user/Computerphile',
5482 'channel': 'Computerphile',
5483 'availability': 'public',
5484 'modified_date': '20190712',
5485 },
5486 'playlist_mincount': 11,
5487 }, {
5488 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5489 'only_matching': True,
5490 }, {
5491 'note': 'Playlist URL that does not actually serve a playlist',
5492 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5493 'info_dict': {
5494 'id': 'FqZTN594JQw',
5495 'ext': 'webm',
5496 'title': "Smiley's People 01 detective, Adventure Series, Action",
5497 'uploader': 'STREEM',
5498 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
5499 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
5500 'upload_date': '20150526',
5501 'license': 'Standard YouTube License',
5502 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5503 'categories': ['People & Blogs'],
5504 'tags': list,
5505 'view_count': int,
5506 'like_count': int,
5507 },
5508 'params': {
5509 'skip_download': True,
5510 },
5511 'skip': 'This video is not available.',
5512 'add_ie': [YoutubeIE.ie_key()],
5513 }, {
5514 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5515 'only_matching': True,
5516 }, {
5517 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5518 'only_matching': True,
5519 }, {
5520 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5521 'info_dict': {
5522 'id': 'Wq15eF5vCbI', # This will keep changing
5523 'ext': 'mp4',
5524 'title': str,
5525 'uploader': 'Sky News',
5526 'uploader_id': 'skynews',
5527 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
5528 'upload_date': r're:\d{8}',
5529 'description': str,
5530 'categories': ['News & Politics'],
5531 'tags': list,
5532 'like_count': int,
5533 'release_timestamp': int,
5534 'channel': 'Sky News',
5535 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5536 'age_limit': 0,
5537 'view_count': int,
5538 'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',
5539 'playable_in_embed': True,
5540 'release_date': r're:\d+',
5541 'availability': 'public',
5542 'live_status': 'is_live',
5543 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
5544 'channel_follower_count': int,
5545 'concurrent_view_count': int,
5546 },
5547 'params': {
5548 'skip_download': True,
5549 },
5550 'expected_warnings': ['Ignoring subtitle tracks found in '],
5551 }, {
5552 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5553 'info_dict': {
5554 'id': 'a48o2S1cPoo',
5555 'ext': 'mp4',
5556 'title': 'The Young Turks - Live Main Show',
5557 'uploader': 'The Young Turks',
5558 'uploader_id': 'TheYoungTurks',
5559 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
5560 'upload_date': '20150715',
5561 'license': 'Standard YouTube License',
5562 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5563 'categories': ['News & Politics'],
5564 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5565 'like_count': int,
5566 },
5567 'params': {
5568 'skip_download': True,
5569 },
5570 'only_matching': True,
5571 }, {
5572 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5573 'only_matching': True,
5574 }, {
5575 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5576 'only_matching': True,
5577 }, {
5578 'note': 'A channel that is not live. Should raise error',
5579 'url': 'https://www.youtube.com/user/numberphile/live',
5580 'only_matching': True,
5581 }, {
5582 'url': 'https://www.youtube.com/feed/trending',
5583 'only_matching': True,
5584 }, {
5585 'url': 'https://www.youtube.com/feed/library',
5586 'only_matching': True,
5587 }, {
5588 'url': 'https://www.youtube.com/feed/history',
5589 'only_matching': True,
5590 }, {
5591 'url': 'https://www.youtube.com/feed/subscriptions',
5592 'only_matching': True,
5593 }, {
5594 'url': 'https://www.youtube.com/feed/watch_later',
5595 'only_matching': True,
5596 }, {
5597 'note': 'Recommended - redirects to home page.',
5598 'url': 'https://www.youtube.com/feed/recommended',
5599 'only_matching': True,
5600 }, {
5601 'note': 'inline playlist with not always working continuations',
5602 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5603 'only_matching': True,
5604 }, {
5605 'url': 'https://www.youtube.com/course',
5606 'only_matching': True,
5607 }, {
5608 'url': 'https://www.youtube.com/zsecurity',
5609 'only_matching': True,
5610 }, {
5611 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5612 'only_matching': True,
5613 }, {
5614 'url': 'https://www.youtube.com/TheYoungTurks/live',
5615 'only_matching': True,
5616 }, {
5617 'url': 'https://www.youtube.com/hashtag/cctv9',
5618 'info_dict': {
5619 'id': 'cctv9',
5620 'title': '#cctv9',
5621 'tags': [],
5622 },
5623 'playlist_mincount': 300, # not consistent but should be over 300
5624 }, {
5625 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
5626 'only_matching': True,
5627 }, {
5628 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
5629 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5630 'only_matching': True
5631 }, {
5632 'note': '/browse/ should redirect to /channel/',
5633 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
5634 'only_matching': True
5635 }, {
5636 'note': 'VLPL, should redirect to playlist?list=PL...',
5637 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5638 'info_dict': {
5639 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5640 'uploader': 'NoCopyrightSounds',
5641 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
5642 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5643 'title': 'NCS : All Releases 💿',
5644 'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5645 'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5646 'modified_date': r're:\d{8}',
5647 'view_count': int,
5648 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5649 'tags': [],
5650 'channel': 'NoCopyrightSounds',
5651 'availability': 'public',
5652 },
5653 'playlist_mincount': 166,
5654 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5655 }, {
5656 'note': 'Topic, should redirect to playlist?list=UU...',
5657 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5658 'info_dict': {
5659 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5660 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5661 'title': 'Uploads from Royalty Free Music - Topic',
5662 'uploader': 'Royalty Free Music - Topic',
5663 'tags': [],
5664 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5665 'channel': 'Royalty Free Music - Topic',
5666 'view_count': int,
5667 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5668 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5669 'modified_date': r're:\d{8}',
5670 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5671 'description': '',
5672 'availability': 'public',
5673 },
5674 'playlist_mincount': 101,
5675 }, {
5676 # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
5677 # Treat as a general feed
5678 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
5679 'info_dict': {
5680 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
5681 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
5682 'tags': [],
5683 },
5684 'playlist_mincount': 9,
5685 }, {
5686 'note': 'Youtube music Album',
5687 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
5688 'info_dict': {
5689 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
5690 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
5691 'tags': [],
5692 'view_count': int,
5693 'description': '',
5694 'availability': 'unlisted',
5695 'modified_date': r're:\d{8}',
5696 },
5697 'playlist_count': 50,
5698 }, {
5699 'note': 'unlisted single video playlist',
5700 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5701 'info_dict': {
5702 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5703 'uploader': 'colethedj',
5704 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5705 'title': 'yt-dlp unlisted playlist test',
5706 'availability': 'unlisted',
5707 'tags': [],
5708 'modified_date': '20220418',
5709 'channel': 'colethedj',
5710 'view_count': int,
5711 'description': '',
5712 'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5713 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5714 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5715 },
5716 'playlist_count': 1,
5717 }, {
5718 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
5719 'url': 'https://www.youtube.com/feed/recommended',
5720 'info_dict': {
5721 'id': 'recommended',
5722 'title': 'recommended',
5723 'tags': [],
5724 },
5725 'playlist_mincount': 50,
5726 'params': {
5727 'skip_download': True,
5728 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5729 },
5730 }, {
5731 'note': 'API Fallback: /videos tab, sorted by oldest first',
5732 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
5733 'info_dict': {
5734 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5735 'title': 'Cody\'sLab - Videos',
5736 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
5737 'uploader': 'Cody\'sLab',
5738 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5739 'channel': 'Cody\'sLab',
5740 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5741 'tags': [],
5742 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5743 'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5744 'channel_follower_count': int
5745 },
5746 'playlist_mincount': 650,
5747 'params': {
5748 'skip_download': True,
5749 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5750 },
5751 'skip': 'Query for sorting no longer works',
5752 }, {
5753 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
5754 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5755 'info_dict': {
5756 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5757 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5758 'title': 'Uploads from Royalty Free Music - Topic',
5759 'uploader': 'Royalty Free Music - Topic',
5760 'modified_date': r're:\d{8}',
5761 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5762 'description': '',
5763 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5764 'tags': [],
5765 'channel': 'Royalty Free Music - Topic',
5766 'view_count': int,
5767 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5768 'availability': 'public',
5769 },
5770 'playlist_mincount': 101,
5771 'params': {
5772 'skip_download': True,
5773 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5774 },
5775 }, {
5776 'note': 'non-standard redirect to regional channel',
5777 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
5778 'only_matching': True
5779 }, {
5780 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
5781 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5782 'info_dict': {
5783 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5784 'modified_date': '20220407',
5785 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5786 'tags': [],
5787 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5788 'uploader': 'pukkandan',
5789 'availability': 'unlisted',
5790 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5791 'channel': 'pukkandan',
5792 'description': 'Test for collaborative playlist',
5793 'title': 'yt-dlp test - collaborative playlist',
5794 'view_count': int,
5795 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5796 },
5797 'playlist_mincount': 2
5798 }, {
5799 'note': 'translated tab name',
5800 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
5801 'info_dict': {
5802 'id': 'UCiu-3thuViMebBjw_5nWYrA',
5803 'tags': [],
5804 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
5805 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5806 'description': 'test description',
5807 'title': 'cole-dlp-test-acc - 再生リスト',
5808 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5809 'uploader': 'cole-dlp-test-acc',
5810 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
5811 'channel': 'cole-dlp-test-acc',
5812 },
5813 'playlist_mincount': 1,
5814 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
5815 'expected_warnings': ['Preferring "ja"'],
5816 }, {
5817 # XXX: this should really check flat playlist entries, but the test suite doesn't support that
5818 'note': 'preferred lang set with playlist with translated video titles',
5819 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
5820 'info_dict': {
5821 'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
5822 'tags': [],
5823 'view_count': int,
5824 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5825 'uploader': 'cole-dlp-test-acc',
5826 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
5827 'channel': 'cole-dlp-test-acc',
5828 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
5829 'description': 'test',
5830 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5831 'title': 'dlp test playlist',
5832 'availability': 'public',
5833 },
5834 'playlist_mincount': 1,
5835 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
5836 'expected_warnings': ['Preferring "ja"'],
5837 }, {
5838 # shorts audio pivot for 2GtVksBMYFM.
5839 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
5840 'info_dict': {
5841 'id': 'sfv_audio_pivot',
5842 'title': 'sfv_audio_pivot',
5843 'tags': [],
5844 },
5845 'playlist_mincount': 50,
5846
5847 }, {
5848 # Channel with a real live tab (not to be mistaken with streams tab)
5849 # Do not treat like it should redirect to live stream
5850 'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',
5851 'info_dict': {
5852 'id': 'UCEH7P7kyJIkS_gJf93VYbmg',
5853 'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',
5854 'tags': [],
5855 },
5856 'playlist_mincount': 20,
5857 }, {
5858 # Tab name is not the same as tab id
5859 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',
5860 'info_dict': {
5861 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
5862 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',
5863 'tags': [],
5864 },
5865 'playlist_mincount': 8,
5866 }, {
5867 # Home tab id is literally home. Not to get mistaken with featured
5868 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',
5869 'info_dict': {
5870 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
5871 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',
5872 'tags': [],
5873 },
5874 'playlist_mincount': 8,
5875 }, {
5876 # Should get three playlists for videos, shorts and streams tabs
5877 'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
5878 'info_dict': {
5879 'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
5880 'title': 'Polka Ch. 尾丸ポルカ',
5881 'channel_follower_count': int,
5882 'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
5883 'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
5884 'uploader': 'Polka Ch. 尾丸ポルカ',
5885 'description': 'md5:3b8df1ac5af337aa206e37ee3d181ec9',
5886 'channel': 'Polka Ch. 尾丸ポルカ',
5887 'tags': 'count:35',
5888 'uploader_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
5889 'uploader_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
5890 },
5891 'playlist_count': 3,
5892 }, {
5893 # Shorts tab with channel with handle
5894 'url': 'https://www.youtube.com/@NotJustBikes/shorts',
5895 'info_dict': {
5896 'id': 'UC0intLFzLaudFG-xAvUEO-A',
5897 'title': 'Not Just Bikes - Shorts',
5898 'tags': 'count:12',
5899 'uploader': 'Not Just Bikes',
5900 'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
5901 'description': 'md5:7513148b1f02b924783157d84c4ea555',
5902 'channel_follower_count': int,
5903 'uploader_id': 'UC0intLFzLaudFG-xAvUEO-A',
5904 'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
5905 'uploader_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
5906 'channel': 'Not Just Bikes',
5907 },
5908 'playlist_mincount': 10,
5909 }, {
5910 # Streams tab
5911 'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',
5912 'info_dict': {
5913 'id': 'UC3eYAvjCVwNHgkaGbXX3sig',
5914 'title': '中村悠一 - Live',
5915 'tags': 'count:7',
5916 'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
5917 'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
5918 'uploader_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
5919 'channel': '中村悠一',
5920 'uploader_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
5921 'channel_follower_count': int,
5922 'uploader': '中村悠一',
5923 'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
5924 },
5925 'playlist_mincount': 60,
5926 }, {
5927 # Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.
5928 # See test_youtube_lists
5929 'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',
5930 'only_matching': True,
5931 }, {
5932 # No uploads and no UCID given. Should fail with no uploads error
5933 # See test_youtube_lists
5934 'url': 'https://www.youtube.com/news',
5935 'only_matching': True
5936 }, {
5937 # No videos tab but has a shorts tab
5938 'url': 'https://www.youtube.com/c/TKFShorts',
5939 'info_dict': {
5940 'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
5941 'title': 'Shorts Break - Shorts',
5942 'tags': 'count:32',
5943 'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
5944 'channel': 'Shorts Break',
5945 'description': 'md5:a6c234cf3d50d878ef8721e34457cd11',
5946 'uploader': 'Shorts Break',
5947 'channel_follower_count': int,
5948 'uploader_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
5949 'uploader_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
5950 'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
5951 },
5952 'playlist_mincount': 30,
5953 }, {
5954 # Trending Now Tab. tab id is empty
5955 'url': 'https://www.youtube.com/feed/trending',
5956 'info_dict': {
5957 'id': 'trending',
5958 'title': 'trending - Now',
5959 'tags': [],
5960 },
5961 'playlist_mincount': 30,
5962 }, {
5963 # Trending Gaming Tab. tab id is empty
5964 'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
5965 'info_dict': {
5966 'id': 'trending',
5967 'title': 'trending - Gaming',
5968 'tags': [],
5969 },
5970 'playlist_mincount': 30,
5971 }, {
5972 # Shorts url result in shorts tab
5973 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
5974 'info_dict': {
5975 'id': 'UCiu-3thuViMebBjw_5nWYrA',
5976 'title': 'cole-dlp-test-acc - Shorts',
5977 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
5978 'channel': 'cole-dlp-test-acc',
5979 'description': 'test description',
5980 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
5981 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5982 'tags': [],
5983 'uploader': 'cole-dlp-test-acc',
5984 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5985
5986 },
5987 'playlist': [{
5988 'info_dict': {
5989 '_type': 'url',
5990 'ie_key': 'Youtube',
5991 'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',
5992 'id': 'sSM9J5YH_60',
5993 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
5994 'title': 'SHORT short',
5995 'channel': 'cole-dlp-test-acc',
5996 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5997 'view_count': int,
5998 'thumbnails': list,
5999 }
6000 }],
6001 'params': {'extract_flat': True},
6002 }, {
6003 # Live video status should be extracted
6004 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
6005 'info_dict': {
6006 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6007 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO, should be Minecraft - Live or Minecraft - Topic - Live
6008 'tags': []
6009 },
6010 'playlist': [{
6011 'info_dict': {
6012 '_type': 'url',
6013 'ie_key': 'Youtube',
6014 'url': 'startswith:https://www.youtube.com/watch?v=',
6015 'id': str,
6016 'title': str,
6017 'live_status': 'is_live',
6018 'channel_id': str,
6019 'channel_url': str,
6020 'concurrent_view_count': int,
6021 'channel': str,
6022 }
6023 }],
6024 'params': {'extract_flat': True, 'playlist_items': '1'},
6025 'playlist_mincount': 1
6026 }, {
6027 # Channel renderer metadata. Contains number of videos on the channel
6028 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
6029 'info_dict': {
6030 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6031 'title': 'cole-dlp-test-acc - Channels',
6032 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
6033 'channel': 'cole-dlp-test-acc',
6034 'description': 'test description',
6035 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6036 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6037 'tags': [],
6038 'uploader': 'cole-dlp-test-acc',
6039 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6040
6041 },
6042 'playlist': [{
6043 'info_dict': {
6044 '_type': 'url',
6045 'ie_key': 'YoutubeTab',
6046 'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6047 'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6048 'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6049 'title': 'PewDiePie',
6050 'channel': 'PewDiePie',
6051 'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6052 'thumbnails': list,
6053 'channel_follower_count': int,
6054 'playlist_count': int
6055 }
6056 }],
6057 'params': {'extract_flat': True},
6058 }]
6059
@classmethod
def suitable(cls, url):
    """Decline every URL that YoutubeIE accepts; otherwise defer to the parent class's check."""
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
6063
# Splits a URL into the named parts consumed by `_get_url_mobj`:
#   pre  - the portion matched by `_VALID_URL`
#   tab  - one optional path segment, including its leading '/' (no '?', '#' or '/' inside);
#          the conditional `(?(not_channel)|...)` only tries to match it when the
#          `not_channel` group (which has to come from `_VALID_URL`) did not participate
#   post - whatever is left of the URL after `pre` and `tab`
6064 _URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$')
6065
6066 def _get_url_mobj(self, url):
6067 mobj = self._URL_RE.match(url).groupdict()
6068 mobj.update((k, '') for k, v in mobj.items() if v is None)
6069 return mobj
6070
def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
    """
    Work out the id and the display name of a tab renderer.

    @param tab       Tab renderer dict; `title`, `endpoint` and `tabIdentifier` are read from it
    @param base_url  URL against which the tab's web URL is joined
    @returns         `(tab_id, tab_name)`, where `tab_name` is the lower-cased title
                     ('' when the tab has no title)
    """
    tab_name = (tab.get('title') or '').lower()

    web_url = traverse_obj(
        tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'))
    tab_url = urljoin(base_url, web_url)

    # First choice: the tab segment of the tab's own URL, without its leading '/'
    tab_id = None
    if tab_url:
        tab_id = self._get_url_mobj(tab_url)['tab'][1:]
    # Second choice: the renderer's own identifier (only accepted when it is a string)
    if not tab_id:
        tab_id = traverse_obj(tab, 'tabIdentifier', expected_type=str)

    if tab_id:
        id_aliases = {
            'TAB_ID_SPONSORSHIPS': 'membership',
        }
        return id_aliases.get(tab_id, tab_id), tab_name

    # Fallback to tab name if we cannot get the tab id.
    # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
    # Note that in the case of translated tab name this may result in an empty string, which we don't want.
    if tab_name:
        self.write_debug(f'Falling back to selected tab name: {tab_name}')
    name_aliases = {
        'home': 'featured',
        'live': 'streams',
    }
    return name_aliases.get(tab_name, tab_name), tab_name
6092
6093 def _has_tab(self, tabs, tab_id):
6094 return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs)
6095
# Resolve a tab-style URL (channel tab, playlist, feed, watch page carrying a list, ...).
# Depending on what the URL and the fetched data look like, this either returns an extraction
# result built from the page's tabs / inline playlist, hands off to YoutubeTabIE or YoutubeIE
# through `url_result`, or raises ExtractorError / UserNotLive.
# NOTE(review): `smuggled_data` is presumably injected by the `passthrough_smuggled_data`
# decorator (the recursive call near the end passes only a URL) - confirm against the decorator.
6096 @YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
6097 def _real_extract(self, url, smuggled_data):
6098 item_id = self._match_id(url)
# Force the host to www.youtube.com; every other URL component is kept as given
6099 url = urllib.parse.urlunparse(
6100 urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
6101 compat_opts = self.get_param('compat_opts', [])
6102
# `tab` keeps its leading '/' (or is '' when absent); an empty `not_channel` group marks a channel URL
6103 mobj = self._get_url_mobj(url)
6104 pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel']
6105 if is_channel and smuggled_data.get('is_music_url'):
6106 if item_id[:2] == 'VL': # Youtube music VL channels have an equivalent playlist
6107 return self.url_result(
6108 f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:])
6109 elif item_id[:2] == 'MP': # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
6110 mdata = self._extract_tab_endpoint(
6111 f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
6112 murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
6113 get_all=False, expected_type=str)
6114 if not murl:
6115 raise ExtractorError('Failed to resolve album to playlist')
6116 return self.url_result(murl, YoutubeTabIE)
6117 elif mobj['channel_type'] == 'browse': # Youtube music /browse/ should be changed to /channel/
6118 return self.url_result(
6119 f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)
6120
# Remember which tab the user asked for (without the '/') before the URL may be rewritten below
6121 original_tab_id, display_id = tab[1:], f'{item_id}{tab}'
# A channel URL without a tab is pointed at /videos unless the compat option opts out
6122 if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
6123 url = f'{pre}/videos{post}'
6124
6125 # Handle both video/playlist URLs
6126 qs = parse_qs(url)
6127 video_id, playlist_id = [traverse_obj(qs, (key, 0)) for key in ('v', 'list')]
6128 if not video_id and mobj['not_channel'].startswith('watch'):
6129 if not playlist_id:
6130 # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
6131 raise ExtractorError('A video URL was given without video ID', expected=True)
6132 # Common mistake: https://www.youtube.com/watch?list=playlist_id
6133 self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
6134 return self.url_result(
6135 f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)
6136
# When `_yes_playlist` declines the playlist, hand the single video over to YoutubeIE
6137 if not self._yes_playlist(playlist_id, video_id):
6138 return self.url_result(
6139 f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)
6140
6141 data, ytcfg = self._extract_data(url, display_id)
6142
6143 # YouTube may provide a non-standard redirect to the regional channel
6144 # See: https://github.com/yt-dlp/yt-dlp/issues/2694
6145 # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
6146 redirect_url = traverse_obj(
6147 data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
6148 if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
# Re-attach the originally requested tab and trailing part to the redirect target
6149 redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post))
6150 self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
6151 return self.url_result(redirect_url, YoutubeTabIE)
6152
# `extra_tabs` collects URLs of further tabs (streams/shorts) to extract in addition to the main page
6153 tabs, extra_tabs = self._extract_tab_renderers(data), []
6154 if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
6155 selected_tab = self._extract_selected_tab(tabs)
6156 selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url) # NB: Name may be translated
6157 self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')
6158
# No tab was requested: gather every upload-bearing tab of the channel
6159 if not original_tab_id and selected_tab_name:
6160 self.to_screen('Downloading all uploads of the channel. '
6161 'To download only the videos in a specific tab, pass the tab\'s URL')
6162 if self._has_tab(tabs, 'streams'):
6163 extra_tabs.append(''.join((pre, '/streams', post)))
6164 if self._has_tab(tabs, 'shorts'):
6165 extra_tabs.append(''.join((pre, '/shorts', post)))
6166 # XXX: Members-only tab should also be extracted
6167
6168 if not extra_tabs and selected_tab_id != 'videos':
6169 # Channel does not have streams, shorts or videos tabs
6170 if item_id[:2] != 'UC':
6171 raise ExtractorError('This channel has no uploads', expected=True)
6172
6173 # Topic channels don't have /videos. Use the equivalent playlist instead
6174 pl_id = f'UU{item_id[2:]}'
6175 pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
6176 try:
6177 data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
6178 except ExtractorError:
# Any failure to load the uploads playlist is reported as the channel having no uploads
6179 raise ExtractorError('This channel has no uploads', expected=True)
6180 else:
6181 item_id, url = pl_id, pl_url
6182 self.to_screen(
6183 f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')
6184
6185 elif extra_tabs and selected_tab_id != 'videos':
6186 # When there are shorts/live tabs but not videos tab
# The page fetched above is dropped (data = None); entries then come from `extra_tabs`.
# NOTE(review): relies on the helpers called below accepting `data=None` - confirm
6187 url, data = f'{pre}{post}', None
6188
# A tab was requested (or /videos was implied) but a different one ended up selected
6189 elif (original_tab_id or 'videos') != selected_tab_id:
6190 if original_tab_id == 'live':
6191 # Live tab should have redirected to the video
6192 # Except in the case the channel has an actual live tab
6193 # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
6194 raise UserNotLive(video_id=item_id)
6195 elif selected_tab_name:
6196 raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)
6197
6198 # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
6199 url = f'{pre}{post}'
6200
6201 # YouTube sometimes provides a button to reload playlist with unavailable videos.
6202 if 'no-youtube-unavailable-videos' not in compat_opts:
6203 data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
6204 self._extract_and_report_alerts(data, only_once=True)
6205
# Tabs are extracted again because `data` may have been replaced or cleared above
6206 tabs, entries = self._extract_tab_renderers(data), []
6207 if tabs:
6208 entries = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
6209 entries[0].update({
6210 'extractor_key': YoutubeTabIE.ie_key(),
6211 'extractor': YoutubeTabIE.IE_NAME,
6212 'webpage_url': url,
6213 })
6214 if self.get_param('playlist_items') == '0':
6215 entries.extend(self.url_result(u, YoutubeTabIE) for u in extra_tabs)
6216 else: # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
# Each extra tab is extracted by calling this method again with that tab's URL
6217 entries.extend(map(self._real_extract, extra_tabs))
6218
# A lone entry is returned as-is; several entries are wrapped in one outer playlist
6219 if len(entries) == 1:
6220 return entries[0]
6221 elif entries:
6222 metadata = self._extract_metadata_from_tabs(item_id, data)
6223 uploads_url = 'the Uploads (UU) playlist URL'
6224 if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):
6225 uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
6226 self.to_screen(
6227 'Downloading as multiple playlists, separated by tabs. '
6228 f'To download as a single playlist instead, pass {uploads_url}')
6229 return self.playlist_result(entries, item_id, **metadata)
6230
6231 # Inline playlist
6232 playlist = traverse_obj(
6233 data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
6234 if playlist:
6235 return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)
6236
# Last resort: fall back to a single video, preferring the id found in the page data over the URL's `v`
6237 video_id = traverse_obj(
6238 data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
6239 if video_id:
6240 if tab != '/live': # live tab is expected to redirect to video
6241 self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
6242 return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)
6243
6244 raise ExtractorError('Unable to recognize tab page')
6245
6246
class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist ids and URLs carrying a `list=` parameter, and forwards them
    # to YoutubeTabIE as a canonical https://www.youtube.com/playlist URL.
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/c/WickmanVT',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/c/WickmanVT',
            'availability': 'public',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Claim the URL only if YoutubeTabIE does not want it and it carries no `v` (video id) parameter."""
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): this function-scope import duplicates the module-level one; presumably
        # it keeps the method self-contained in case its source is copied elsewhere - confirm
        # before removing it.
        from ..utils import parse_qs
        first_video_id = parse_qs(url).get('v', [None])[0]
        return False if first_video_id else super().suitable(url)

    def _real_extract(self, url):
        """Rewrite the input into a canonical playlist URL and delegate it to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)

        # Keep the original query string when there is one; a bare id has none, so build `list=<id>`
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            # Pass the music origin along as smuggled data
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
6359
6360
class YoutubeYtBeIE(InfoExtractor):
    """Handle ``youtu.be`` short links that also carry a ``list=`` playlist id."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite the short link as a watch URL with ``v`` and ``list`` and delegate it."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
6410
6411
class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map ``/embed/live_stream?channel=<id>`` embeds to the channel's ``/live`` URL."""
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate ``https://www.youtube.com/channel/<id>/live`` to ``YoutubeTabIE``."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
6425
6426
class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` shorthand into a ``/user/<name>`` URL."""
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the user page URL to ``YoutubeTabIE``."""
        user_id = self._match_id(url)
        user_page = f'https://www.youtube.com/user/{user_id}'
        return self.url_result(user_page, YoutubeTabIE, user_id)
6439
6440
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the ``:ytfav`` keyword (and its spelling variants) to the ``LL`` playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the fixed ``list=LL`` playlist URL to ``YoutubeTabIE``."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
6458
6459
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Build a playlist from the notification menu, addressed by the ``:ytnotif`` keyword."""
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield one entry per notification found in ``response``.

        ``continuation_list`` is a one-element list used as an out-parameter:
        slot 0 is reset to ``None`` and then set to the last
        ``continuationItemRenderer`` seen among the items, if any.
        """
        # The items live under the popup menu path, or under the
        # appendContinuationItemsAction path; both are tried.
        notification_list = traverse_obj(
            response,
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list) or []
        continuation_list[0] = None
        for item in notification_list:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            continuation = item.get('continuationItemRenderer')
            if continuation:
                continuation_list[0] = continuation

    def _extract_notification_renderer(self, notification):
        """Convert one ``notificationRenderer`` into a ``url`` result dict.

        A notification with a watch endpoint points at the video. Otherwise
        it is treated as a channel post and needs both a browse id and a
        ``/post/<id>`` canonical URL; ``None`` is returned when either is
        missing.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        # Only meaningful when video_id was found; replaced below otherwise
        url = f'https://www.youtube.com/watch?v={video_id}'
        channel_id = None
        if not video_id:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not channel_id or not post_id:
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        # NOTE(review): the channel name is read from a fixed position in the
        # contextual menu (item 1, run 1) - presumably layout-dependent; confirm.
        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        # The title is whatever follows "<channel>...: " in the short message
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)
        # The timestamp is only parsed when the 'approximate_date' argument is set for YoutubeTabIE
        timestamp = (self._parse_time_text(self._get_text(notification, 'sentTimeText'))
                     if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE)
                     else None)
        return {
            '_type': 'url',
            'url': url,
            'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'timestamp': timestamp,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield notification entries page by page until no continuation is left."""
        # One-element list shared with _extract_notification_menu, which
        # stores the next page's continuation renderer in slot 0.
        continuation_list = [None]
        response = None
        for page in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            # Visitor data for the headers is taken from the previous response (None on page 1)
            response = self._extract_response(
                item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response)))
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return the notifications as a playlist with id and title ``notifications``."""
        display_id = 'notifications'
        # The ytcfg download is skipped (empty dict) when self.skip_webpage is set
        ytcfg = self._download_ytcfg('web', display_id) if not self.skip_webpage else {}
        self._report_playlist_authcheck(ytcfg)
        return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id)
6549
6550
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearch`` search key."""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]
6564
6565
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearchdate`` search key."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]
6579
6580
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle ``/results`` and ``/search`` URLs, passing the ``sp`` parameter through."""
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        # Channel results
        'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
        'info_dict': {
            'id': 'kurzgesagt',
            'title': 'kurzgesagt',
        },
        'playlist': [{
            'info_dict': {
                '_type': 'url',
                'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'ie_key': 'YoutubeTab',
                'channel': 'Kurzgesagt – In a Nutshell',
                'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
                'title': 'Kurzgesagt – In a Nutshell',
                'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'playlist_count': int,  # XXX: should have a way of saying > 1
                'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'thumbnails': list,
            },
        }],
        'params': {'extract_flat': True, 'playlist_items': '1'},
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the search results as a playlist whose id and title are the query."""
        url_params = parse_qs(url)
        # 'search_query' takes precedence over 'q'
        query = (url_params.get('search_query') or url_params.get('q'))[0]
        search_params = url_params.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)
6644
6645
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle ``music.youtube.com/search`` URLs.

    A result section can be selected either through the ``sp`` query
    parameter or through a URL fragment naming one of ``_SECTIONS``.
    """
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        },
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'},
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'},
    }]

    # Section name (as written in the URL fragment) -> value of the "sp" parameter
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return the search results as a playlist titled ``<query>[ - <section>]``."""
        url_params = parse_qs(url)
        query = (url_params.get('search_query') or url_params.get('q'))[0]
        search_params = url_params.get('sp', (None,))[0]
        if search_params:
            # Name the section after the matching _SECTIONS key, or after
            # the raw parameter value when it is not a known one.
            section = next(
                (name for name, token in self._SECTIONS.items() if token == search_params),
                search_params)
        else:
            # No "sp" given: take the text after the first '#' as a section name
            fragment = (url.split('#') + [''])[1]
            section = urllib.parse.unquote_plus(fragment).lower()
            search_params = self._SECTIONS.get(section)
            if not search_params:
                section = None
        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, search_params, default_client='web_music')
        return self.playlist_result(entries, title, title)
6697
6698
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.
    Each subclass is expected to override the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _FEED_NAME = 'feeds'

    def _real_initialize(self):
        """Run the login-required check borrowed from ``YoutubeBaseInfoExtractor``."""
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(cls):
        """Extractor name derived from ``_FEED_NAME``."""
        feed = cls._FEED_NAME
        return f'youtube:{feed}'

    def _real_extract(self, url):
        """Delegate the ``/feed/<_FEED_NAME>`` URL to ``YoutubeTabIE``."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
6717
6718
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ``:ytwatchlater`` keyword to the ``WL`` playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the fixed ``list=WL`` playlist URL to ``YoutubeTabIE``."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
6731
6732
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``recommended``; also matches the bare youtube.com home URL."""
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Overrides the base class default of True
    _LOGIN_REQUIRED = False
    _TESTS = [
        {'url': ':ytrec', 'only_matching': True},
        {'url': ':ytrecommended', 'only_matching': True},
        {'url': 'https://youtube.com', 'only_matching': True},
    ]
6748
6749
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``subscriptions``, addressed by the ``:ytsubs`` keyword."""
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [
        {'url': ':ytsubs', 'only_matching': True},
        {'url': ':ytsubscriptions', 'only_matching': True},
    ]
6761
6762
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``history``, addressed by the ``:ythis`` / ``:ythistory`` keyword."""
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [
        {'url': ':ythistory', 'only_matching': True},
    ]
6771
6772
class YoutubeStoriesIE(InfoExtractor):
    """Expand ``ytstories:UC<id>`` into an ``RLTD<id>`` playlist URL."""
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    IE_NAME = 'youtube:stories'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the playlist URL to ``YoutubeTabIE`` with ``is_story`` smuggled in."""
        # The captured id excludes the "UC" prefix; "RLTD" is prepended instead
        playlist_id = f'RLTD{self._match_id(url)}'
        playlist_url = f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1'
        story_url = smuggle_url(playlist_url, {'is_story': True})
        return self.url_result(story_url, ie=YoutubeTabIE, video_id=playlist_id)
6787
6788
class YoutubeShortsAudioPivotIE(InfoExtractor):
    """Map ``/source/<video id>/shorts`` URLs to the ``sfv_audio_pivot`` feed."""
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    IE_NAME = 'youtube:shorts:pivot:audio'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [{
        'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
        'only_matching': True,
    }]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Build the sfv_audio_pivot browse params for the given video id.

        The id is embedded three times in a fixed binary template, which is
        then base64-encoded and percent-quoted.
        """
        raw_id = video_id.encode()
        payload = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % (raw_id, raw_id, raw_id)
        encoded = base64.b64encode(payload).decode()
        return urllib.parse.quote(encoded)

    def _real_extract(self, url):
        """Delegate the feed URL carrying the generated ``bp`` parameter to ``YoutubeTabIE``."""
        video_id = self._match_id(url)
        browse_params = self._generate_audio_pivot_params(video_id)
        return self.url_result(
            f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}',
            ie=YoutubeTabIE)
6811
6812
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that lost their video id and explain why.

    Such URLs typically result from an unquoted ``&`` on the command line;
    extraction always fails with an explanatory, expected error.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ``ExtractorError`` telling the user to quote the URL."""
        # The example commands name yt-dlp, the program this extractor ships
        # with, so that the suggestion can be copied and run as-is.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
6860
6861
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Resolve a ``/clip/<id>`` URL to its base video plus the clip's start/end times."""
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """Return a ``url_transparent`` result for the clip's base video.

        The result keeps the clip id as ``id`` and carries ``section_start``
        and ``section_end`` in seconds.

        Raises:
            ExtractorError: if the page data contains no video id or no
                clip loop command.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False, expected_type=dict)
        # Without this guard a missing loop command surfaces as a bare
        # TypeError from subscripting None instead of an extractor error.
        if not clip_data:
            raise ExtractorError('Unable to find clip data')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            # Times are given in milliseconds; the result fields are in seconds
            'section_start': int(clip_data['startTimeMs']) / 1000,
            'section_end': int(clip_data['endTimeMs']) / 1000,
        }
6918
6919
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` parameter is only 1 to 10 characters long."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ``ExtractorError`` naming the incomplete id."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)