]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/youtube.py
[utils] `traverse_obj`: Fix more bugs
[yt-dlp.git] / yt_dlp / extractor / youtube.py
1 import base64
2 import calendar
3 import collections
4 import copy
5 import datetime
6 import enum
7 import hashlib
8 import itertools
9 import json
10 import math
11 import os.path
12 import random
13 import re
14 import sys
15 import threading
16 import time
17 import traceback
18 import urllib.error
19 import urllib.parse
20
21 from .common import InfoExtractor, SearchInfoExtractor
22 from .openload import PhantomJSwrapper
23 from ..compat import functools
24 from ..jsinterp import JSInterpreter
25 from ..utils import (
26 NO_DEFAULT,
27 ExtractorError,
28 LazyList,
29 UserNotLive,
30 bug_reports_message,
31 classproperty,
32 clean_html,
33 datetime_from_str,
34 dict_get,
35 filter_dict,
36 float_or_none,
37 format_field,
38 get_first,
39 int_or_none,
40 is_html,
41 join_nonempty,
42 js_to_json,
43 mimetype2ext,
44 network_exceptions,
45 orderedSet,
46 parse_codecs,
47 parse_count,
48 parse_duration,
49 parse_iso8601,
50 parse_qs,
51 qualities,
52 remove_start,
53 smuggle_url,
54 str_or_none,
55 str_to_int,
56 strftime_or_none,
57 traverse_obj,
58 try_get,
59 unescapeHTML,
60 unified_strdate,
61 unified_timestamp,
62 unsmuggle_url,
63 update_url_query,
64 url_or_none,
65 urljoin,
66 variadic,
67 )
68
# Static configuration of the known InnerTube clients, keyed by client name.
# Some entries deliberately omit keys (e.g. INNERTUBE_API_KEY, INNERTUBE_HOST,
# REQUIRE_JS_PLAYER): build_innertube_clients() fills those in with defaults,
# sets the context's 'hl' and adds a 'priority' to every entry.
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20220731.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20220727.01.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20220726.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '5.16.51',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '22.30.100',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    # No INNERTUBE_API_KEY here: the default one is applied by build_innertube_clients()
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '5.21',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    # No INNERTUBE_API_KEY here: the default one is applied by build_innertube_clients()
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '22.33.101',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85
    },
}
241
242
243 def _split_innertube_client(client_name):
244 variant, *base = client_name.rsplit('.', 1)
245 if base:
246 return variant, base[0], variant
247 base, *variant = client_name.split('_', 1)
248 return client_name, base, variant[0] if variant else None
249
250
def build_innertube_clients():
    """
    Complete INNERTUBE_CLIENTS in place.

    Every entry present when the function is called gets defaults for
    INNERTUBE_API_KEY, INNERTUBE_HOST, REQUIRE_JS_PLAYER and the context's 'hl',
    plus a numeric 'priority' computed from its base client and variant.
    For each client without a variant, a "<name>_embedscreen" copy is registered as well.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    # Earlier base clients rank higher; unknown base clients rank -1
    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
    base_rank = qualities(tuple(reversed(base_clients)))

    defaults = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: "*_embedscreen" entries are inserted while looping
    for name, config in list(INNERTUBE_CLIENTS.items()):
        for key, fallback in defaults:
            config.setdefault(key, fallback)
        config['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        base_client, variant = _split_innertube_client(name)[1:]
        config['priority'] = 10 * base_rank(base_client)

        if variant == 'embedded':
            config['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            config['priority'] -= 2
        elif variant:
            config['priority'] -= 3
        else:
            # Plain client: also register a copy that presents itself as an embed
            screen_config = copy.deepcopy(config)
            screen_config['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            screen_config['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            screen_config['priority'] -= 3
            INNERTUBE_CLIENTS[f'{name}_embedscreen'] = screen_config


build_innertube_clients()
280
281
@enum.unique
class BadgeType(enum.Enum):
    """Kinds of badges reported by YoutubeBaseInfoExtractor._extract_badges."""

    # Availability badges
    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()

    # Live status badge
    LIVE_NOW = enum.auto()
289
290
291 class YoutubeBaseInfoExtractor(InfoExtractor):
292 """Provide base functions for Youtube extractors"""
293
# Regex alternation of reserved YouTube path names
# NOTE(review): presumably used to tell reserved paths apart from channel/user names — confirm at the call sites
_RESERVED_NAMES = (
    r'channel|c|user|playlist|watch|w|v|embed|e|live|watch_popup|clip|'
    r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
    r'browse|oembed|get_video_info|iframe_api|s/player|source|'
    r'storefront|oops|index|account|t/terms|about|upload|signin|logout')

# Regex for playlist IDs: a known prefix followed by 10+ ID characters, or one of the fixed IDs
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

# _NETRC_MACHINE = 'youtube'

# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
306
# Hostname regexes (each with an optional subdomain prefix such as "www.")
# of alternative front-ends: Invidious and Piped instances
_INVIDIOUS_SITES = (
    # invidious-redirect websites
    r'(?:www\.)?redirect\.invidious\.io',
    r'(?:(?:www|dev)\.)?invidio\.us',
    # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
    r'(?:www\.)?invidious\.pussthecat\.org',
    r'(?:www\.)?invidious\.zee\.li',
    r'(?:www\.)?invidious\.ethibox\.fr',
    r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
    r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
    r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
    # youtube-dl invidious instances list
    r'(?:(?:www|no)\.)?invidiou\.sh',
    r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
    r'(?:www\.)?invidious\.kabi\.tk',
    r'(?:www\.)?invidious\.mastodon\.host',
    r'(?:www\.)?invidious\.zapashcanon\.fr',
    r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
    r'(?:www\.)?invidious\.tinfoil-hat\.net',
    r'(?:www\.)?invidious\.himiko\.cloud',
    r'(?:www\.)?invidious\.reallyancient\.tech',
    r'(?:www\.)?invidious\.tube',
    r'(?:www\.)?invidiou\.site',
    r'(?:www\.)?invidious\.site',
    r'(?:www\.)?invidious\.xyz',
    r'(?:www\.)?invidious\.nixnet\.xyz',
    r'(?:www\.)?invidious\.048596\.xyz',
    r'(?:www\.)?invidious\.drycat\.fr',
    r'(?:www\.)?inv\.skyn3t\.in',
    r'(?:www\.)?tube\.poal\.co',
    r'(?:www\.)?tube\.connect\.cafe',
    r'(?:www\.)?vid\.wxzm\.sx',
    r'(?:www\.)?vid\.mint\.lgbt',
    r'(?:www\.)?vid\.puffyan\.us',
    r'(?:www\.)?yewtu\.be',
    r'(?:www\.)?yt\.elukerio\.org',
    r'(?:www\.)?yt\.lelux\.fi',
    r'(?:www\.)?invidious\.ggc-project\.de',
    r'(?:www\.)?yt\.maisputain\.ovh',
    r'(?:www\.)?ytprivate\.com',
    r'(?:www\.)?invidious\.13ad\.de',
    r'(?:www\.)?invidious\.toot\.koeln',
    r'(?:www\.)?invidious\.fdn\.fr',
    r'(?:www\.)?watch\.nettohikari\.com',
    r'(?:www\.)?invidious\.namazso\.eu',
    r'(?:www\.)?invidious\.silkky\.cloud',
    r'(?:www\.)?invidious\.exonip\.de',
    r'(?:www\.)?invidious\.riverside\.rocks',
    r'(?:www\.)?invidious\.blamefran\.net',
    r'(?:www\.)?invidious\.moomoo\.de',
    r'(?:www\.)?ytb\.trom\.tf',
    r'(?:www\.)?yt\.cyberhost\.uk',
    r'(?:www\.)?kgg2m7yk5aybusll\.onion',
    r'(?:www\.)?qklhadlycap4cnod\.onion',
    r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
    r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
    r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
    r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
    r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
    r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
    r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
    r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
    r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
    r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
    # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
    # NOTE: piped.kavin.rocks is also matched by the kavin.rocks pattern in the youtube-dl list above
    r'(?:www\.)?piped\.kavin\.rocks',
    r'(?:www\.)?piped\.tokhmi\.xyz',
    r'(?:www\.)?piped\.syncpundit\.io',
    r'(?:www\.)?piped\.mha\.fi',
    r'(?:www\.)?watch\.whatever\.social',
    r'(?:www\.)?piped\.garudalinux\.org',
    r'(?:www\.)?piped\.rivo\.lol',
    r'(?:www\.)?piped-libre\.kavin\.rocks',
    r'(?:www\.)?yt\.jae\.fi',
    r'(?:www\.)?piped\.mint\.lgbt',
    r'(?:www\.)?il\.ax',
    r'(?:www\.)?piped\.esmailelbob\.xyz',
    r'(?:www\.)?piped\.projectsegfau\.lt',
    r'(?:www\.)?piped\.privacydev\.net',
    r'(?:www\.)?piped\.palveluntarjoaja\.eu',
    r'(?:www\.)?piped\.smnz\.de',
    r'(?:www\.)?piped\.adminforge\.de',
    r'(?:www\.)?watch\.whatevertinfoil\.de',
    r'(?:www\.)?piped\.qdi\.fi',
    r'(?:www\.)?piped\.video',
    r'(?:www\.)?piped\.aeong\.one',
)
394
# extracted from account/account_menu ep
# XXX: These are the supported YouTube UI and API languages,
# which is slightly different from languages supported for translation in YouTube studio
# The `lang` extractor argument is validated against this list (case-sensitively) in _preferred_lang
_SUPPORTED_LANG_CODES = [
    'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
    'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
    'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
    'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
    'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
    'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
]

# Alert messages that _report_alerts drops instead of reporting as warnings
_IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
408
@functools.cached_property
def _preferred_lang(self):
    """
    Language code requested through the `lang` extractor argument.

    Returns None when the argument is unset or empty.
    Raises an (expected) ExtractorError when the code is not in _SUPPORTED_LANG_CODES
    and emits a warning for any supported language other than "en".
    """
    lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
    if not lang:
        return None

    if lang not in self._SUPPORTED_LANG_CODES:
        supported = join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")
        raise ExtractorError(
            f'Unsupported language code: {lang}. Supported language codes (case-sensitive): {supported}.',
            expected=True)

    if lang != 'en':
        self.report_warning(
            f'Preferring "{lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
    return lang
426
def _initialize_consent(self):
    """
    Make sure a "YES" CONSENT cookie is set for .youtube.com.

    Nothing is done when a `__Secure-3PSID` cookie exists or the CONSENT cookie
    already contains "YES". The ID of a "PENDING+<id>" consent cookie is reused
    when available; otherwise a random 3-digit ID is generated.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return

    pending_id = None
    consent_cookie = jar.get('CONSENT')
    if consent_cookie:
        if 'YES' in consent_cookie.value:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', consent_cookie.value, 'consent', default=None)

    if not pending_id:
        pending_id = random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', f'YES+cb.20210328-17-p0.en+FX+{pending_id}')
441
def _initialize_pref(self):
    """
    Rewrite the PREF cookie so that `hl` is the preferred language (default "en")
    and `tz` is "UTC", keeping any other settings of an existing PREF cookie.
    """
    pref_cookie = self._get_cookies('https://www.youtube.com/').get('PREF')
    settings = {}
    if pref_cookie:
        try:
            settings = dict(urllib.parse.parse_qsl(pref_cookie.value))
        except ValueError:
            # Best effort: an unparsable cookie is replaced by a fresh one
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

    settings['hl'] = self._preferred_lang or 'en'
    settings['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(settings))
453
def _real_initialize(self):
    """Set up the PREF and CONSENT cookies, then enforce the login requirement."""
    setup_steps = (
        self._initialize_pref,
        self._initialize_consent,
        self._check_login_required,
    )
    for step in setup_steps:
        step()
458
def _check_login_required(self):
    """Raise a login-required error if _LOGIN_REQUIRED is set and no cookies were passed."""
    if not self._LOGIN_REQUIRED or self._cookies_passed:
        return
    self.raise_login_required('Login details are needed to download this content', method='cookies')
462
# Matches the start of the `ytInitialData = ...` assignment, written either as
# `window["ytInitialData"]` or as a bare name; used by extract_yt_initial_data
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
# Matches the start of the `ytInitialPlayerResponse = ...` assignment
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
465
def _get_default_ytcfg(self, client='web'):
    """Return an independent deep copy of the built-in config of `client`."""
    builtin_config = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin_config)
468
def _get_innertube_host(self, client='web'):
    """Return the INNERTUBE_HOST configured for `client`."""
    client_config = INNERTUBE_CLIENTS[client]
    return client_config['INNERTUBE_HOST']
471
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get on `ytcfg`, falling back to the built-in config of `default_client`
    whenever the first lookup yields nothing (or a falsy value).
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
476
def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name from `ytcfg`, or from the defaults of `default_client`."""
    name_getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, name_getters, str, default_client)
481
def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from `ytcfg`, or from the defaults of `default_client`."""
    version_getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, version_getters, str, default_client)
486
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """
    Pick the API hostname. Order of precedence: the `innertube_host` extractor
    argument, then `req_api_hostname`, then the host of `default_client` (or "web").
    """
    user_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if user_host:
        return user_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
490
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return INNERTUBE_API_KEY from `ytcfg`, or from the defaults of `default_client`."""
    def read_api_key(config):
        return config['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, read_api_key, str, default_client)
493
def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the INNERTUBE_CONTEXT dict of `ytcfg` (or of the default client config),
    with its 'client' section forced to the preferred language (default "en") and UTC.

    NOTE(review): when the context comes from `ytcfg`, the caller's dict appears
    to be updated in place — confirm against get_first/traverse_obj.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)

    # Enforce language and tz for extraction
    client_section = traverse_obj(context, 'client', expected_type=dict, default={})
    client_section['hl'] = self._preferred_lang or 'en'
    client_section['timeZone'] = 'UTC'
    client_section['utcOffsetMinutes'] = 0
    return context
501
# Cached SAPISID value: None = not looked up yet, False = no usable cookie found
_SAPISID = None

def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """
    Build the "SAPISIDHASH <time>_<sha1>" authorization value for `origin`.

    The SAPISID is read once from the `__Secure-3PAPISID` or `SAPISID` cookie and
    cached in self._SAPISID. Returns None when no such cookie value is available.
    """
    timestamp = round(time.time())

    if self._SAPISID is None:
        cookies = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        cookie = dict_get(cookies, ('__Secure-3PAPISID', 'SAPISID'))
        if not (cookie and cookie.value):
            self._SAPISID = False
        else:
            self._SAPISID = cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not cookies.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=timestamp + 3600)

    if not self._SAPISID:
        return None

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    payload = f'{timestamp} {self._SAPISID} {origin}'
    digest = hashlib.sha1(payload.encode()).hexdigest()
    return f'SAPISIDHASH {timestamp}_{digest}'
528
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST `query` (merged with an InnerTube context) to the `youtubei/v1/<ep>`
    endpoint and return the decoded JSON response.

    `context` defaults to the context of `default_client`. The API key is taken
    from the `innertube_key` extractor argument, then `api_key`, then the defaults.
    Entries of `headers` override the generated API headers.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    user_key = self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = user_key or api_key or self._extract_api_key(default_client=default_client)

    hostname = self._select_api_hostname(api_hostname, default_client)
    return self._download_json(
        f'https://{hostname}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})
546
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Find the JSON assigned to `ytInitialData` in `webpage` and return it parsed."""
    return self._search_json(
        self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id,
        fatal=fatal)
549
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    Returns the first SESSION_INDEX found in the given ytcfgs that converts to an int, else None.
    See: https://github.com/yt-dlp/yt-dlp/pull/519
    """
    indices = (int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX'])) for ytcfg in data)
    return next((index for index in indices if index is not None), None)
560
# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the ID_TOKEN from `ytcfg` if present, else search for it in `webpage`; None if not found."""
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
571
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    @returns the first channel syncId found in the arguments, or None
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated_sid:
            return delegated_sid

        datasync_id = try_get(source, datasync_getters, str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]
    return None
590
@staticmethod
def _extract_visitor_data(*args):
    """
    Return the first visitorData string found in the given API responses / ytcfgs
    Appears to be used to track session state
    """
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_data_paths], expected_type=str)
600
@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
604
def extract_ytcfg(self, video_id, webpage):
    """Parse the object passed to `ytcfg.set(...)` in `webpage`; returns {} when missing or invalid."""
    if not webpage:
        return {}
    raw_config = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    parsed = self._parse_json(raw_config, video_id, fatal=False)
    return parsed or {}
612
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an InnerTube API request.

    Explicit arguments take precedence over values extracted from `ytcfg`,
    which in turn fall back to the defaults of `default_client` where supported.
    Headers whose value is None are dropped (via filter_dict).
    """
    origin = 'https://' + self._select_api_hostname(api_hostname, default_client)

    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    client_version = self._extract_client_version(ytcfg, default_client)
    identity = identity_token or self._extract_identity_token(ytcfg)
    page_id = account_syncid or self._extract_account_syncid(ytcfg)
    visitor_id = visitor_data or self._extract_visitor_data(ytcfg)
    user_agent = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)

    headers = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': client_version,
        'Origin': origin,
        'X-Youtube-Identity-Token': identity,
        'X-Goog-PageId': page_id,
        'X-Goog-Visitor-Id': visitor_id,
        'User-Agent': user_agent,
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # Default to the first account when only a syncid is known
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers.update({'Authorization': auth, 'X-Origin': origin})
    return filter_dict(headers)
638
def _download_ytcfg(self, client, video_id):
    """
    Download the page of a web-based client and return the ytcfg found in it.
    Returns {} for clients without a known page or when nothing could be extracted.
    """
    config_pages = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }
    page_url = config_pages.get(client)
    if not page_url:
        return {}

    client_label = client.replace("_", " ").strip()
    webpage = self._download_webpage(
        page_url, video_id, fatal=False, note=f'Downloading {client_label} client config')
    return self.extract_ytcfg(video_id, webpage) or {}
650
651 @staticmethod
652 def _build_api_continuation_query(continuation, ctp=None):
653 query = {
654 'continuation': continuation
655 }
656 # TODO: Inconsistency with clickTrackingParams.
657 # Currently we have a fixed ctp contained within context (from ytcfg)
658 # and a ctp in root query for continuation.
659 if ctp:
660 query['clickTracking'] = {'clickTrackingParams': ctp}
661 return query
662
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the renderer's `nextContinuationData` or
    `reloadContinuationData`; returns None when there is no continuation token.
    """
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))
675
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint
    (`continuationCommand.token` plus `clickTrackingParams`).
    Returns None for non-dict input or when the token is missing.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))
685
@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query of `renderer`: taken from its next/reload
    continuation data when present, otherwise from the first usable
    `continuationItemRenderer` among its contents/items/rows.
    """
    item_renderer_path = (
        ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
        ('continuationEndpoint', ('button', 'buttonRenderer', 'command'))
    )
    return cls._extract_next_continuation_data(renderer) or traverse_obj(
        renderer, item_renderer_path, get_all=False, expected_type=cls._extract_continuation_ep_data)
696
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) pairs for the alerts listed under data['alerts'].

    Malformed entries are skipped: non-dict items of the list, non-dict alert
    renderers, and alerts lacking a type or a message text.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Guard the renderer too, so that unexpected data cannot raise
            # AttributeError in the middle of the generator
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
709
def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """
    Report (alert_type, message) pairs as warnings and raise on errors.

    With `fatal`, alerts of type "error" (case-insensitive) are collected as errors:
    the last one is raised as ExtractorError, the earlier ones are downgraded to warnings.
    Other alerts are reported as warnings unless their message is in _IGNORED_WARNINGS.
    """
    errors, warnings = [], []
    for alert_type, alert_message in alerts:
        if alert_type.lower() == 'error' and fatal:
            errors.append([alert_type, alert_message])
            continue
        if alert_message not in self._IGNORED_WARNINGS:
            warnings.append([alert_type, alert_message])

    # Everything except the last error is only worth a warning
    for kind, message in itertools.chain(warnings, errors[:-1]):
        self.report_warning(f'YouTube said: {kind} - {message}', only_once=only_once)

    if not errors:
        return
    raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
722
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts of `data` and pass them, with any extra arguments, to _report_alerts."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
725
def _extract_badges(self, renderer: dict):
    """
    Collect the badges of `renderer` (its badges[*].metadataBadgeRenderer entries).

    Each badge is classified by its icon type, then by its style, and finally
    (as a language-dependent fallback) by its label text.
    Returns a list of {'type': BadgeType} dicts, at most one per badge;
    badges that cannot be classified are omitted.
    """
    privacy_icon_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM
    }

    badges = []
    for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer')):
        badge_type = (
            privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
            # expected_type=str keeps an unexpected (unhashable) style value from raising
            or badge_style_map.get(traverse_obj(badge, 'style', expected_type=str))
        )
        if badge_type:
            badges.append({'type': badge_type})
            continue

        # fallback, won't work in some languages
        label = traverse_obj(badge, 'label', expected_type=str, default='').lower()
        for match, label_badge_type in label_map.items():
            if match in label:
                # Use the type matched by the label (badge_type is empty here)
                # and stop at the first match so a badge is reported only once
                badges.append({'type': label_badge_type})
                break

    return badges
765
@staticmethod
def _has_badge(badges, badge_type):
    """Tell whether any entry of `badges` has the given `badge_type`."""
    matching = traverse_obj(badges, lambda _, badge: badge['type'] == badge_type)
    return bool(matching)
769
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found in `data`, or at one of the given paths inside it.

    An item yields its 'simpleText', or else the concatenated 'text' of its 'runs'
    (a plain list is treated as the runs themselves). `max_runs` limits how many
    runs are joined. Returns None when no text is found.
    """
    for path in path_list or [None]:
        if path is not None:
            found = traverse_obj(data, path, default=[])
            # Only branching paths (containing ... or a list/tuple key) yield a list of candidates
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            candidates = found if is_branching else [found]
        else:
            candidates = [data]

        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text

            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                runs = candidate

            limit = max_runs or len(runs)
            runs = runs[:min(len(runs), limit)]
            joined = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str))
            if joined:
                return joined
791
def _get_count(self, data, *path_list):
    """
    Read the text at the given paths (see _get_text) and parse it as a count.
    Falls back to the leading run of digits/commas when parse_count fails.
    """
    count_text = self._get_text(data, *path_list) or ''
    count = parse_count(count_text)
    if count is not None:
        return count

    leading_digits = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', count_text), 'count', default=None)
    return str_to_int(leading_digits)
799
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Collect thumbnail dicts ('url', 'height', 'width') from `data`
    @param path_list: path list to level that contains 'thumbnails' key
    """
    collected = []
    for path in path_list or [()]:
        for entry in traverse_obj(data, (*variadic(path), 'thumbnails', ...)):
            url = url_or_none(entry.get('url'))
            if url:
                # Sometimes youtube gives a wrong thumbnail URL. See:
                # https://github.com/yt-dlp/yt-dlp/issues/233
                # https://github.com/ytdl-org/youtube-dl/issues/28023
                if 'maxresdefault' in url:
                    url = url.partition('?')[0]
                collected.append({
                    'url': url,
                    'height': int_or_none(entry.get('height')),
                    'width': int_or_none(entry.get('width')),
                })
    return collected
823
@staticmethod
def extract_relative_time(relative_time_text):
    """
    Find a relative time in the text and convert it with datetime_from_str
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'
    Returns None when nothing matches or the value cannot be converted.
    """
    mobj = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if not mobj:
        return None

    start = mobj.group('start')
    if start:
        return datetime_from_str(start)

    amount, unit = mobj.group('time', 'unit')
    try:
        return datetime_from_str(f'now-{amount}{unit}')
    except ValueError:
        return None
839
def _parse_time_text(self, text):
    """
    Convert a time description to a timestamp.

    Relative times ("2 days ago") are tried first, then absolute dates.
    Returns None for empty or unparsable text; a warning is issued for the
    latter only when the preferred language is unset or "en".
    """
    if not text:
        return None

    relative = self.extract_relative_time(text)
    timestamp = (
        calendar.timegm(relative.timetuple())
        if isinstance(relative, datetime.datetime) else None)

    if timestamp is None:
        timestamp = unified_timestamp(text)
        if not timestamp:
            # Retry with only the date-like part of the text
            date_text = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_text)

    if timestamp is None and self._preferred_lang in (None, 'en'):
        self.report_warning(
            f'Cannot parse localized time text "{text}"', only_once=True)
    return timestamp
859
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` through self._call_api, retrying on transient failures.

    One attempt is made per iteration of self.RetryManager(); an attempt is
    marked for retry by assigning `retry.error` and continuing the loop.

    check_get_keys is passed (via variadic) as path(s) to traverse_obj on the
    response; a falsy lookup result is treated as incomplete data and retried.

    fatal is forwarded to self._error_or_warning for errors that are not
    retried; self._call_api itself is always invoked with fatal=True.

    Returns the response once it passes all checks, or whatever
    self._error_or_warning(...) returns for an error that is not retried.
    NOTE(review): what happens once retries are exhausted is decided by
    RetryManager, which is not visible here - confirm.
    """
    for retry in self.RetryManager():
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            # Errors not caused by the network are never retried
            if not isinstance(e.cause, network_exceptions):
                return self._error_or_warning(e, fatal=fatal)
            # Network errors other than HTTP errors are always retried
            elif not isinstance(e.cause, urllib.error.HTTPError):
                retry.error = e
                continue

            # Peek at the start of the error body; if it is not HTML, try to
            # parse it as JSON and report the error message it carries
            first_bytes = e.cause.read(512)
            if not is_html(first_bytes):
                yt_error = try_get(
                    self._parse_json(
                        self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if e.cause.code not in (403, 429):
                retry.error = e
                continue
            # 403 and 429 are not retried
            return self._error_or_warning(e, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            retry.error = ExtractorError('Incomplete data received', expected=True)
            continue

        return response
911
@staticmethod
def is_music_url(url):
    """Return True if the URL starts with http(s)://music.youtube.com/, else False."""
    # Match objects are always truthy, so bool() maps match/None to True/False
    return bool(re.match(r'https?://music\.youtube\.com/', url))
915
def _extract_video(self, renderer):
    """
    Build a 'url'-type result dict for YoutubeIE from a video renderer.

    Fields are read from the renderer itself; the title, channel, channel id
    and time text fall back to the reel player header found under the
    renderer's navigation endpoint when the renderer does not provide them.
    """
    video_id = renderer.get('videoId')

    reel_header = traverse_obj(renderer, (
        'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
        'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))

    title = self._get_text(renderer, 'title', 'headline')
    if not title:
        title = self._get_text(reel_header, 'reelTitleText')
    description = self._get_text(renderer, 'descriptionSnippet')

    # Duration: explicit seconds first, then the length text, then the accessibility label
    duration = int_or_none(renderer.get('lengthSeconds'))
    if duration is None:
        length_text = self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
        duration = parse_duration(length_text)
    if duration is None:
        # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab)
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'),
            default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    if not channel_id:
        channel_id = traverse_obj(reel_header, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId'))

    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)

    # Pick the /shorts/ URL form when either the overlay or the navigation URL says so
    web_url = traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)
    navigation_url = urljoin('https://www.youtube.com/', web_url) or ''
    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    time_text = self._get_text(renderer, 'publishedTimeText', 'videoInfo')
    if not time_text:
        time_text = self._get_text(reel_header, 'timestampText') or ''
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW):
        live_status = 'is_live'
    else:
        live_status = None

    # videoInfo is a string like '50K views • 10 years ago'.
    views_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
    if 'no views' in views_text.lower():
        view_count = 0
    else:
        view_count = self._get_count({'simpleText': views_text})
    if live_status in ('is_live', 'is_upcoming'):
        view_count_field = 'concurrent_view_count'
    else:
        view_count_field = 'view_count'

    channel = self._get_text(renderer, 'ownerText', 'shortBylineText')
    if not channel:
        channel = self._get_text(reel_header, 'channelTitleText')
    channel_url = f'https://www.youtube.com/channel/{channel_id}' if channel_id else None

    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    # The approximate timestamp is only computed when explicitly requested
    timestamp = None
    if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
        timestamp = self._parse_time_text(time_text)

    if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC):
        availability = 'public'
    else:
        availability = self._availability(
            is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
            needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
            needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
            is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'channel_id': channel_id,
        'channel': channel,
        'channel_url': channel_url,
        'thumbnails': thumbnails,
        'timestamp': timestamp,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
        view_count_field: view_count,
        'live_status': live_status,
    }
998
999
1000 class YoutubeIE(YoutubeBaseInfoExtractor):
1001 IE_DESC = 'YouTube'
1002 _VALID_URL = r"""(?x)^
1003 (
1004 (?:https?://|//) # http(s):// or protocol-independent URL
1005 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
1006 (?:www\.)?deturl\.com/www\.youtube\.com|
1007 (?:www\.)?pwnyoutube\.com|
1008 (?:www\.)?hooktube\.com|
1009 (?:www\.)?yourepeat\.com|
1010 tube\.majestyc\.net|
1011 %(invidious)s|
1012 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
1013 (?:.*?\#/)? # handle anchor (#/) redirect urls
1014 (?: # the various things that can precede the ID:
1015 (?:(?:v|embed|e|shorts|live)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
1016 |(?: # or the v= param in all its forms
1017 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
1018 (?:\?|\#!?) # the params delimiter ? or # or #!
1019 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
1020 v=
1021 )
1022 ))
1023 |(?:
1024 youtu\.be| # just youtu.be/xxxx
1025 vid\.plus| # or vid.plus/xxxx
1026 zwearz\.com/watch| # or zwearz.com/watch/xxxx
1027 %(invidious)s
1028 )/
1029 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
1030 )
1031 )? # all until now is optional -> you can pass the naked ID
1032 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
1033 (?(1).+)? # if we found the ID, everything can follow
1034 (?:\#|$)""" % {
1035 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
1036 }
1037 _EMBED_REGEX = [
1038 r'''(?x)
1039 (?:
1040 <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
1041 data-video-url=|
1042 <embed[^>]+?src=|
1043 embedSWF\(?:\s*|
1044 <object[^>]+data=|
1045 new\s+SWFObject\(
1046 )
1047 (["\'])
1048 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1049 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1050 \1''',
1051 # https://wordpress.org/plugins/lazy-load-for-videos/
1052 r'''(?xs)
1053 <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
1054 \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
1055 ]
1056 _RETURN_TYPE = 'video' # XXX: How to handle multifeed?
1057
1058 _PLAYER_INFO_RE = (
1059 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
1060 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
1061 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
1062 )
1063 _formats = {
1064 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1065 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1066 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
1067 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
1068 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
1069 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1070 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1071 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1072 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
1073 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1074 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1075 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1076 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1077 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1078 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1079 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1080 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1081 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1082
1083
1084 # 3D videos
1085 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1086 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1087 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1088 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1089 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1090 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1091 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1092
1093 # Apple HTTP Live Streaming
1094 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1095 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1096 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1097 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1098 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1099 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1100 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1101 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
1102
1103 # DASH mp4 video
1104 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1105 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1106 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1107 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1108 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
1109 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
1110 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1111 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1112 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1113 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1114 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1115 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
1116
1117 # Dash mp4 audio
1118 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1119 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1120 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1121 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1122 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1123 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1124 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
1125
1126 # Dash webm
1127 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1128 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1129 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1130 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1131 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1132 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1133 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1134 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1135 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1136 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1137 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1138 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1139 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1140 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1141 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1142 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1143 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1144 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1145 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1146 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1147 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1148 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1149
1150 # Dash webm audio
1151 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1152 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1153
1154 # Dash webm audio with opus inside
1155 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1156 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1157 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1158
1159 # RTMP (unnamed)
1160 '_rtmp': {'protocol': 'rtmp'},
1161
1162 # av01 video only formats sometimes served with "unknown" codecs
1163 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1164 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1165 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1166 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1167 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1168 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1169 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1170 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1171 }
1172 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
1173
1174 _GEO_BYPASS = False
1175
1176 IE_NAME = 'youtube'
1177 _TESTS = [
1178 {
1179 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1180 'info_dict': {
1181 'id': 'BaW_jenozKc',
1182 'ext': 'mp4',
1183 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1184 'uploader': 'Philipp Hagemeister',
1185 'uploader_id': 'phihag',
1186 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1187 'channel': 'Philipp Hagemeister',
1188 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1189 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1190 'upload_date': '20121002',
1191 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1192 'categories': ['Science & Technology'],
1193 'tags': ['youtube-dl'],
1194 'duration': 10,
1195 'view_count': int,
1196 'like_count': int,
1197 'availability': 'public',
1198 'playable_in_embed': True,
1199 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1200 'live_status': 'not_live',
1201 'age_limit': 0,
1202 'start_time': 1,
1203 'end_time': 9,
1204 'comment_count': int,
1205 'channel_follower_count': int
1206 }
1207 },
1208 {
1209 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1210 'note': 'Embed-only video (#1746)',
1211 'info_dict': {
1212 'id': 'yZIXLfi8CZQ',
1213 'ext': 'mp4',
1214 'upload_date': '20120608',
1215 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1216 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1217 'uploader': 'SET India',
1218 'uploader_id': 'setindia',
1219 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1220 'age_limit': 18,
1221 },
1222 'skip': 'Private video',
1223 },
1224 {
1225 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1226 'note': 'Use the first video ID in the URL',
1227 'info_dict': {
1228 'id': 'BaW_jenozKc',
1229 'ext': 'mp4',
1230 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1231 'uploader': 'Philipp Hagemeister',
1232 'uploader_id': 'phihag',
1233 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1234 'channel': 'Philipp Hagemeister',
1235 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1236 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1237 'upload_date': '20121002',
1238 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1239 'categories': ['Science & Technology'],
1240 'tags': ['youtube-dl'],
1241 'duration': 10,
1242 'view_count': int,
1243 'like_count': int,
1244 'availability': 'public',
1245 'playable_in_embed': True,
1246 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1247 'live_status': 'not_live',
1248 'age_limit': 0,
1249 'comment_count': int,
1250 'channel_follower_count': int
1251 },
1252 'params': {
1253 'skip_download': True,
1254 },
1255 },
1256 {
1257 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1258 'note': '256k DASH audio (format 141) via DASH manifest',
1259 'info_dict': {
1260 'id': 'a9LDPn-MO4I',
1261 'ext': 'm4a',
1262 'upload_date': '20121002',
1263 'uploader_id': '8KVIDEO',
1264 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1265 'description': '',
1266 'uploader': '8KVIDEO',
1267 'title': 'UHDTV TEST 8K VIDEO.mp4'
1268 },
1269 'params': {
1270 'youtube_include_dash_manifest': True,
1271 'format': '141',
1272 },
1273 'skip': 'format 141 not served anymore',
1274 },
1275 # DASH manifest with encrypted signature
1276 {
1277 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1278 'info_dict': {
1279 'id': 'IB3lcPjvWLA',
1280 'ext': 'm4a',
1281 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1282 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1283 'duration': 244,
1284 'uploader': 'AfrojackVEVO',
1285 'uploader_id': 'AfrojackVEVO',
1286 'upload_date': '20131011',
1287 'abr': 129.495,
1288 'like_count': int,
1289 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1290 'playable_in_embed': True,
1291 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1292 'view_count': int,
1293 'track': 'The Spark',
1294 'live_status': 'not_live',
1295 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1296 'channel': 'Afrojack',
1297 'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
1298 'tags': 'count:19',
1299 'availability': 'public',
1300 'categories': ['Music'],
1301 'age_limit': 0,
1302 'alt_title': 'The Spark',
1303 'channel_follower_count': int
1304 },
1305 'params': {
1306 'youtube_include_dash_manifest': True,
1307 'format': '141/bestaudio[ext=m4a]',
1308 },
1309 },
1310 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1311 {
1312 'note': 'Embed allowed age-gate video',
1313 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1314 'info_dict': {
1315 'id': 'HtVdAasjOgU',
1316 'ext': 'mp4',
1317 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1318 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1319 'duration': 142,
1320 'uploader': 'The Witcher',
1321 'uploader_id': 'WitcherGame',
1322 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1323 'upload_date': '20140605',
1324 'age_limit': 18,
1325 'categories': ['Gaming'],
1326 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1327 'availability': 'needs_auth',
1328 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1329 'like_count': int,
1330 'channel': 'The Witcher',
1331 'live_status': 'not_live',
1332 'tags': 'count:17',
1333 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1334 'playable_in_embed': True,
1335 'view_count': int,
1336 'channel_follower_count': int
1337 },
1338 },
1339 {
1340 'note': 'Age-gate video with embed allowed in public site',
1341 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1342 'info_dict': {
1343 'id': 'HsUATh_Nc2U',
1344 'ext': 'mp4',
1345 'title': 'Godzilla 2 (Official Video)',
1346 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1347 'upload_date': '20200408',
1348 'uploader_id': 'FlyingKitty900',
1349 'uploader': 'FlyingKitty',
1350 'age_limit': 18,
1351 'availability': 'needs_auth',
1352 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1353 'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
1354 'channel': 'FlyingKitty',
1355 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1356 'view_count': int,
1357 'categories': ['Entertainment'],
1358 'live_status': 'not_live',
1359 'tags': ['Flyingkitty', 'godzilla 2'],
1360 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1361 'like_count': int,
1362 'duration': 177,
1363 'playable_in_embed': True,
1364 'channel_follower_count': int
1365 },
1366 },
1367 {
1368 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1369 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1370 'info_dict': {
1371 'id': 'Tq92D6wQ1mg',
1372 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1373 'ext': 'mp4',
1374 'upload_date': '20191228',
1375 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1376 'uploader': 'Projekt Melody',
1377 'description': 'md5:17eccca93a786d51bc67646756894066',
1378 'age_limit': 18,
1379 'like_count': int,
1380 'availability': 'needs_auth',
1381 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1382 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1383 'view_count': int,
1384 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1385 'channel': 'Projekt Melody',
1386 'live_status': 'not_live',
1387 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1388 'playable_in_embed': True,
1389 'categories': ['Entertainment'],
1390 'duration': 106,
1391 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1392 'comment_count': int,
1393 'channel_follower_count': int
1394 },
1395 },
1396 {
1397 'note': 'Non-Agegated non-embeddable video',
1398 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1399 'info_dict': {
1400 'id': 'MeJVWBSsPAY',
1401 'ext': 'mp4',
1402 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1403 'uploader': 'Herr Lurik',
1404 'uploader_id': 'st3in234',
1405 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1406 'upload_date': '20130730',
1407 'track': 'Such mich find mich',
1408 'age_limit': 0,
1409 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1410 'like_count': int,
1411 'playable_in_embed': False,
1412 'creator': 'OOMPH!',
1413 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1414 'view_count': int,
1415 'alt_title': 'Such mich find mich',
1416 'duration': 210,
1417 'channel': 'Herr Lurik',
1418 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1419 'categories': ['Music'],
1420 'availability': 'public',
1421 'uploader_url': 'http://www.youtube.com/user/st3in234',
1422 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1423 'live_status': 'not_live',
1424 'artist': 'OOMPH!',
1425 'channel_follower_count': int
1426 },
1427 },
1428 {
1429 'note': 'Non-bypassable age-gated video',
1430 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1431 'only_matching': True,
1432 },
1433 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1434 # YouTube Red ad is not captured for creator
1435 {
1436 'url': '__2ABJjxzNo',
1437 'info_dict': {
1438 'id': '__2ABJjxzNo',
1439 'ext': 'mp4',
1440 'duration': 266,
1441 'upload_date': '20100430',
1442 'uploader_id': 'deadmau5',
1443 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1444 'creator': 'deadmau5',
1445 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1446 'uploader': 'deadmau5',
1447 'title': 'Deadmau5 - Some Chords (HD)',
1448 'alt_title': 'Some Chords',
1449 'availability': 'public',
1450 'tags': 'count:14',
1451 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1452 'view_count': int,
1453 'live_status': 'not_live',
1454 'channel': 'deadmau5',
1455 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1456 'like_count': int,
1457 'track': 'Some Chords',
1458 'artist': 'deadmau5',
1459 'playable_in_embed': True,
1460 'age_limit': 0,
1461 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1462 'categories': ['Music'],
1463 'album': 'Some Chords',
1464 'channel_follower_count': int
1465 },
1466 'expected_warnings': [
1467 'DASH manifest missing',
1468 ]
1469 },
1470 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1471 {
1472 'url': 'lqQg6PlCWgI',
1473 'info_dict': {
1474 'id': 'lqQg6PlCWgI',
1475 'ext': 'mp4',
1476 'duration': 6085,
1477 'upload_date': '20150827',
1478 'uploader_id': 'olympic',
1479 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1480 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
1481 'uploader': 'Olympics',
1482 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1483 'like_count': int,
1484 'release_timestamp': 1343767800,
1485 'playable_in_embed': True,
1486 'categories': ['Sports'],
1487 'release_date': '20120731',
1488 'channel': 'Olympics',
1489 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1490 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1491 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1492 'age_limit': 0,
1493 'availability': 'public',
1494 'live_status': 'was_live',
1495 'view_count': int,
1496 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
1497 'channel_follower_count': int
1498 },
1499 'params': {
1500 'skip_download': 'requires avconv',
1501 }
1502 },
1503 # Non-square pixels
1504 {
1505 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1506 'info_dict': {
1507 'id': '_b-2C3KPAM0',
1508 'ext': 'mp4',
1509 'stretched_ratio': 16 / 9.,
1510 'duration': 85,
1511 'upload_date': '20110310',
1512 'uploader_id': 'AllenMeow',
1513 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1514 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1515 'uploader': '孫ᄋᄅ',
1516 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1517 'playable_in_embed': True,
1518 'channel': '孫ᄋᄅ',
1519 'age_limit': 0,
1520 'tags': 'count:11',
1521 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1522 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1523 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1524 'view_count': int,
1525 'categories': ['People & Blogs'],
1526 'like_count': int,
1527 'live_status': 'not_live',
1528 'availability': 'unlisted',
1529 'comment_count': int,
1530 'channel_follower_count': int
1531 },
1532 },
1533 # url_encoded_fmt_stream_map is empty string
1534 {
1535 'url': 'qEJwOuvDf7I',
1536 'info_dict': {
1537 'id': 'qEJwOuvDf7I',
1538 'ext': 'webm',
1539 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1540 'description': '',
1541 'upload_date': '20150404',
1542 'uploader_id': 'spbelect',
1543 'uploader': 'Наблюдатели Петербурга',
1544 },
1545 'params': {
1546 'skip_download': 'requires avconv',
1547 },
1548 'skip': 'This live event has ended.',
1549 },
1550 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1551 {
1552 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1553 'info_dict': {
1554 'id': 'FIl7x6_3R5Y',
1555 'ext': 'webm',
1556 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1557 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1558 'duration': 220,
1559 'upload_date': '20150625',
1560 'uploader_id': 'dorappi2000',
1561 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1562 'uploader': 'dorappi2000',
1563 'formats': 'mincount:31',
1564 },
1565 'skip': 'not actual anymore',
1566 },
1567 # DASH manifest with segment_list
1568 {
1569 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1570 'md5': '8ce563a1d667b599d21064e982ab9e31',
1571 'info_dict': {
1572 'id': 'CsmdDsKjzN8',
1573 'ext': 'mp4',
1574 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1575 'uploader': 'Airtek',
1576 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1577 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1578 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1579 },
1580 'params': {
1581 'youtube_include_dash_manifest': True,
1582 'format': '135', # bestvideo
1583 },
1584 'skip': 'This live event has ended.',
1585 },
1586 {
1587 # Multifeed videos (multiple cameras), URL can be of any Camera
1588 'url': 'https://www.youtube.com/watch?v=zaPI8MvL8pg',
1589 'info_dict': {
1590 'id': 'zaPI8MvL8pg',
1591 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04',
1592 'description': 'md5:563ccbc698b39298481ca3c571169519',
1593 },
1594 'playlist': [{
1595 'info_dict': {
1596 'id': 'j5yGuxZ8lLU',
1597 'ext': 'mp4',
1598 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Chris)',
1599 'uploader': 'WiiLikeToPlay',
1600 'description': 'md5:563ccbc698b39298481ca3c571169519',
1601 'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',
1602 'duration': 10120,
1603 'channel_follower_count': int,
1604 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1605 'availability': 'public',
1606 'playable_in_embed': True,
1607 'upload_date': '20131105',
1608 'uploader_id': 'WiiRikeToPray',
1609 'categories': ['Gaming'],
1610 'live_status': 'was_live',
1611 'tags': 'count:24',
1612 'release_timestamp': 1383701910,
1613 'thumbnail': 'https://i.ytimg.com/vi/j5yGuxZ8lLU/maxresdefault.jpg',
1614 'comment_count': int,
1615 'age_limit': 0,
1616 'like_count': int,
1617 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1618 'channel': 'WiiLikeToPlay',
1619 'view_count': int,
1620 'release_date': '20131106',
1621 },
1622 }, {
1623 'info_dict': {
1624 'id': 'zaPI8MvL8pg',
1625 'ext': 'mp4',
1626 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Tyson)',
1627 'uploader_id': 'WiiRikeToPray',
1628 'availability': 'public',
1629 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1630 'channel': 'WiiLikeToPlay',
1631 'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',
1632 'channel_follower_count': int,
1633 'description': 'md5:563ccbc698b39298481ca3c571169519',
1634 'duration': 10108,
1635 'age_limit': 0,
1636 'like_count': int,
1637 'tags': 'count:24',
1638 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1639 'uploader': 'WiiLikeToPlay',
1640 'release_timestamp': 1383701915,
1641 'comment_count': int,
1642 'upload_date': '20131105',
1643 'thumbnail': 'https://i.ytimg.com/vi/zaPI8MvL8pg/maxresdefault.jpg',
1644 'release_date': '20131106',
1645 'playable_in_embed': True,
1646 'live_status': 'was_live',
1647 'categories': ['Gaming'],
1648 'view_count': int,
1649 },
1650 }, {
1651 'info_dict': {
1652 'id': 'R7r3vfO7Hao',
1653 'ext': 'mp4',
1654 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Spencer)',
1655 'thumbnail': 'https://i.ytimg.com/vi/R7r3vfO7Hao/maxresdefault.jpg',
1656 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1657 'like_count': int,
1658 'availability': 'public',
1659 'playable_in_embed': True,
1660 'upload_date': '20131105',
1661 'description': 'md5:563ccbc698b39298481ca3c571169519',
1662 'uploader_id': 'WiiRikeToPray',
1663 'uploader_url': 'http://www.youtube.com/user/WiiRikeToPray',
1664 'channel_follower_count': int,
1665 'tags': 'count:24',
1666 'release_date': '20131106',
1667 'uploader': 'WiiLikeToPlay',
1668 'comment_count': int,
1669 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1670 'channel': 'WiiLikeToPlay',
1671 'categories': ['Gaming'],
1672 'release_timestamp': 1383701914,
1673 'live_status': 'was_live',
1674 'age_limit': 0,
1675 'duration': 10128,
1676 'view_count': int,
1677 },
1678 }],
1679 'params': {'skip_download': True},
1680 },
1681 {
1682 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1683 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1684 'info_dict': {
1685 'id': 'gVfLd0zydlo',
1686 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1687 },
1688 'playlist_count': 2,
1689 'skip': 'Not multifeed anymore',
1690 },
1691 {
1692 'url': 'https://vid.plus/FlRa-iH7PGw',
1693 'only_matching': True,
1694 },
1695 {
1696 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1697 'only_matching': True,
1698 },
1699 {
1700 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1701 # Also tests cut-off URL expansion in video description (see
1702 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1703 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1704 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1705 'info_dict': {
1706 'id': 'lsguqyKfVQg',
1707 'ext': 'mp4',
1708 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1709 'alt_title': 'Dark Walk',
1710 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1711 'duration': 133,
1712 'upload_date': '20151119',
1713 'uploader_id': 'IronSoulElf',
1714 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1715 'uploader': 'IronSoulElf',
1716 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1717 'track': 'Dark Walk',
1718 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1719 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1720 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1721 'categories': ['Film & Animation'],
1722 'view_count': int,
1723 'live_status': 'not_live',
1724 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1725 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1726 'tags': 'count:13',
1727 'availability': 'public',
1728 'channel': 'IronSoulElf',
1729 'playable_in_embed': True,
1730 'like_count': int,
1731 'age_limit': 0,
1732 'channel_follower_count': int
1733 },
1734 'params': {
1735 'skip_download': True,
1736 },
1737 },
1738 {
1739 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1740 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1741 'only_matching': True,
1742 },
1743 {
1744 # Video with yt:stretch=17:0
1745 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1746 'info_dict': {
1747 'id': 'Q39EVAstoRM',
1748 'ext': 'mp4',
1749 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1750 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1751 'upload_date': '20151107',
1752 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1753 'uploader': 'CH GAMER DROID',
1754 },
1755 'params': {
1756 'skip_download': True,
1757 },
1758 'skip': 'This video does not exist.',
1759 },
1760 {
1761 # Video with incomplete 'yt:stretch=16:'
1762 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1763 'only_matching': True,
1764 },
1765 {
1766 # Video licensed under Creative Commons
1767 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1768 'info_dict': {
1769 'id': 'M4gD1WSo5mA',
1770 'ext': 'mp4',
1771 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1772 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1773 'duration': 721,
1774 'upload_date': '20150128',
1775 'uploader_id': 'BerkmanCenter',
1776 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1777 'uploader': 'The Berkman Klein Center for Internet & Society',
1778 'license': 'Creative Commons Attribution license (reuse allowed)',
1779 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1780 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1781 'like_count': int,
1782 'age_limit': 0,
1783 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1784 'channel': 'The Berkman Klein Center for Internet & Society',
1785 'availability': 'public',
1786 'view_count': int,
1787 'categories': ['Education'],
1788 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1789 'live_status': 'not_live',
1790 'playable_in_embed': True,
1791 'comment_count': int,
1792 'channel_follower_count': int,
1793 'chapters': list,
1794 },
1795 'params': {
1796 'skip_download': True,
1797 },
1798 },
1799 {
1800 # Channel-like uploader_url
1801 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1802 'info_dict': {
1803 'id': 'eQcmzGIKrzg',
1804 'ext': 'mp4',
1805 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1806 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1807 'duration': 4060,
1808 'upload_date': '20151120',
1809 'uploader': 'Bernie Sanders',
1810 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1811 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1812 'license': 'Creative Commons Attribution license (reuse allowed)',
1813 'playable_in_embed': True,
1814 'tags': 'count:12',
1815 'like_count': int,
1816 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1817 'age_limit': 0,
1818 'availability': 'public',
1819 'categories': ['News & Politics'],
1820 'channel': 'Bernie Sanders',
1821 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1822 'view_count': int,
1823 'live_status': 'not_live',
1824 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1825 'comment_count': int,
1826 'channel_follower_count': int,
1827 'chapters': list,
1828 },
1829 'params': {
1830 'skip_download': True,
1831 },
1832 },
1833 {
1834 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1835 'only_matching': True,
1836 },
1837 {
1838 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1839 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1840 'only_matching': True,
1841 },
1842 {
1843 # Rental video preview
1844 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1845 'info_dict': {
1846 'id': 'uGpuVWrhIzE',
1847 'ext': 'mp4',
1848 'title': 'Piku - Trailer',
1849 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1850 'upload_date': '20150811',
1851 'uploader': 'FlixMatrix',
1852 'uploader_id': 'FlixMatrixKaravan',
1853 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1854 'license': 'Standard YouTube License',
1855 },
1856 'params': {
1857 'skip_download': True,
1858 },
1859 'skip': 'This video is not available.',
1860 },
1861 {
1862 # YouTube Red video with episode data
1863 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1864 'info_dict': {
1865 'id': 'iqKdEhx-dD4',
1866 'ext': 'mp4',
1867 'title': 'Isolation - Mind Field (Ep 1)',
1868 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1869 'duration': 2085,
1870 'upload_date': '20170118',
1871 'uploader': 'Vsauce',
1872 'uploader_id': 'Vsauce',
1873 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1874 'series': 'Mind Field',
1875 'season_number': 1,
1876 'episode_number': 1,
1877 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1878 'tags': 'count:12',
1879 'view_count': int,
1880 'availability': 'public',
1881 'age_limit': 0,
1882 'channel': 'Vsauce',
1883 'episode': 'Episode 1',
1884 'categories': ['Entertainment'],
1885 'season': 'Season 1',
1886 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1887 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1888 'like_count': int,
1889 'playable_in_embed': True,
1890 'live_status': 'not_live',
1891 'channel_follower_count': int
1892 },
1893 'params': {
1894 'skip_download': True,
1895 },
1896 'expected_warnings': [
1897 'Skipping DASH manifest',
1898 ],
1899 },
1900 {
1901 # The following content has been identified by the YouTube community
1902 # as inappropriate or offensive to some audiences.
1903 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1904 'info_dict': {
1905 'id': '6SJNVb0GnPI',
1906 'ext': 'mp4',
1907 'title': 'Race Differences in Intelligence',
1908 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1909 'duration': 965,
1910 'upload_date': '20140124',
1911 'uploader': 'New Century Foundation',
1912 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1913 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1914 },
1915 'params': {
1916 'skip_download': True,
1917 },
1918 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1919 },
1920 {
1921 # itag 212
1922 'url': '1t24XAntNCY',
1923 'only_matching': True,
1924 },
1925 {
1926 # geo restricted to JP
1927 'url': 'sJL6WA-aGkQ',
1928 'only_matching': True,
1929 },
1930 {
1931 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1932 'only_matching': True,
1933 },
1934 {
1935 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1936 'only_matching': True,
1937 },
1938 {
1939 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1940 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1941 'only_matching': True,
1942 },
1943 {
1944 # DRM protected
1945 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1946 'only_matching': True,
1947 },
1948 {
1949 # Video with unsupported adaptive stream type formats
1950 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1951 'info_dict': {
1952 'id': 'Z4Vy8R84T1U',
1953 'ext': 'mp4',
1954 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1955 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1956 'duration': 433,
1957 'upload_date': '20130923',
1958 'uploader': 'Amelia Putri Harwita',
1959 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1960 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1961 'formats': 'maxcount:10',
1962 },
1963 'params': {
1964 'skip_download': True,
1965 'youtube_include_dash_manifest': False,
1966 },
1967 'skip': 'not actual anymore',
1968 },
1969 {
1970 # YouTube Music auto-generated description
1971 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1972 'info_dict': {
1973 'id': 'MgNrAu2pzNs',
1974 'ext': 'mp4',
1975 'title': 'Voyeur Girl',
1976 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1977 'upload_date': '20190312',
1978 'uploader': 'Stephen - Topic',
1979 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1980 'artist': 'Stephen',
1981 'track': 'Voyeur Girl',
1982 'album': 'it\'s too much love to know my dear',
1983 'release_date': '20190313',
1984 'release_year': 2019,
1985 'alt_title': 'Voyeur Girl',
1986 'view_count': int,
1987 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1988 'playable_in_embed': True,
1989 'like_count': int,
1990 'categories': ['Music'],
1991 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1992 'channel': 'Stephen',
1993 'availability': 'public',
1994 'creator': 'Stephen',
1995 'duration': 169,
1996 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
1997 'age_limit': 0,
1998 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1999 'tags': 'count:11',
2000 'live_status': 'not_live',
2001 'channel_follower_count': int
2002 },
2003 'params': {
2004 'skip_download': True,
2005 },
2006 },
2007 {
2008 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
2009 'only_matching': True,
2010 },
2011 {
2012 # invalid -> valid video id redirection
2013 'url': 'DJztXj2GPfl',
2014 'info_dict': {
2015 'id': 'DJztXj2GPfk',
2016 'ext': 'mp4',
2017 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
2018 'description': 'md5:bf577a41da97918e94fa9798d9228825',
2019 'upload_date': '20090125',
2020 'uploader': 'Prochorowka',
2021 'uploader_id': 'Prochorowka',
2022 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
2023 'artist': 'Panjabi MC',
2024 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
2025 'album': 'Beware of the Boys (Mundian To Bach Ke)',
2026 },
2027 'params': {
2028 'skip_download': True,
2029 },
2030 'skip': 'Video unavailable',
2031 },
2032 {
2033 # empty description results in an empty string
2034 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
2035 'info_dict': {
2036 'id': 'x41yOUIvK2k',
2037 'ext': 'mp4',
2038 'title': 'IMG 3456',
2039 'description': '',
2040 'upload_date': '20170613',
2041 'uploader_id': 'ElevageOrVert',
2042 'uploader': 'ElevageOrVert',
2043 'view_count': int,
2044 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
2045 'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
2046 'like_count': int,
2047 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
2048 'tags': [],
2049 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
2050 'availability': 'public',
2051 'age_limit': 0,
2052 'categories': ['Pets & Animals'],
2053 'duration': 7,
2054 'playable_in_embed': True,
2055 'live_status': 'not_live',
2056 'channel': 'ElevageOrVert',
2057 'channel_follower_count': int
2058 },
2059 'params': {
2060 'skip_download': True,
2061 },
2062 },
2063 {
2064 # with '};' inside yt initial data (see [1])
2065 # see [2] for an example with '};' inside ytInitialPlayerResponse
2066 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
2067 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
2068 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
2069 'info_dict': {
2070 'id': 'CHqg6qOn4no',
2071 'ext': 'mp4',
2072 'title': 'Part 77 Sort a list of simple types in c#',
2073 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
2074 'upload_date': '20130831',
2075 'uploader_id': 'kudvenkat',
2076 'uploader': 'kudvenkat',
2077 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
2078 'like_count': int,
2079 'uploader_url': 'http://www.youtube.com/user/kudvenkat',
2080 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
2081 'live_status': 'not_live',
2082 'categories': ['Education'],
2083 'availability': 'public',
2084 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
2085 'tags': 'count:12',
2086 'playable_in_embed': True,
2087 'age_limit': 0,
2088 'view_count': int,
2089 'duration': 522,
2090 'channel': 'kudvenkat',
2091 'comment_count': int,
2092 'channel_follower_count': int,
2093 'chapters': list,
2094 },
2095 'params': {
2096 'skip_download': True,
2097 },
2098 },
2099 {
2100 # another example of '};' in ytInitialData
2101 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
2102 'only_matching': True,
2103 },
2104 {
2105 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
2106 'only_matching': True,
2107 },
2108 {
2109 # https://github.com/ytdl-org/youtube-dl/pull/28094
2110 'url': 'OtqTfy26tG0',
2111 'info_dict': {
2112 'id': 'OtqTfy26tG0',
2113 'ext': 'mp4',
2114 'title': 'Burn Out',
2115 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
2116 'upload_date': '20141120',
2117 'uploader': 'The Cinematic Orchestra - Topic',
2118 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2119 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2120 'artist': 'The Cinematic Orchestra',
2121 'track': 'Burn Out',
2122 'album': 'Every Day',
2123 'like_count': int,
2124 'live_status': 'not_live',
2125 'alt_title': 'Burn Out',
2126 'duration': 614,
2127 'age_limit': 0,
2128 'view_count': int,
2129 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2130 'creator': 'The Cinematic Orchestra',
2131 'channel': 'The Cinematic Orchestra',
2132 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
2133 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2134 'availability': 'public',
2135 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
2136 'categories': ['Music'],
2137 'playable_in_embed': True,
2138 'channel_follower_count': int
2139 },
2140 'params': {
2141 'skip_download': True,
2142 },
2143 },
2144 {
2145 # controversial video, only works with bpctr when authenticated with cookies
2146 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
2147 'only_matching': True,
2148 },
2149 {
2150 # controversial video, requires bpctr/contentCheckOk
2151 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
2152 'info_dict': {
2153 'id': 'SZJvDhaSDnc',
2154 'ext': 'mp4',
2155 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2156 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
2157 'uploader': 'CBS Mornings',
2158 'uploader_id': 'CBSThisMorning',
2159 'upload_date': '20140716',
2160 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2161 'duration': 170,
2162 'categories': ['News & Politics'],
2163 'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
2164 'view_count': int,
2165 'channel': 'CBS Mornings',
2166 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2167 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2168 'age_limit': 18,
2169 'availability': 'needs_auth',
2170 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2171 'like_count': int,
2172 'live_status': 'not_live',
2173 'playable_in_embed': True,
2174 'channel_follower_count': int
2175 }
2176 },
2177 {
2178 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2179 'url': 'cBvYw8_A0vQ',
2180 'info_dict': {
2181 'id': 'cBvYw8_A0vQ',
2182 'ext': 'mp4',
2183 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2184 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2185 'upload_date': '20201120',
2186 'uploader': 'Walk around Japan',
2187 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2188 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2189 'duration': 1456,
2190 'categories': ['Travel & Events'],
2191 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2192 'view_count': int,
2193 'channel': 'Walk around Japan',
2194 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2195 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2196 'age_limit': 0,
2197 'availability': 'public',
2198 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2199 'live_status': 'not_live',
2200 'playable_in_embed': True,
2201 'channel_follower_count': int
2202 },
2203 'params': {
2204 'skip_download': True,
2205 },
2206 }, {
2207 # Has multiple audio streams
2208 'url': 'WaOKSUlf4TM',
2209 'only_matching': True
2210 }, {
2211 # Requires Premium: has format 141 when requested using YTM url
2212 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2213 'only_matching': True
2214 }, {
2215 # multiple subtitles with same lang_code
2216 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2217 'only_matching': True,
2218 }, {
2219 # Force use of the android client fallback
2220 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2221 'info_dict': {
2222 'id': 'YOelRv7fMxY',
2223 'title': 'DIGGING A SECRET TUNNEL Part 1',
2224 'ext': '3gp',
2225 'upload_date': '20210624',
2226 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2227 'uploader': 'colinfurze',
2228 'uploader_id': 'colinfurze',
2229 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
2230 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2231 'duration': 596,
2232 'categories': ['Entertainment'],
2233 'uploader_url': 'http://www.youtube.com/user/colinfurze',
2234 'view_count': int,
2235 'channel': 'colinfurze',
2236 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2237 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2238 'age_limit': 0,
2239 'availability': 'public',
2240 'like_count': int,
2241 'live_status': 'not_live',
2242 'playable_in_embed': True,
2243 'channel_follower_count': int,
2244 'chapters': list,
2245 },
2246 'params': {
2247 'format': '17', # 3gp format available on android
2248 'extractor_args': {'youtube': {'player_client': ['android']}},
2249 },
2250 },
2251 {
2252 # Skip download of additional client configs (remix client config in this case)
2253 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2254 'only_matching': True,
2255 'params': {
2256 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2257 },
2258 }, {
2259 # shorts
2260 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2261 'only_matching': True,
2262 }, {
2263 'note': 'Storyboards',
2264 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2265 'info_dict': {
2266 'id': '5KLPxDtMqe8',
2267 'ext': 'mhtml',
2268 'format_id': 'sb0',
2269 'title': 'Your Brain is Plastic',
2270 'uploader_id': 'scishow',
2271 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2272 'upload_date': '20140324',
2273 'uploader': 'SciShow',
2274 'like_count': int,
2275 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2276 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2277 'view_count': int,
2278 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2279 'playable_in_embed': True,
2280 'tags': 'count:12',
2281 'uploader_url': 'http://www.youtube.com/user/scishow',
2282 'availability': 'public',
2283 'channel': 'SciShow',
2284 'live_status': 'not_live',
2285 'duration': 248,
2286 'categories': ['Education'],
2287 'age_limit': 0,
2288 'channel_follower_count': int,
2289 'chapters': list,
2290 }, 'params': {'format': 'mhtml', 'skip_download': True}
2291 }, {
2292 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2293 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2294 'info_dict': {
2295 'id': '2NUZ8W2llS4',
2296 'ext': 'mp4',
2297 'title': 'The NP that test your phone performance 🙂',
2298 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2299 'uploader': 'Leon Nguyen',
2300 'uploader_id': 'VNSXIII',
2301 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2302 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2303 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2304 'duration': 21,
2305 'view_count': int,
2306 'age_limit': 0,
2307 'categories': ['Gaming'],
2308 'tags': 'count:23',
2309 'playable_in_embed': True,
2310 'live_status': 'not_live',
2311 'upload_date': '20220103',
2312 'like_count': int,
2313 'availability': 'public',
2314 'channel': 'Leon Nguyen',
2315 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2316 'comment_count': int,
2317 'channel_follower_count': int
2318 }
2319 }, {
2320 # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
2321 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2322 'info_dict': {
2323 'id': '2NUZ8W2llS4',
2324 'ext': 'mp4',
2325 'title': 'The NP that test your phone performance 🙂',
2326 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2327 'uploader': 'Leon Nguyen',
2328 'uploader_id': 'VNSXIII',
2329 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2330 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2331 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2332 'duration': 21,
2333 'view_count': int,
2334 'age_limit': 0,
2335 'categories': ['Gaming'],
2336 'tags': 'count:23',
2337 'playable_in_embed': True,
2338 'live_status': 'not_live',
2339 'upload_date': '20220102',
2340 'like_count': int,
2341 'availability': 'public',
2342 'channel': 'Leon Nguyen',
2343 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2344 'comment_count': int,
2345 'channel_follower_count': int
2346 },
2347 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
2348 }, {
2349 # Date text is for a premiered video; ensure upload date is in UTC (published 1641172509)
2350 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2351 'info_dict': {
2352 'id': 'mzZzzBU6lrM',
2353 'ext': 'mp4',
2354 'title': 'I Met GeorgeNotFound In Real Life...',
2355 'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
2356 'uploader': 'Quackity',
2357 'uploader_id': 'QuackityHQ',
2358 'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
2359 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2360 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2361 'duration': 955,
2362 'view_count': int,
2363 'age_limit': 0,
2364 'categories': ['Entertainment'],
2365 'tags': 'count:26',
2366 'playable_in_embed': True,
2367 'live_status': 'not_live',
2368 'release_timestamp': 1641172509,
2369 'release_date': '20220103',
2370 'upload_date': '20220103',
2371 'like_count': int,
2372 'availability': 'public',
2373 'channel': 'Quackity',
2374 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2375 'channel_follower_count': int
2376 }
2377 },
2378 { # continuous livestream. Microformat upload date should be preferred.
2379 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2380 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2381 'info_dict': {
2382 'id': 'kgx4WGK0oNU',
2383 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2384 'ext': 'mp4',
2385 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2386 'availability': 'public',
2387 'age_limit': 0,
2388 'release_timestamp': 1637975704,
2389 'upload_date': '20210619',
2390 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2391 'live_status': 'is_live',
2392 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
2393 'uploader': '阿鲍Abao',
2394 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2395 'channel': 'Abao in Tokyo',
2396 'channel_follower_count': int,
2397 'release_date': '20211127',
2398 'tags': 'count:39',
2399 'categories': ['People & Blogs'],
2400 'like_count': int,
2401 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2402 'view_count': int,
2403 'playable_in_embed': True,
2404 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
2405 'concurrent_view_count': int,
2406 },
2407 'params': {'skip_download': True}
2408 }, {
2409 # Story. Requires specific player params to work.
2410 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
2411 'info_dict': {
2412 'id': 'vv8qTUWmulI',
2413 'ext': 'mp4',
2414 'availability': 'unlisted',
2415 'view_count': int,
2416 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
2417 'upload_date': '20220526',
2418 'categories': ['Education'],
2419 'title': 'Story',
2420 'channel': 'IT\'S HISTORY',
2421 'description': '',
2422 'uploader_id': 'BlastfromthePast',
2423 'duration': 12,
2424 'uploader': 'IT\'S HISTORY',
2425 'playable_in_embed': True,
2426 'age_limit': 0,
2427 'live_status': 'not_live',
2428 'tags': [],
2429 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
2430 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
2431 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
2432 },
2433 'skip': 'stories get removed after some period of time',
2434 }, {
2435 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2436 'info_dict': {
2437 'id': 'tjjjtzRLHvA',
2438 'ext': 'mp4',
2439 'title': 'ハッシュタグ無し };if window.ytcsi',
2440 'upload_date': '20220323',
2441 'like_count': int,
2442 'availability': 'unlisted',
2443 'channel': 'nao20010128nao',
2444 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
2445 'age_limit': 0,
2446 'uploader': 'nao20010128nao',
2447 'uploader_id': 'nao20010128nao',
2448 'categories': ['Music'],
2449 'view_count': int,
2450 'description': '',
2451 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2452 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2453 'live_status': 'not_live',
2454 'playable_in_embed': True,
2455 'channel_follower_count': int,
2456 'duration': 6,
2457 'tags': [],
2458 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
2459 }
2460 }, {
2461 # Prefer primary title+description language metadata by default
2462 # Do not prefer translated description if primary is empty
2463 'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
2464 'info_dict': {
2465 'id': 'el3E4MbxRqQ',
2466 'ext': 'mp4',
2467 'title': 'dlp test video 2 - primary sv no desc',
2468 'description': '',
2469 'channel': 'cole-dlp-test-acc',
2470 'tags': [],
2471 'view_count': int,
2472 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2473 'like_count': int,
2474 'playable_in_embed': True,
2475 'availability': 'unlisted',
2476 'thumbnail': 'https://i.ytimg.com/vi_webp/el3E4MbxRqQ/maxresdefault.webp',
2477 'age_limit': 0,
2478 'duration': 5,
2479 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
2480 'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2481 'live_status': 'not_live',
2482 'upload_date': '20220908',
2483 'categories': ['People & Blogs'],
2484 'uploader': 'cole-dlp-test-acc',
2485 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2486 },
2487 'params': {'skip_download': True}
2488 }, {
2489 # Extractor argument: prefer translated title+description
2490 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
2491 'info_dict': {
2492 'id': 'gHKT4uU8Zng',
2493 'ext': 'mp4',
2494 'channel': 'cole-dlp-test-acc',
2495 'tags': [],
2496 'duration': 5,
2497 'live_status': 'not_live',
2498 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2499 'upload_date': '20220728',
2500 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
2501 'view_count': int,
2502 'categories': ['People & Blogs'],
2503 'thumbnail': 'https://i.ytimg.com/vi_webp/gHKT4uU8Zng/maxresdefault.webp',
2504 'title': 'dlp test video title translated (fr)',
2505 'availability': 'public',
2506 'uploader': 'cole-dlp-test-acc',
2507 'age_limit': 0,
2508 'description': 'dlp test video description translated (fr)',
2509 'playable_in_embed': True,
2510 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2511 'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2512 },
2513 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
2514 'expected_warnings': [r'Preferring "fr" translated fields'],
2515 }, {
2516 'note': '6 channel audio',
2517 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2518 'only_matching': True,
2519 }, {
2520 'note': 'Multiple HLS formats with same itag',
2521 'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',
2522 'info_dict': {
2523 'id': 'kX3nB4PpJko',
2524 'ext': 'mp4',
2525 'categories': ['Entertainment'],
2526 'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',
2527 'uploader_url': 'http://www.youtube.com/user/MrBeast6000',
2528 'live_status': 'not_live',
2529 'duration': 937,
2530 'channel_follower_count': int,
2531 'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',
2532 'title': 'Last To Take Hand Off Jet, Keeps It!',
2533 'channel': 'MrBeast',
2534 'playable_in_embed': True,
2535 'view_count': int,
2536 'upload_date': '20221112',
2537 'uploader': 'MrBeast',
2538 'uploader_id': 'MrBeast6000',
2539 'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',
2540 'age_limit': 0,
2541 'availability': 'public',
2542 'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',
2543 'like_count': int,
2544 'tags': [],
2545 },
2546 'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
2547 }, {
2548 'note': 'Audio formats with Dynamic Range Compression',
2549 'url': 'https://www.youtube.com/watch?v=Tq92D6wQ1mg',
2550 'info_dict': {
2551 'id': 'Tq92D6wQ1mg',
2552 'ext': 'weba',
2553 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
2554 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
2555 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
2556 'channel_follower_count': int,
2557 'description': 'md5:17eccca93a786d51bc67646756894066',
2558 'upload_date': '20191228',
2559 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
2560 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
2561 'playable_in_embed': True,
2562 'like_count': int,
2563 'categories': ['Entertainment'],
2564 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
2565 'age_limit': 18,
2566 'channel': 'Projekt Melody',
2567 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
2568 'view_count': int,
2569 'availability': 'needs_auth',
2570 'comment_count': int,
2571 'live_status': 'not_live',
2572 'uploader': 'Projekt Melody',
2573 'duration': 106,
2574 },
2575 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},
2576 },
2577 {
2578 'url': 'https://www.youtube.com/live/qVv6vCqciTM',
2579 'info_dict': {
2580 'id': 'qVv6vCqciTM',
2581 'ext': 'mp4',
2582 'age_limit': 0,
2583 'uploader_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
2584 'comment_count': int,
2585 'chapters': 'count:13',
2586 'upload_date': '20221223',
2587 'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
2588 'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
2589 'uploader_url': 'http://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
2590 'like_count': int,
2591 'release_date': '20221223',
2592 'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
2593 'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
2594 'view_count': int,
2595 'playable_in_embed': True,
2596 'duration': 4438,
2597 'availability': 'public',
2598 'channel_follower_count': int,
2599 'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
2600 'categories': ['Entertainment'],
2601 'live_status': 'was_live',
2602 'release_timestamp': 1671793345,
2603 'channel': 'さなちゃんねる',
2604 'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
2605 'uploader': 'さなちゃんねる',
2606 },
2607 },
2608 ]
2609
# Test definitions for third-party webpages that embed a YouTube video.
# Each entry gives the page URL, the expected fields of the embedded video
# and the parameters to run the test with.
_WEBPAGE_TESTS = [
    # YouTube <object> embed
    {
        'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
        # NOTE(review): this value is identical to the hash used in
        # 'description' below, which looks like a copy-paste - confirm it is
        # really the intended checksum.
        'md5': '873c81d308b979f0e23ee7e620b312a3',
        'info_dict': {
            'id': 'msN87y-iEx0',
            'ext': 'mp4',
            'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
            'upload_date': '20080526',
            'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
            'uploader': 'Christopher Sykes',
            'uploader_id': 'ChristopherJSykes',
            'age_limit': 0,
            'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
            'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
            'like_count': int,
            'comment_count': int,
            'channel': 'Christopher Sykes',
            'live_status': 'not_live',
            'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
            'availability': 'public',
            'duration': 195,
            'view_count': int,
            'categories': ['Science & Technology'],
            'channel_follower_count': int,
            'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
        },
        'params': {
            'skip_download': True,
        }
    },
]
2645
@classmethod
def suitable(cls, url):
    """Reject URLs with a non-empty ``list`` query value; otherwise defer to the parent class."""
    # NOTE(review): this function-scope import duplicates the module-level
    # one and is presumably deliberate - confirm before removing it.
    from ..utils import parse_qs

    playlist_id = parse_qs(url).get('list', [None])[0]
    return False if playlist_id else super().suitable(url)
2654
def __init__(self, *args, **kwargs):
    """Run the parent initialiser, then create the two empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # _code_cache: player JS source keyed by player id (see `_load_player`)
    # _player_cache: memoised results/errors keyed by tuple (see `_cached`)
    self._code_cache, self._player_cache = {}, {}
2659
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
    """
    Attach a fragment source to every format flagged ``is_from_start``.

    Only the formats with a truthy ``is_from_start`` are processed; they are
    modified in place. While the stream is live each format gets a fragment
    generator factory and the 'http_dash_segments_generator' protocol;
    otherwise the fragments are wrapped in a LazyList.
    """
    # Serialises manifest refetches requested through mpd_feed
    lock = threading.Lock()
    start_time = time.time()
    formats = [f for f in formats if f.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        # Re-download the player responses and rebuild `formats`, but only
        # once more than `delay` seconds have passed since the last fetch
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(prs, (..., 'videoDetails'), expected_type=dict)
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict)
        _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        is_live = live_status == 'is_live'
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        for retry in self.RetryManager(fatal=False):
            with lock:
                refetch_manifest(format_id, delay)

            # Look the requested format up in the (possibly refreshed) list
            f = next((f for f in formats if f['format_id'] == format_id), None)
            if not f:
                if not is_live:
                    retry.error = f'{video_id}: Video is no longer live'
                else:
                    retry.error = f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}'
                continue
            return f['manifest_url'], f['manifest_stream_number'], is_live
        return None

    for f in formats:
        f['is_live'] = is_live
        # The last bound argument is `manifestless_orig_fmt`:
        # False while live, otherwise a copy of this format
        gen = functools.partial(self._live_dash_fragments, video_id, f['format_id'],
                                live_start_time, mpd_feed, not is_live and f.copy())
        if is_live:
            f['fragments'] = gen
            f['protocol'] = 'http_dash_segments_generator'
        else:
            f['fragments'] = LazyList(gen({}))
            del f['is_from_start']
2707
2708 def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
2709 FETCH_SPAN, MAX_DURATION = 5, 432000
2710
2711 mpd_url, stream_number, is_live = None, None, True
2712
2713 begin_index = 0
2714 download_start_time = ctx.get('start') or time.time()
2715
2716 lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
2717 if lack_early_segments:
2718 self.report_warning(bug_reports_message(
2719 'Starting download from the last 120 hours of the live stream since '
2720 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
2721 lack_early_segments = True
2722
2723 known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
2724 fragments, fragment_base_url = None, None
2725
2726 def _extract_sequence_from_mpd(refresh_sequence, immediate):
2727 nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
2728 # Obtain from MPD's maximum seq value
2729 old_mpd_url = mpd_url
2730 last_error = ctx.pop('last_error', None)
2731 expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
2732 mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
2733 or (mpd_url, stream_number, False))
2734 if not refresh_sequence:
2735 if expire_fast and not is_live:
2736 return False, last_seq
2737 elif old_mpd_url == mpd_url:
2738 return True, last_seq
2739 if manifestless_orig_fmt:
2740 fmt_info = manifestless_orig_fmt
2741 else:
2742 try:
2743 fmts, _ = self._extract_mpd_formats_and_subtitles(
2744 mpd_url, None, note=False, errnote=False, fatal=False)
2745 except ExtractorError:
2746 fmts = None
2747 if not fmts:
2748 no_fragment_score += 2
2749 return False, last_seq
2750 fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
2751 fragments = fmt_info['fragments']
2752 fragment_base_url = fmt_info['fragment_base_url']
2753 assert fragment_base_url
2754
2755 _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
2756 return True, _last_seq
2757
2758 self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
2759 while is_live:
2760 fetch_time = time.time()
2761 if no_fragment_score > 30:
2762 return
2763 if last_segment_url:
2764 # Obtain from "X-Head-Seqnum" header value from each segment
2765 try:
2766 urlh = self._request_webpage(
2767 last_segment_url, None, note=False, errnote=False, fatal=False)
2768 except ExtractorError:
2769 urlh = None
2770 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
2771 if last_seq is None:
2772 no_fragment_score += 2
2773 last_segment_url = None
2774 continue
2775 else:
2776 should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
2777 no_fragment_score += 2
2778 if not should_continue:
2779 continue
2780
2781 if known_idx > last_seq:
2782 last_segment_url = None
2783 continue
2784
2785 last_seq += 1
2786
2787 if begin_index < 0 and known_idx < 0:
2788 # skip from the start when it's negative value
2789 known_idx = last_seq + begin_index
2790 if lack_early_segments:
2791 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
2792 try:
2793 for idx in range(known_idx, last_seq):
2794 # do not update sequence here or you'll get skipped some part of it
2795 should_continue, _ = _extract_sequence_from_mpd(False, False)
2796 if not should_continue:
2797 known_idx = idx - 1
2798 raise ExtractorError('breaking out of outer loop')
2799 last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
2800 yield {
2801 'url': last_segment_url,
2802 'fragment_count': last_seq,
2803 }
2804 if known_idx == last_seq:
2805 no_fragment_score += 5
2806 else:
2807 no_fragment_score = 0
2808 known_idx = last_seq
2809 except ExtractorError:
2810 continue
2811
2812 if manifestless_orig_fmt:
2813 # Stop at the first iteration if running for post-live manifestless;
2814 # fragment count no longer increase since it starts
2815 break
2816
2817 time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2818
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in the given ytcfgs, or None if there is none."""
    lookup_paths = (
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
    )
    found = traverse_obj(ytcfgs, *lookup_paths, get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', found) if found else None
2826
2827 def _download_player_url(self, video_id, fatal=False):
2828 res = self._download_webpage(
2829 'https://www.youtube.com/iframe_api',
2830 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
2831 if res:
2832 player_version = self._search_regex(
2833 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
2834 if player_version:
2835 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2836
2837 def _signature_cache_id(self, example_sig):
2838 """ Return a string representation of a signature """
2839 return '.'.join(str(len(part)) for part in example_sig.split('.'))
2840
@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first ``_PLAYER_INFO_RE`` pattern matching *player_url*.

    Raises ExtractorError when none of the patterns match.
    """
    candidates = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    # Lazy: stops at the first pattern that matches
    id_m = next(filter(None, candidates), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')
2850
2851 def _load_player(self, video_id, player_url, fatal=True):
2852 player_id = self._extract_player_info(player_url)
2853 if player_id not in self._code_cache:
2854 code = self._download_webpage(
2855 player_url, video_id, fatal=fatal,
2856 note='Downloading player ' + player_id,
2857 errnote='Download of %s failed' % player_url)
2858 if code:
2859 self._code_cache[player_id] = code
2860 return self._code_cache.get(player_id)
2861
2862 def _extract_signature_function(self, video_id, player_url, example_sig):
2863 player_id = self._extract_player_info(player_url)
2864
2865 # Read from filesystem cache
2866 func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
2867 assert os.path.basename(func_id) == func_id
2868
2869 self.write_debug(f'Extracting signature function {func_id}')
2870 cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
2871
2872 if not cache_spec:
2873 code = self._load_player(video_id, player_url)
2874 if code:
2875 res = self._parse_sig_js(code)
2876 test_string = ''.join(map(chr, range(len(example_sig))))
2877 cache_spec = [ord(c) for c in res(test_string)]
2878 self.cache.store('youtube-sigfuncs', func_id, cache_spec)
2879
2880 return lambda s: ''.join(s[i] for i in cache_spec)
2881
def _print_sig_code(self, func, example_sig):
    """
    Print Python source equivalent to the signature function `func`.

    Does nothing unless the 'youtube_print_sig_code' param is set.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        # Yields expressions over `s` for the index list: runs that change
        # by +1/-1 become slices, everything else becomes 's[n]'
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return f's[{starts}{ends}{steps}]'

        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # NOTE(review): with fewer than two indices the loop above never
        # binds `i`, so the line below would raise NameError
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    # Character n of the probe has code point n, so the code points of the
    # result are the source indices
    test_string = ''.join(map(chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
2923
def _parse_sig_js(self, jscode):
    """
    Locate the signature function in the player JS and wrap it.

    The function name is searched for with the patterns below; the returned
    callable passes its single argument to the interpreted JS function.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         # NOTE(review): the next pattern is identical to the previous one
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The interpreted function takes its arguments as a list
    return lambda s: initial_function([s])
2947
2948 def _cached(self, func, *cache_id):
2949 def inner(*args, **kwargs):
2950 if cache_id not in self._player_cache:
2951 try:
2952 self._player_cache[cache_id] = func(*args, **kwargs)
2953 except ExtractorError as e:
2954 self._player_cache[cache_id] = e
2955 except Exception as e:
2956 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
2957
2958 ret = self._player_cache[cache_id]
2959 if isinstance(ret, Exception):
2960 raise ret
2961 return ret
2962 return inner
2963
2964 def _decrypt_signature(self, s, video_id, player_url):
2965 """Turn the encrypted s field into a working signature"""
2966 extract_sig = self._cached(
2967 self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
2968 func = extract_sig(video_id, player_url, s)
2969 self._print_sig_code(func, s)
2970 return func(s)
2971
def _decrypt_nsig(self, s, video_id, player_url):
    """
    Turn the encrypted n field into a working signature

    Raises ExtractorError when player_url is None or when the nsig function
    code cannot be extracted. If the native interpreter fails, the function
    is run through PhantomJS instead; if the PhantomJS wrapper cannot be
    created, the original interpreter error is re-raised.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as e:
        raise ExtractorError('Unable to extract nsig function code', cause=e)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        # The built function is memoised per player_url
        extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        ret = extract_nsig(jsi, func_code)(s)
    except JSInterpreter.Exception as e:
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            raise e
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(e, only_once=True)

        # func_code is an (argument names, function body) pair
        args, func_body = func_code
        ret = jsi.execute(
            f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));',
            video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {ret}')
    return ret
3005
3006 def _extract_n_function_name(self, jscode):
3007 funcname, idx = self._search_regex(
3008 r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
3009 jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
3010 if not idx:
3011 return funcname
3012
3013 return json.loads(js_to_json(self._search_regex(
3014 rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
3015 f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
3016
def _extract_n_function_code(self, video_id, player_url):
    """
    Obtain the nsig function code for a player.

    @returns (jsi, player_id, func_code); func_code comes from the
             'youtube-nsig' cache when present, otherwise it is extracted
             from the player JS and stored in that cache
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    # On a cache hit the interpreter is built from the cached func_code
    # itself and the player JS is not loaded at all
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
        # NB: The end of the regex is intentionally kept strict
        {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % func_name,
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        # Reshape to ([argument name], body)
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
3042
3043 def _extract_n_function_from_code(self, jsi, func_code):
3044 func = jsi.extract_function_from_code(*func_code)
3045
3046 def extract_nsig(s):
3047 try:
3048 ret = func([s])
3049 except JSInterpreter.Exception:
3050 raise
3051 except Exception as e:
3052 raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
3053
3054 if ret.startswith('enhanced_except_'):
3055 raise JSInterpreter.Exception('Signature function returned an exception')
3056 return ret
3057
3058 return extract_nsig
3059
3060 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
3061 """
3062 Extract signatureTimestamp (sts)
3063 Required to tell API what sig/player version is in use.
3064 """
3065 sts = None
3066 if isinstance(ytcfg, dict):
3067 sts = int_or_none(ytcfg.get('STS'))
3068
3069 if not sts:
3070 # Attempt to extract from player
3071 if player_url is None:
3072 error_msg = 'Cannot extract signature timestamp without player_url.'
3073 if fatal:
3074 raise ExtractorError(error_msg)
3075 self.report_warning(error_msg)
3076 return
3077 code = self._load_player(video_id, player_url, fatal=fatal)
3078 if code:
3079 sts = int_or_none(self._search_regex(
3080 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
3081 'JS player signature timestamp', group='sts', fatal=fatal))
3082 return sts
3083
def _mark_watched(self, video_id, player_responses):
    """
    Request the playback tracking URLs of the player responses.

    Two URLs are requested in turn: 'videostatsPlaybackUrl', then
    'videostatsWatchtimeUrl'. If either is missing, a warning is reported
    and the method returns without trying the rest.
    """
    # is_full is 0 for the playback URL and 1 for the watchtime URL
    for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        # cpn generation algorithm is reverse engineered from base.js.
        # In fact it works even with dummy cpn.
        CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
        # 16 characters, each picked by masking a random integer to 0..63
        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))

        # more consistent results setting it to right before the end
        # ('len' from the query minus one; 1.5 is assumed when it is absent)
        video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]

        qs.update({
            'ver': ['2'],
            'cpn': [cpn],
            'cmt': video_length,
            'el': 'detailpage',  # otherwise defaults to "shorts"
        })

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs.update({
                'st': 0,
                'et': video_length,
            })

        # Re-encode the query; the second argument (doseq) expands list values
        url = urllib.parse.urlunparse(
            parsed_url._replace(query=urllib.parse.urlencode(qs, True)))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
3124
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """
    Yield url results for the YouTube videos referenced by a webpage.

    An alternate link to a YouTube watch URL takes priority: it is yielded
    alone and extraction is then stopped through cls.StopExtraction.
    Otherwise the parent's results are yielded, followed by lazyYT embeds
    and "YouTube Video Importer" players.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    mobj = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if mobj:
        yield cls.url_result(mobj.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    for id_ in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        yield cls.url_result(unescapeHTML(id_), cls, id_)

    # Wordpress "YouTube Video Importer" plugin
    for m in re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage):
        # Each match is a tuple of groups; the last one is the video id
        yield cls.url_result(m[-1], cls, m[-1])
3148
@classmethod
def extract_id(cls, url):
    """Return the id that ``get_temp_id`` finds in *url*; raise ExtractorError if there is none."""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
3155
def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar inside *data*."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )

    def start_seconds(chapter):
        # timeRangeStartMillis is scaled down by 1000
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        traverse_obj(data, chapters_path, expected_type=list),
        chapter_time=start_seconds,
        chapter_title=title_of,
        duration=duration)
3170
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the chapters of the first macro-markers list that yields any, else ``[]``."""
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list)

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
3183
3184 def _extract_chapters_from_description(self, description, duration):
3185 duration_re = r'(?:\d+:)?\d{1,2}:\d{2}'
3186 sep_re = r'(?m)^\s*(%s)\b\W*\s(%s)\s*$'
3187 return self._extract_chapters(
3188 re.findall(sep_re % (duration_re, r'.+?'), description or ''),
3189 chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1],
3190 duration=duration, strict=False) or self._extract_chapters(
3191 re.findall(sep_re % (r'.+?', duration_re), description or ''),
3192 chapter_time=lambda x: parse_duration(x[1]), chapter_title=lambda x: x[0],
3193 duration=duration, strict=False)
3194
3195 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
3196 if not duration:
3197 return
3198 chapter_list = [{
3199 'start_time': chapter_time(chapter),
3200 'title': chapter_title(chapter),
3201 } for chapter in chapter_list or []]
3202 if not strict:
3203 chapter_list.sort(key=lambda c: c['start_time'] or 0)
3204
3205 chapters = [{'start_time': 0}]
3206 for idx, chapter in enumerate(chapter_list):
3207 if chapter['start_time'] is None:
3208 self.report_warning(f'Incomplete chapter {idx}')
3209 elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
3210 chapters.append(chapter)
3211 elif chapter not in chapters:
3212 self.report_warning(
3213 f'Invalid start time ({chapter["start_time"]} < {chapters[-1]["start_time"]}) for chapter "{chapter["title"]}"')
3214 return chapters[1:]
3215
3216 def _extract_comment(self, comment_renderer, parent=None):
3217 comment_id = comment_renderer.get('commentId')
3218 if not comment_id:
3219 return
3220
3221 text = self._get_text(comment_renderer, 'contentText')
3222
3223 # Timestamp is an estimate calculated from the current time and time_text
3224 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
3225 timestamp = self._parse_time_text(time_text)
3226
3227 author = self._get_text(comment_renderer, 'authorText')
3228 author_id = try_get(comment_renderer,
3229 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)
3230
3231 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
3232 lambda x: x['likeCount']), str)) or 0
3233 author_thumbnail = try_get(comment_renderer,
3234 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)
3235
3236 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
3237 is_favorited = 'creatorHeart' in (try_get(
3238 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
3239 return {
3240 'id': comment_id,
3241 'text': text,
3242 'timestamp': timestamp,
3243 'time_text': time_text,
3244 'like_count': votes,
3245 'is_favorited': is_favorited,
3246 'author': author,
3247 'author_id': author_id,
3248 'author_thumbnail': author_thumbnail,
3249 'author_is_uploader': author_is_uploader,
3250 'parent': parent or 'root'
3251 }
3252
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """
    Yield comments page by page, starting from the continuation found in
    root_continuation_data.

    Pages are requested from the 'next' endpoint until no continuation is left.
    Reply threads are extracted by calling this method recursively with `parent`
    set to the parent comment's id and the same `tracker`, so that the counters
    are shared across the whole extraction.

    @param root_continuation_data   renderer the first continuation is read from
    @param ytcfg                    ytcfg handed to the API request helpers
    @param video_id                 video id; used to generate a continuation when
                                    none is found, and in warnings
    @param parent                   id of the parent comment for a reply thread,
                                    None for the top level
    @param tracker                  dict of running counters; created when falsy
    Raises self.CommentsDisabled when a top-level call yields no comments and
    root_continuation_data carries a message.
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        """Read the expected comment count and return the continuation of the requested sort order"""
        _continuation = None
        # The requested sort order does not depend on the content being inspected
        comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        """Yield the comments of one page, followed by each comment's replies"""
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare yield (None) tells the consumer below to stop the extraction
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # The first value (total limit) is not used in this method; missing values mean "no limit"
    _max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        try:
            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        except ExtractorError as e:
            # Ignore incomplete data error for replies if retries didn't work.
            # This is to allow any other parent comments and comment threads to be downloaded.
            # See: https://github.com/yt-dlp/yt-dlp/issues/4669
            if 'incomplete data' in str(e).lower() and parent and self.get_param('ignoreerrors') is True:
                self.report_warning(
                    'Received incomplete data for a comment reply thread and retrying did not help. '
                    'Ignoring to let other comments be downloaded.')
                # `response` still holds the previous page here. Falling through would
                # yield that page's replies again and re-request the same continuation,
                # so give up on this reply thread instead.
                return
            raise
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section of the first response is treated as the header
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
        raise self.CommentsDisabled
3409
3410 @staticmethod
3411 def _generate_comment_continuation(video_id):
3412 """
3413 Generates initial comment section continuation token from given video id
3414 """
3415 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3416 return base64.b64encode(token.encode()).decode()
3417
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """
    Entry point for comment extraction.

    Returns an iterator over the comments of the item section identified as
    'comment-item-section', cut off after the first value of the
    'max_comments' extractor argument when that value is an integer.
    """
    def _iter_comment_section():
        section = None
        for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if item.get('sectionIdentifier') == 'comment-item-section':
                section = item
                break
        yield from self._comment_entries(section, ytcfg, video_id)

    # int_or_none('') gives None, i.e. no upper bound for islice
    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_iter_comment_section(), 0, limit)
3428
3429 @staticmethod
3430 def _get_checkok_params():
3431 return {'contentCheckOk': True, 'racyCheckOk': True}
3432
3433 @classmethod
3434 def _generate_player_context(cls, sts=None):
3435 context = {
3436 'html5Preference': 'HTML5_PREF_WANTS',
3437 }
3438 if sts is not None:
3439 context['signatureTimestamp'] = sts
3440 return {
3441 'playbackContext': {
3442 'contentPlaybackContext': context
3443 },
3444 **cls._get_checkok_params()
3445 }
3446
@staticmethod
def _is_agegated(player_response):
    """
    Tell whether a player response indicates an age-gated video.

    True when the playability status has a desktopLegacyAgeGateReason, or when
    its status or reason contains one of the known age gate markers.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    # Only strings can be searched for markers; anything else is dropped
    reasons = traverse_obj(
        player_response, ('playabilityStatus', ('status', 'reason')), expected_type=str) or []
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # Statuses are upper case (e.g. AGE_VERIFICATION_REQUIRED) while the markers are
    # lower case, so compare case-insensitively or the status markers never match
    return any(
        expected in reason.lower()
        for reason in reasons for expected in AGE_GATE_REASONS)
3458
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of a player response is 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
3462
# Sent as `params` in the player API query (for stories and for the android client)
# and as the `pp` query parameter when downloading the webpage of a story
3463 _STORY_PLAYER_PARAMS = '8AEB'
3464
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """
    Request the player API JSON for video_id using the given client.

    The signature timestamp is only looked up when a player_url is given.
    Returns the response, or None when the response is empty.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)

    signature_timestamp = None
    if player_url:
        signature_timestamp = self._extract_signature_timestamp(
            video_id, player_url, master_ytcfg, fatal=False)

    api_headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    player_query = {'videoId': video_id}
    # Stories and the android client both get the story player params
    if smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android':
        player_query['params'] = self._STORY_PLAYER_PARAMS
    player_query.update(self._generate_player_context(signature_timestamp))

    readable_client = client.replace('_', ' ').strip()
    player_response = self._extract_response(
        item_id=video_id, ep='player', query=player_query,
        ytcfg=player_ytcfg, headers=api_headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % readable_client)
    return player_response or None
3486
def _get_requested_clients(self, url, smuggled_data):
    """
    Work out which innertube clients to query, in order and without duplicates.

    Clients come from the 'player_client' extractor argument; 'default' and 'all'
    are expanded, unsupported names are skipped with a warning, and the default
    clients are used when nothing valid was requested. For music URLs, the
    '<client>_music' variant of every requested client is added when it exists.
    """
    requested_clients = []
    default = ['android', 'web']
    # Clients starting with an underscore cannot be requested explicitly
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy, so that extending the result below cannot alter the defaults
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first: extending a list from a generator that iterates
        # the very same list would also visit the entries being appended
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
3510
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """
    Collect player responses from every requested client.

    Further clients may be queued while iterating, when a response turns out to
    be unplayable or age-gated. Returns a tuple (player responses, player_url).
    If a client failed and no response was collected at all, the last error is
    raised; otherwise it is only reported as a warning.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    known_clients = set(clients)
    # Reversed so that popping from the end processes the clients in requested order
    pending_clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for candidate in client_names:
            actual_client = _split_innertube_client(candidate)[0]
            if actual_client not in INNERTUBE_CLIENTS or actual_client in known_clients:
                continue
            pending_clients.append(candidate)
            known_clients.add(actual_client)
            return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        prs.append(stripped_pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while pending_clients:
        client, base_client, variant = _split_innertube_client(pending_clients.pop())
        is_web = client == 'web'
        player_ytcfg = master_ytcfg if is_web else {}
        if 'configs' not in self._configuration_arg('player_skip') and not is_web:
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        if require_js_player and not player_url and not tried_iframe_fallback:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if is_web and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            # Only the most recent error is kept; earlier ones are reported as warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not prs:
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
3592
3593 def _needs_live_processing(self, live_status, duration):
3594 if (live_status == 'is_live' and self.get_param('live_from_start')
3595 or live_status == 'post_live' and (duration or 0) > 4 * 3600):
3596 return live_status
3597
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
    """
    Yield the format dicts found in streaming_data, then the subtitles.

    The formats listed directly in the streaming data are yielded first, followed
    by those of the HLS and DASH manifests unless these are skipped. The very last
    item yielded is not a format but the dict of subtitles merged from the manifests.

    @param streaming_data   iterable of streamingData dicts from the player responses
    @param video_id         video id, used in messages
    @param player_url       URL of the JS player used for signature/nsig decryption
    @param live_status      live status of the video (e.g. 'is_live', 'post_live')
    @param duration         video duration; used to detect damaged formats
    """
    itags, stream_ids = collections.defaultdict(set), []
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...))

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        # The same stream may be listed by several clients; only the first one is used
        stream_id = (itag, audio_track.get('id'), fmt.get('isDrc'))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit null value for audioQuality
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                        f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
                        video_id=video_id, only_once=True)
                throttled = True

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault') and 10
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # quality is None when the format declares neither quality nor audioQuality
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                'DRC' if fmt.get('isDrc') else None,
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else '') or None,
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'

        if itag:
            itags[itag].add(('https', dct.get('language')))
            stream_ids.append(stream_id)
        yield dct

    needs_live_processing = self._needs_live_processing(live_status, duration)
    skip_bad_formats = not self._configuration_arg('include_incomplete_formats')

    skip_manifests = set(self._configuration_arg('skip'))
    if (not self.get_param('youtube_include_hls_manifest', True)
            or needs_live_processing == 'is_live'  # These will be filtered out by YoutubeDL anyway
            or needs_live_processing and skip_bad_formats):
        skip_manifests.add('hls')

    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    if self._configuration_arg('include_live_dash'):
        self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
                                            'Use include_incomplete_formats extractor argument instead')
    elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
        skip_manifests.add('dash')

    def process_manifest_format(f, proto, itag):
        """Set format_id and quality of a manifest format; return False if it is a duplicate"""
        key = (proto, f.get('language'))
        if key in itags[itag]:
            return False
        itags[itag].add(key)

        # Tell the protocols apart when the same itag is available through several of them
        if any(p != proto for p, _ in itags[itag]):
            f['format_id'] = f'{itag}-{proto}'
        elif itag:
            f['format_id'] = itag

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Fall back to the quality of the known format with the closest height
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        return True

    subtitles = {}
    for sd in streaming_data:
        hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if needs_live_processing:
                        f['is_from_start'] = True

                    yield f
    # Callers split this last item off from the formats
    yield subtitles
3804
def _extract_storyboard(self, player_responses, duration):
    """
    Yield one 'mhtml' format dict per storyboard level of the storyboard spec.

    The spec is a '|'-separated string: the first part is the base URL, every
    further part describes one level as 8 '#'-separated fields. Nothing is
    yielded when there is no usable base URL or when the duration is unknown.

    @param player_responses     player responses to read the storyboard spec from
    @param duration             video duration; needed for fps and fragment durations
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The spec is reversed, so the base URL is now the last item
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    if not duration:
        # fps and fragment durations are computed by dividing with/by the duration;
        # without a usable value this would raise TypeError or ZeroDivisionError
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one image holding a grid of cols x rows frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may cover less than a full fragment duration
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
3842
3843 def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
3844 webpage = None
3845 if 'webpage' not in self._configuration_arg('player_skip'):
3846 query = {'bpctr': '9999999999', 'has_verified': '1'}
3847 if smuggled_data.get('is_story'):
3848 query['pp'] = self._STORY_PLAYER_PARAMS
3849 webpage = self._download_webpage(
3850 webpage_url, video_id, fatal=False, query=query)
3851
3852 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
3853
3854 player_responses, player_url = self._extract_player_responses(
3855 self._get_requested_clients(url, smuggled_data),
3856 video_id, webpage, master_ytcfg, smuggled_data)
3857
3858 return webpage, master_ytcfg, player_responses, player_url
3859
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """
    Determine the live status of the video and extract its formats.

    Returns a tuple
    (live_broadcast_details, live_status, streaming_data, formats, subtitles).
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    post_live = get_first(video_details, 'isPostLiveDvr')

    # The first matching condition wins
    if post_live:
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif is_upcoming:
        live_status = 'is_upcoming'
    elif live_content:
        live_status = 'was_live'
    elif False in (is_live, live_content):
        live_status = 'not_live'
    else:
        live_status = None

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
    # The generator yields the subtitles as its last item, after all formats
    *formats, subtitles = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)

    return live_broadcast_details, live_status, streaming_data, formats, subtitles
3878
3879 def _real_extract(self, url):
3880 url, smuggled_data = unsmuggle_url(url, {})
3881 video_id = self._match_id(url)
3882
3883 base_url = self.http_scheme() + '//www.youtube.com/'
3884 webpage_url = base_url + 'watch?v=' + video_id
3885
3886 webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
3887
3888 playability_statuses = traverse_obj(
3889 player_responses, (..., 'playabilityStatus'), expected_type=dict)
3890
3891 trailer_video_id = get_first(
3892 playability_statuses,
3893 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
3894 expected_type=str)
3895 if trailer_video_id:
3896 return self.url_result(
3897 trailer_video_id, self.ie_key(), trailer_video_id)
3898
3899 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
3900 if webpage else (lambda x: None))
3901
3902 video_details = traverse_obj(player_responses, (..., 'videoDetails'), expected_type=dict)
3903 microformats = traverse_obj(
3904 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
3905 expected_type=dict)
3906
3907 translated_title = self._get_text(microformats, (..., 'title'))
3908 video_title = (self._preferred_lang and translated_title
3909 or get_first(video_details, 'title') # primary
3910 or translated_title
3911 or search_meta(['og:title', 'twitter:title', 'title']))
3912 translated_description = self._get_text(microformats, (..., 'description'))
3913 original_description = get_first(video_details, 'shortDescription')
3914 video_description = (
3915 self._preferred_lang and translated_description
3916 # If original description is blank, it will be an empty string.
3917 # Do not prefer translated description in this case.
3918 or original_description if original_description is not None else translated_description)
3919
3920 multifeed_metadata_list = get_first(
3921 player_responses,
3922 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
3923 expected_type=str)
3924 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
3925 if self.get_param('noplaylist'):
3926 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
3927 else:
3928 entries = []
3929 feed_ids = []
3930 for feed in multifeed_metadata_list.split(','):
3931 # Unquote should take place before split on comma (,) since textual
3932 # fields may contain comma as well (see
3933 # https://github.com/ytdl-org/youtube-dl/issues/8536)
3934 feed_data = urllib.parse.parse_qs(
3935 urllib.parse.unquote_plus(feed))
3936
3937 def feed_entry(name):
3938 return try_get(
3939 feed_data, lambda x: x[name][0], str)
3940
3941 feed_id = feed_entry('id')
3942 if not feed_id:
3943 continue
3944 feed_title = feed_entry('title')
3945 title = video_title
3946 if feed_title:
3947 title += ' (%s)' % feed_title
3948 entries.append({
3949 '_type': 'url_transparent',
3950 'ie_key': 'Youtube',
3951 'url': smuggle_url(
3952 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
3953 {'force_singlefeed': True}),
3954 'title': title,
3955 })
3956 feed_ids.append(feed_id)
3957 self.to_screen(
3958 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
3959 % (', '.join(feed_ids), video_id))
3960 return self.playlist_result(
3961 entries, video_id, video_title, video_description)
3962
3963 duration = (int_or_none(get_first(video_details, 'lengthSeconds'))
3964 or int_or_none(get_first(microformats, 'lengthSeconds'))
3965 or parse_duration(search_meta('duration')) or None)
3966
3967 live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \
3968 self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)
3969 if live_status == 'post_live':
3970 self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')
3971
3972 if not formats:
3973 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
3974 self.report_drm(video_id)
3975 pemr = get_first(
3976 playability_statuses,
3977 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
3978 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
3979 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
3980 if subreason:
3981 if subreason == 'The uploader has not made this video available in your country.':
3982 countries = get_first(microformats, 'availableCountries')
3983 if not countries:
3984 regions_allowed = search_meta('regionsAllowed')
3985 countries = regions_allowed.split(',') if regions_allowed else None
3986 self.raise_geo_restricted(subreason, countries, metadata_available=True)
3987 reason += f'. {subreason}'
3988 if reason:
3989 self.raise_no_formats(reason, expected=True)
3990
3991 keywords = get_first(video_details, 'keywords', expected_type=list) or []
3992 if not keywords and webpage:
3993 keywords = [
3994 unescapeHTML(m.group('content'))
3995 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
3996 for keyword in keywords:
3997 if keyword.startswith('yt:stretch='):
3998 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
3999 if mobj:
4000 # NB: float is intentional for forcing float division
4001 w, h = (float(v) for v in mobj.groups())
4002 if w > 0 and h > 0:
4003 ratio = w / h
4004 for f in formats:
4005 if f.get('vcodec') != 'none':
4006 f['stretched_ratio'] = ratio
4007 break
4008 thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
4009 thumbnail_url = search_meta(['og:image', 'twitter:image'])
4010 if thumbnail_url:
4011 thumbnails.append({
4012 'url': thumbnail_url,
4013 })
4014 original_thumbnails = thumbnails.copy()
4015
4016 # The best resolution thumbnails sometimes does not appear in the webpage
4017 # See: https://github.com/yt-dlp/yt-dlp/issues/340
4018 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
4019 thumbnail_names = [
4020 # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
4021 # in resolution, these are not the custom thumbnail. So de-prioritize them
4022 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
4023 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
4024 ]
4025 n_thumbnail_names = len(thumbnail_names)
4026 thumbnails.extend({
4027 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
4028 video_id=video_id, name=name, ext=ext,
4029 webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),
4030 } for name in thumbnail_names for ext in ('webp', 'jpg'))
4031 for thumb in thumbnails:
4032 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
4033 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
4034 self._remove_duplicate_formats(thumbnails)
4035 self._downloader._sort_thumbnails(original_thumbnails)
4036
4037 category = get_first(microformats, 'category') or search_meta('genre')
4038 channel_id = str_or_none(
4039 get_first(video_details, 'channelId')
4040 or get_first(microformats, 'externalChannelId')
4041 or search_meta('channelId'))
4042 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
4043
4044 live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
4045 live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
4046 if not duration and live_end_time and live_start_time:
4047 duration = live_end_time - live_start_time
4048
4049 needs_live_processing = self._needs_live_processing(live_status, duration)
4050
4051 def is_bad_format(fmt):
4052 if needs_live_processing and not fmt.get('is_from_start'):
4053 return True
4054 elif (live_status == 'is_live' and needs_live_processing != 'is_live'
4055 and fmt.get('protocol') == 'http_dash_segments'):
4056 return True
4057
4058 for fmt in filter(is_bad_format, formats):
4059 fmt['preference'] = (fmt.get('preference') or -1) - 10
4060 fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ')
4061
4062 if needs_live_processing:
4063 self._prepare_live_from_start_formats(
4064 formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')
4065
4066 formats.extend(self._extract_storyboard(player_responses, duration))
4067
4068 info = {
4069 'id': video_id,
4070 'title': video_title,
4071 'formats': formats,
4072 'thumbnails': thumbnails,
4073 # The best thumbnail that we are sure exists. Prevents unnecessary
4074 # URL checking if user don't care about getting the best possible thumbnail
4075 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
4076 'description': video_description,
4077 'uploader': get_first(video_details, 'author'),
4078 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
4079 'uploader_url': owner_profile_url,
4080 'channel_id': channel_id,
4081 'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),
4082 'duration': duration,
4083 'view_count': int_or_none(
4084 get_first((video_details, microformats), (..., 'viewCount'))
4085 or search_meta('interactionCount')),
4086 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
4087 'age_limit': 18 if (
4088 get_first(microformats, 'isFamilySafe') is False
4089 or search_meta('isFamilyFriendly') == 'false'
4090 or search_meta('og:restrictions:age') == '18+') else 0,
4091 'webpage_url': webpage_url,
4092 'categories': [category] if category else None,
4093 'tags': keywords,
4094 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
4095 'live_status': live_status,
4096 'release_timestamp': live_start_time,
4097 '_format_sort_fields': ( # source_preference is lower for throttled/potentially damaged formats
4098 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto')
4099 }
4100
4101 subtitles = {}
4102 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
4103 if pctr:
4104 def get_lang_code(track):
4105 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
4106 or track.get('languageCode'))
4107
4108 # Converted into dicts to remove duplicates
4109 captions = {
4110 get_lang_code(sub): sub
4111 for sub in traverse_obj(pctr, (..., 'captionTracks', ...))}
4112 translation_languages = {
4113 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
4114 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...))}
4115
4116 def process_language(container, base_url, lang_code, sub_name, query):
4117 lang_subs = container.setdefault(lang_code, [])
4118 for fmt in self._SUBTITLE_FORMATS:
4119 query.update({
4120 'fmt': fmt,
4121 })
4122 lang_subs.append({
4123 'ext': fmt,
4124 'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
4125 'name': sub_name,
4126 })
4127
4128 # NB: Constructing the full subtitle dictionary is slow
4129 get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
4130 self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
4131 for lang_code, caption_track in captions.items():
4132 base_url = caption_track.get('baseUrl')
4133 orig_lang = parse_qs(base_url).get('lang', [None])[-1]
4134 if not base_url:
4135 continue
4136 lang_name = self._get_text(caption_track, 'name', max_runs=1)
4137 if caption_track.get('kind') != 'asr':
4138 if not lang_code:
4139 continue
4140 process_language(
4141 subtitles, base_url, lang_code, lang_name, {})
4142 if not caption_track.get('isTranslatable'):
4143 continue
4144 for trans_code, trans_name in translation_languages.items():
4145 if not trans_code:
4146 continue
4147 orig_trans_code = trans_code
4148 if caption_track.get('kind') != 'asr' and trans_code != 'und':
4149 if not get_translated_subs:
4150 continue
4151 trans_code += f'-{lang_code}'
4152 trans_name += format_field(lang_name, None, ' from %s')
4153 # Add an "-orig" label to the original language so that it can be distinguished.
4154 # The subs are returned without "-orig" as well for compatibility
4155 if lang_code == f'a-{orig_trans_code}':
4156 process_language(
4157 automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
4158 # Setting tlang=lang returns damaged subtitles.
4159 process_language(automatic_captions, base_url, trans_code, trans_name,
4160 {} if orig_lang == orig_trans_code else {'tlang': trans_code})
4161
4162 info['automatic_captions'] = automatic_captions
4163 info['subtitles'] = subtitles
4164
4165 parsed_url = urllib.parse.urlparse(url)
4166 for component in [parsed_url.fragment, parsed_url.query]:
4167 query = urllib.parse.parse_qs(component)
4168 for k, v in query.items():
4169 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
4170 d_k += '_time'
4171 if d_k not in info and k in s_ks:
4172 info[d_k] = parse_duration(query[k][0])
4173
4174 # Youtube Music Auto-generated description
4175 if video_description:
4176 mobj = re.search(
4177 r'''(?xs)
4178 (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
4179 (?P<album>[^\n]+)
4180 (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
4181 (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
4182 (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
4183 .+\nAuto-generated\ by\ YouTube\.\s*$
4184 ''', video_description)
4185 if mobj:
4186 release_year = mobj.group('release_year')
4187 release_date = mobj.group('release_date')
4188 if release_date:
4189 release_date = release_date.replace('-', '')
4190 if not release_year:
4191 release_year = release_date[:4]
4192 info.update({
4193 'album': mobj.group('album'.strip()),
4194 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
4195 'track': mobj.group('track').strip(),
4196 'release_date': release_date,
4197 'release_year': int_or_none(release_year),
4198 })
4199
4200 initial_data = None
4201 if webpage:
4202 initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
4203 if not initial_data:
4204 query = {'videoId': video_id}
4205 query.update(self._get_checkok_params())
4206 initial_data = self._extract_response(
4207 item_id=video_id, ep='next', fatal=False,
4208 ytcfg=master_ytcfg, query=query,
4209 headers=self.generate_api_headers(ytcfg=master_ytcfg),
4210 note='Downloading initial data API JSON')
4211
4212 info['comment_count'] = traverse_obj(initial_data, (
4213 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
4214 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'
4215 ), (
4216 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
4217 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'
4218 ), expected_type=int_or_none, get_all=False)
4219
4220 try: # This will error if there is no livechat
4221 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
4222 except (KeyError, IndexError, TypeError):
4223 pass
4224 else:
4225 info.setdefault('subtitles', {})['live_chat'] = [{
4226 # url is needed to set cookies
4227 'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
4228 'video_id': video_id,
4229 'ext': 'json',
4230 'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')
4231 else 'youtube_live_chat_replay'),
4232 }]
4233
4234 if initial_data:
4235 info['chapters'] = (
4236 self._extract_chapters_from_json(initial_data, duration)
4237 or self._extract_chapters_from_engagement_panel(initial_data, duration)
4238 or self._extract_chapters_from_description(video_description, duration)
4239 or None)
4240
4241 contents = traverse_obj(
4242 initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
4243 expected_type=list, default=[])
4244
4245 vpir = get_first(contents, 'videoPrimaryInfoRenderer')
4246 if vpir:
4247 stl = vpir.get('superTitleLink')
4248 if stl:
4249 stl = self._get_text(stl)
4250 if try_get(
4251 vpir,
4252 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
4253 info['location'] = stl
4254 else:
4255 mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
4256 if mobj:
4257 info.update({
4258 'series': mobj.group(1),
4259 'season_number': int(mobj.group(2)),
4260 'episode_number': int(mobj.group(3)),
4261 })
4262 for tlb in (try_get(
4263 vpir,
4264 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
4265 list) or []):
4266 tbrs = variadic(
4267 traverse_obj(
4268 tlb, ('toggleButtonRenderer', ...),
4269 ('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer')))
4270 for tbr in tbrs:
4271 for getter, regex in [(
4272 lambda x: x['defaultText']['accessibility']['accessibilityData'],
4273 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
4274 lambda x: x['accessibility'],
4275 lambda x: x['accessibilityData']['accessibilityData'],
4276 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
4277 label = (try_get(tbr, getter, dict) or {}).get('label')
4278 if label:
4279 mobj = re.match(regex, label)
4280 if mobj:
4281 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
4282 break
4283 sbr_tooltip = try_get(
4284 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
4285 if sbr_tooltip:
4286 like_count, dislike_count = sbr_tooltip.split(' / ')
4287 info.update({
4288 'like_count': str_to_int(like_count),
4289 'dislike_count': str_to_int(dislike_count),
4290 })
4291 vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
4292 if vcr:
4293 vc = self._get_count(vcr, 'viewCount')
4294 # Upcoming premieres with waiting count are treated as live here
4295 if vcr.get('isLive'):
4296 info['concurrent_view_count'] = vc
4297 elif info.get('view_count') is None:
4298 info['view_count'] = vc
4299
4300 vsir = get_first(contents, 'videoSecondaryInfoRenderer')
4301 if vsir:
4302 vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
4303 info.update({
4304 'channel': self._get_text(vor, 'title'),
4305 'channel_follower_count': self._get_count(vor, 'subscriberCountText')})
4306
4307 rows = try_get(
4308 vsir,
4309 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
4310 list) or []
4311 multiple_songs = False
4312 for row in rows:
4313 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
4314 multiple_songs = True
4315 break
4316 for row in rows:
4317 mrr = row.get('metadataRowRenderer') or {}
4318 mrr_title = mrr.get('title')
4319 if not mrr_title:
4320 continue
4321 mrr_title = self._get_text(mrr, 'title')
4322 mrr_contents_text = self._get_text(mrr, ('contents', 0))
4323 if mrr_title == 'License':
4324 info['license'] = mrr_contents_text
4325 elif not multiple_songs:
4326 if mrr_title == 'Album':
4327 info['album'] = mrr_contents_text
4328 elif mrr_title == 'Artist':
4329 info['artist'] = mrr_contents_text
4330 elif mrr_title == 'Song':
4331 info['track'] = mrr_contents_text
4332
4333 fallbacks = {
4334 'channel': 'uploader',
4335 'channel_id': 'uploader_id',
4336 'channel_url': 'uploader_url',
4337 }
4338
4339 # The upload date for scheduled, live and past live streams / premieres in microformats
4340 # may be different from the stream date. Although not in UTC, we will prefer it in this case.
4341 # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
4342 upload_date = (
4343 unified_strdate(get_first(microformats, 'uploadDate'))
4344 or unified_strdate(search_meta('uploadDate')))
4345 if not upload_date or (
4346 live_status in ('not_live', None)
4347 and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
4348 ):
4349 upload_date = strftime_or_none(
4350 self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date
4351 info['upload_date'] = upload_date
4352
4353 for to, frm in fallbacks.items():
4354 if not info.get(to):
4355 info[to] = info.get(frm)
4356
4357 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
4358 v = info.get(s_k)
4359 if v:
4360 info[d_k] = v
4361
4362 badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))
4363
4364 is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
4365 or get_first(video_details, 'isPrivate', expected_type=bool))
4366
4367 info['availability'] = (
4368 'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
4369 else self._availability(
4370 is_private=is_private,
4371 needs_premium=(
4372 self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)
4373 or False if initial_data and is_private is not None else None),
4374 needs_subscription=(
4375 self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)
4376 or False if initial_data and is_private is not None else None),
4377 needs_auth=info['age_limit'] >= 18,
4378 is_unlisted=None if is_private is None else (
4379 self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
4380 or get_first(microformats, 'isUnlisted', expected_type=bool))))
4381
4382 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
4383
4384 self.mark_watched(video_id, player_responses)
4385
4386 return info
4387
4388
4389 class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
@staticmethod
def passthrough_smuggled_data(func):
    """
    Decorator for methods with the signature func(self, url, smuggled_data).

    The returned wrapper takes (self, url): it unsmuggles the URL, flags
    music URLs in the smuggled data, calls func, and then re-attaches the
    smuggled data to the result (and to each of its entries) whenever they
    are of type 'url' or 'url_transparent'.
    """
    redirectable_hosts = ('www.youtube.com', 'music.youtube.com')

    def _smuggle(info, smuggled_data):
        # Only URL-type results are forwarded to another extractor
        if info.get('_type') in ('url', 'url_transparent'):
            if smuggled_data.get('is_music_url'):
                parts = urllib.parse.urlparse(info['url'])
                if parts.netloc in redirectable_hosts:
                    # The music host already carries the information, so the flag is consumed
                    smuggled_data.pop('is_music_url')
                    info['url'] = urllib.parse.urlunparse(parts._replace(netloc='music.youtube.com'))
            if smuggled_data:
                info['url'] = smuggle_url(info['url'], smuggled_data)
        return info

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        result = func(self, url, smuggled_data)
        if not smuggled_data:
            return result
        _smuggle(result, smuggled_data)
        if result.get('entries'):
            # Lazily processed; every entry gets its own copy since _smuggle may pop keys
            result['entries'] = (_smuggle(entry, smuggled_data.copy()) for entry in result['entries'])
        return result

    return wrapper
4416
def _extract_channel_id(self, webpage):
    """
    Return the channel ID advertised by the meta tags of a webpage.

    The 'channelId' meta tag is tried first. If it yields nothing, a channel
    URL is taken from the og/al/twitter URL meta tags and the ID is read
    from its "/channel/<id>" path component.

    NOTE(review): the fallback lookups are made without a default, so they
    presumably raise when nothing matches - confirm against InfoExtractor.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier must be inside the group: "(...)+" repeats the group and
    # keeps only its last iteration, i.e. the final character of the ID
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
4429
4430 @staticmethod
4431 def _extract_basic_item_renderer(item):
4432 # Modified from _extract_grid_item_renderer
4433 known_basic_renderers = (
4434 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
4435 )
4436 for key, renderer in item.items():
4437 if not isinstance(renderer, dict):
4438 continue
4439 elif key in known_basic_renderers:
4440 return renderer
4441 elif key.startswith('grid') and key.endswith('Renderer'):
4442 return renderer
4443
def _extract_channel_renderer(self, renderer):
    """
    Build a 'url' type result pointing at the channel described by `renderer`.

    `renderer` must contain 'channelId' (a KeyError is raised otherwise);
    the remaining fields are read through the _get_text/_get_count/
    _extract_thumbnails helpers.
    """
    cid = renderer['channelId']
    name = self._get_text(renderer, 'title')
    page_url = f'https://www.youtube.com/channel/{cid}'
    # Keyword order defines the key order of the resulting dict
    return dict(
        _type='url',
        url=page_url,
        id=cid,
        ie_key=YoutubeTabIE.ie_key(),
        channel=name,
        channel_id=cid,
        channel_url=page_url,
        title=name,
        channel_follower_count=self._get_count(renderer, 'subscriberCountText'),
        thumbnails=self._extract_thumbnails(renderer, 'thumbnail'),
        playlist_count=self._get_count(renderer, 'videoCountText'),
        description=self._get_text(renderer, 'descriptionSnippet'),
    )
4462
4463 def _grid_entries(self, grid_renderer):
4464 for item in grid_renderer['items']:
4465 if not isinstance(item, dict):
4466 continue
4467 renderer = self._extract_basic_item_renderer(item)
4468 if not isinstance(renderer, dict):
4469 continue
4470 title = self._get_text(renderer, 'title')
4471
4472 # playlist
4473 playlist_id = renderer.get('playlistId')
4474 if playlist_id:
4475 yield self.url_result(
4476 'https://www.youtube.com/playlist?list=%s' % playlist_id,
4477 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4478 video_title=title)
4479 continue
4480 # video
4481 video_id = renderer.get('videoId')
4482 if video_id:
4483 yield self._extract_video(renderer)
4484 continue
4485 # channel
4486 channel_id = renderer.get('channelId')
4487 if channel_id:
4488 yield self._extract_channel_renderer(renderer)
4489 continue
4490 # generic endpoint URL support
4491 ep_url = urljoin('https://www.youtube.com/', try_get(
4492 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
4493 str))
4494 if ep_url:
4495 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
4496 if ie.suitable(ep_url):
4497 yield self.url_result(
4498 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
4499 break
4500
def _music_reponsive_list_entry(self, renderer):
    """
    Turn a music list item renderer into a URL result on music.youtube.com.

    Checked in order: a video (playlistItemData.videoId), a playlist from the
    watch endpoint (optionally together with a video), and a browse endpoint.
    Returns None if none of them is present.
    """
    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(f'https://music.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    watch_endpoint = ('navigationEndpoint', 'watchEndpoint')
    playlist_id = traverse_obj(renderer, (*watch_endpoint, 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, (*watch_endpoint, 'videoId'))
        if video_id:
            target = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            target = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
                           ie=YoutubeTabIE.ie_key(), video_id=browse_id)
4518
4519 def _shelf_entries_from_content(self, shelf_renderer):
4520 content = shelf_renderer.get('content')
4521 if not isinstance(content, dict):
4522 return
4523 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
4524 if renderer:
4525 # TODO: add support for nested playlists so each shelf is processed
4526 # as separate playlist
4527 # TODO: this includes only first N items
4528 yield from self._grid_entries(renderer)
4529 renderer = content.get('horizontalListRenderer')
4530 if renderer:
4531 # TODO
4532 pass
4533
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """
    Yield a URL result for the shelf's endpoint, followed by the entries
    found in the shelf's own content.

    With skip_channels set, a shelf whose URL contains '/channels?' yields
    nothing at all.
    """
    shelf_url = urljoin('https://www.youtube.com', try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str))
    # Links to other channels are recognised by their URL; checking
    # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
    # will not work
    if shelf_url and skip_channels and '/channels?' in shelf_url:
        return
    if shelf_url:
        title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
4549
4550 def _playlist_entries(self, video_list_renderer):
4551 for content in video_list_renderer['contents']:
4552 if not isinstance(content, dict):
4553 continue
4554 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
4555 if not isinstance(renderer, dict):
4556 continue
4557 video_id = renderer.get('videoId')
4558 if not video_id:
4559 continue
4560 yield self._extract_video(renderer)
4561
def _rich_entries(self, rich_grid_renderer):
    """
    Yield the video held by a rich item renderer, if any.

    The video is looked up under content.videoRenderer or
    content.reelItemRenderer and is only used when it has a 'videoId'.
    """
    renderer = traverse_obj(
        rich_grid_renderer, ('content', ('videoRenderer', 'reelItemRenderer')), get_all=False) or {}
    if renderer.get('videoId'):
        yield self._extract_video(renderer)
4569
4570 def _video_entry(self, video_renderer):
4571 video_id = video_renderer.get('videoId')
4572 if video_id:
4573 return self._extract_video(video_renderer)
4574
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """
    Return a URL result for the page a hashtag tile links to, titled with
    the tile's hashtag text. Returns None if the tile has no usable URL.
    """
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4581
def _post_thread_entries(self, post_thread_renderer):
    """
    Yield the media referenced by a community post.

    In order: the attached video, the attached playlist, and every link in
    the post text that YoutubeIE can handle (except a link to the attached
    video itself). Nothing is yielded if the post renderer is missing.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        linked_video_id = YoutubeIE._match_id(ep_url)
        # The attached video has already been yielded above
        if linked_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
4617
4618 def _post_thread_continuation_entries(self, post_thread_continuation):
4619 contents = post_thread_continuation.get('contents')
4620 if not isinstance(contents, list):
4621 return
4622 for content in contents:
4623 renderer = content.get('backstagePostThreadRenderer')
4624 if isinstance(renderer, dict):
4625 yield from self._post_thread_entries(renderer)
4626 continue
4627 renderer = content.get('videoRenderer')
4628 if isinstance(renderer, dict):
4629 yield self._video_entry(renderer)
4630
4631 r''' # unused
4632 def _rich_grid_entries(self, contents):
4633 for content in contents:
4634 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4635 if video_renderer:
4636 entry = self._video_entry(video_renderer)
4637 if entry:
4638 yield entry
4639 '''
4640
def _report_history_entries(self, renderer):
    """Yield a YoutubeIE URL result for every linked URL found in the table rows of `renderer`."""
    linked_url_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for url in traverse_obj(renderer, linked_url_path):
        yield self.url_result(urljoin('https://www.youtube.com', url), YoutubeIE)
4647
def _extract_entries(self, parent_renderer, continuation_list):
    """
    Yield the entries found in parent_renderer['contents'].

    `continuation_list` is an output parameter: it is reset to [None] and
    its single item is then overwritten with whatever _extract_continuation
    returns for the renderers that were processed. As this is a generator,
    the reset happens when iteration starts and the final value is only
    available once the generator is exhausted.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]
    contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
    for content in contents:
        if not isinstance(content, dict):
            continue
        # Section-like container of this content item, if it has one
        # (presumably the first of these keys that holds a dict - confirm with traverse_obj)
        is_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            # No section container: the item itself may be a rich item or a report history section
            if content.get('richItemRenderer'):
                for entry in self._rich_entries(content['richItemRenderer']):
                    yield entry
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
        for isr_content in isr_contents:
            if not isinstance(isr_content, dict):
                continue

            # Renderer key -> callable returning an iterable of entries for that renderer
            known_renderers = {
                'playlistVideoListRenderer': self._playlist_entries,
                'gridRenderer': self._grid_entries,
                'reelShelfRenderer': self._grid_entries,
                'shelfRenderer': self._shelf_entries,
                'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
                'backstagePostThreadRenderer': self._post_thread_entries,
                'videoRenderer': lambda x: [self._video_entry(x)],
                'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
                'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
                'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
            }
            for key, renderer in isr_content.items():
                if key not in known_renderers:
                    continue
                for entry in known_renderers[key](renderer):
                    # The single-entry helpers wrapped in lambdas above may return None
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                # Only the first known renderer of each item is processed
                break

        # Fall back to the continuation of the section container ...
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    # ... and finally to that of the parent renderer
    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
4700
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """
    Yield all entries of a tab, requesting continuation pages as needed.

    The entries in tab['content'] are yielded first. After that, pages are
    requested through _extract_response for as long as a continuation is
    available and the response contains a renderer that is known here.
    Nothing is yielded if the tab has no 'content' dict.
    """
    continuation_list = [None]
    # The lambda looks `continuation_list` up when it is called, so it also
    # sees the fresh list that the name is rebound to inside the loop below
    extract_entries = lambda x: self._extract_entries(x, continuation_list)
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    # Filled in by _extract_entries while the generator above was consumed
    continuation = continuation_list[0]

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        # Key of the first continuation item -> (handler, parent key).
        # With a parent key, the handler receives {parent key: continuation_items};
        # with None, it receives continuation_items unchanged
        known_renderers = {
            'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'gridChannelRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
            'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
            'playlistVideoListContinuation': (self._playlist_entries, None),
            'gridContinuation': (self._grid_entries, None),
            'itemSectionContinuation': (self._post_thread_continuation_entries, None),
            'sectionListContinuation': (extract_entries, None),  # for feeds
        }

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems'
        ), 'continuationContents', get_all=False)
        # Presumably the first item when continuation_items is a list, otherwise
        # continuation_items itself - confirm with traverse_obj
        continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for key in continuation_item.keys():
            if key not in known_renderers:
                continue
            func, parent_key = known_renderers[key]
            video_items_renderer = {parent_key: continuation_items} if parent_key else continuation_items
            # Fresh output list; only extract_entries writes to it
            continuation_list = [None]
            yield from func(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        # No known renderer in this page: stop instead of requesting the same continuation again
        if not video_items_renderer:
            break
4763
4764 @staticmethod
4765 def _extract_selected_tab(tabs, fatal=True):
4766 for tab_renderer in tabs:
4767 if tab_renderer.get('selected'):
4768 return tab_renderer
4769 if fatal:
4770 raise ExtractorError('Unable to find selected tab')
4771
@staticmethod
def _extract_tab_renderers(response):
    """
    Collect the 'tabRenderer' / 'expandableTabRenderer' dicts listed under
    contents.twoColumnBrowseResultsRenderer.tabs of a browse response.
    """
    tab_renderer_path = (
        'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ...,
        ('tabRenderer', 'expandableTabRenderer'))
    return traverse_obj(response, tab_renderer_path, expected_type=dict)
4776
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Build the playlist result for the selected tab.

    The metadata extracted from `data` provides the playlist fields; its
    title gets the selected tab's 'title' and 'expandedText' appended
    (each as " - <value>"), and the entries come from _entries.
    """
    metadata = self._extract_metadata_from_tabs(item_id, data)

    selected_tab = self._extract_selected_tab(tabs)
    for field in ('title', 'expandedText'):
        metadata['title'] += format_field(selected_tab, field, ' - %s')

    playlist_id = metadata['id']
    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    entries = self._entries(selected_tab, playlist_id, ytcfg, account_syncid, visitor_data)
    return self.playlist_result(entries, **metadata)
4790
def _extract_metadata_from_tabs(self, item_id, data):
    """
    Assemble the playlist/channel level info dict from a tab response.

    @param item_id: initial 'id'; replaced by the channel's 'externalId' when that is available
    @param data: response
    @returns: dict with id, title, availability, description, tags, thumbnails,
              counts, modified_date and the uploader_*/channel_* fields
    """
    info = {'id': item_id}

    metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
    if not metadata_renderer:
        metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)
    else:
        info['uploader'] = metadata_renderer.get('title')
        info['uploader_id'] = metadata_renderer.get('externalId')
        info['uploader_url'] = metadata_renderer.get('channelUrl')
        if info['uploader_id']:
            info['id'] = info['uploader_id']

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        base_url = (url or '').split('=')[0]
        return url_or_none(base_url + '=s0')

    def _append_uncropped(thumbnails, thumbnail_id, preference):
        # Derive the uncropped variant from the first thumbnail, if there is one
        if not thumbnails:
            return
        uncropped_url = _get_uncropped(thumbnails[0]['url'])
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference,
            })

    avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    # Deprecated - remove primary_sidebar_renderer when layout discontinued
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict)

    sidebar_thumbnail_path = (
        'thumbnailRenderer',
        ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'),
        'thumbnail')
    primary_thumbnails = self._extract_thumbnails(primary_sidebar_renderer, sidebar_thumbnail_path)
    header_thumbnail_path = ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail')
    playlist_thumbnails = self._extract_thumbnails(playlist_header_renderer, header_thumbnail_path)

    # Title falls back from the metadata renderer to the hashtag header to the id
    title = traverse_obj(metadata_renderer, 'title')
    if not title:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or info['id']
    info['title'] = title
    info['availability'] = self._extract_availability(data)
    info['channel_follower_count'] = self._get_count(data, ('header', ..., 'subscriberCountText'))
    info['description'] = try_get(metadata_renderer, lambda x: x.get('description', ''))
    info['tags'] = try_get(metadata_renderer or {}, lambda x: x.get('keywords', '').split())
    info['thumbnails'] = (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners

    # Playlist stats is a text runs array containing [video count, view count, last updated].
    # last updated or (view count and last updated) may be missing.
    playlist_stats = get_first(
        (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), ))

    # The stats based lookup is deprecated, remove when old layout discontinued
    last_updated_text = self._get_text(playlist_stats, 2)
    if not last_updated_text:
        last_updated_text = self._get_text(
            playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text'))
    last_updated_unix = self._parse_time_text(last_updated_text)
    info['modified_date'] = strftime_or_none(last_updated_unix, '%Y%m%d')

    view_count = self._get_count(playlist_stats, 1)
    if view_count is None:  # 0 is allowed
        view_count = self._get_count(playlist_header_renderer, 'viewCountText')
    info['view_count'] = view_count

    playlist_count = self._get_count(playlist_stats, 0)
    if playlist_count is None:  # 0 is allowed
        playlist_count = self._get_count(
            playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))
    info['playlist_count'] = playlist_count

    if not info.get('uploader_id'):
        owner = traverse_obj(playlist_header_renderer, 'ownerText')
        if not owner:  # Deprecated
            secondary_sidebar_renderer = self._extract_sidebar_info_renderer(
                data, 'playlistSidebarSecondaryInfoRenderer')
            owner = traverse_obj(secondary_sidebar_renderer, ('videoOwner', 'videoOwnerRenderer', 'title'))
        owner_text = self._get_text(owner)
        browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
        info['uploader'] = self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text)
        info['uploader_id'] = browse_ep.get('browseId')
        info['uploader_url'] = urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl'))

    # The channel_* fields mirror the uploader_* fields
    info['channel'] = info['uploader']
    info['channel_id'] = info['uploader_id']
    info['channel_url'] = info['uploader_url']
    return info
4893
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the entries of an inline playlist, requesting further pages from the 'next' endpoint.

    Stops when a page has no entries or contains nothing after the last entry already yielded.

    @param playlist: playlist renderer holding the first page of 'contents'
    @param playlist_id: id sent as 'playlistId' with every page request
    @param data: response, used for the account sync id and visitor data
    @param ytcfg: ytcfg used for the API headers and requests
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last entry already yielded, if it shows up in this page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The lookup yields None when the last item is not a playlistPanelVideoRenderer;
        # fall back to an empty dict so the defaults below apply instead of an AttributeError
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4924
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Return either a url_result pointing at the playlist's own page or an inline playlist result.

    @param item_id: fallback playlist id when `playlist` has no 'playlistId'
    @param url: the URL being extracted; also the base for resolving the playlist URL
    @param data: response
    @param playlist: playlist renderer
    @param ytcfg: ytcfg forwarded to the inline playlist extraction
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    web_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    playlist_url = urljoin(url, web_url)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    can_delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if not can_delegate:
        entries = self._extract_inline_playlist(playlist, playlist_id, data, ytcfg)
        return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id, video_title=title)
4947
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public

    Three sources are consulted: the badges of the primary sidebar renderer, the
    'privacy' field of the playlist header renderer, and the icon type of the
    selected entry in the privacy dropdown.

    @param data: response
    @returns: 'public' if any source says so, otherwise the result of self._availability()
    """
    sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
    player_header_privacy = playlist_header_renderer.get('privacy')

    badges = self._extract_badges(sidebar_renderer)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_setting_icon = get_first(
        (playlist_header_renderer, sidebar_renderer),
        ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
         lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        expected_type=str)

    microformats_is_unlisted = traverse_obj(
        data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)

    # NOTE(review): in the is_private/is_unlisted arguments below, `or` binds tighter
    # than the conditional expression, i.e. they parse as
    #   (badge or header == X) if header is not None else (icon == Y if icon is not None else ...)
    # so the badge check only takes part when player_header_privacy is not None.
    # Confirm this is the intended behaviour before restructuring.
    return (
        'public' if (
            self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or player_header_privacy == 'PUBLIC'
            or privacy_setting_icon == 'PRIVACY_PUBLIC')
        else self._availability(
            is_private=(
                self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
                or player_header_privacy == 'PRIVATE' if player_header_privacy is not None
                else privacy_setting_icon == 'PRIVACY_PRIVATE' if privacy_setting_icon is not None else None),
            is_unlisted=(
                self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
                or player_header_privacy == 'UNLISTED' if player_header_privacy is not None
                else privacy_setting_icon == 'PRIVACY_UNLISTED' if privacy_setting_icon is not None
                else microformats_is_unlisted if microformats_is_unlisted is not None else None),
            # A missing badge is reported as None (unknown) rather than False
            needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
            needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
            needs_auth=False))
4988
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` value among the playlist sidebar items.

    @param data: response
    @param info_renderer: key to look up in each sidebar item
    @param expected_type: type the value must have to be accepted
    @returns: the matching value, or None when no item provides one
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next(filter(None, candidates), None)
4997
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Reload playlists with unavailable videos (e.g. private videos, region blocked, etc.)

    Returns None without making a request when `data` has neither a
    playlistMetadataRenderer nor a playlistHeaderRenderer.
    """
    playlist_renderer = traverse_obj(
        data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer'))
    if not playlist_renderer:
        return None

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
    query = {'params': 'wgYCCAA=', 'browseId': f'VL{item_id}'}
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Redownloading playlist API JSON with unavailable videos')
5017
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the 'skip' configuration argument of YoutubeTabIE"""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped
5021
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its initial data, retrying via self.RetryManager.

    @param url: page to download
    @param item_id: id used when downloading and extracting
    @param fatal: passed to the retry manager, the initial data extraction and self._error_or_warning
    @returns: (webpage, data); both start out as None and are only replaced
              once the corresponding step has succeeded
    """
    webpage, data = None, None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                # Network errors are retried, except for HTTP 403 and 429 responses
                if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429):
                    retry.error = e
                    continue
            # Everything else is reported and ends the retry loop
            self._error_or_warning(e, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            # Alerts are not retried
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
            retry.error = ExtractorError('Incomplete yt initial data received')
            continue

    return webpage, data
5049
def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """Use if failed to extract ytcfg (and data) from initial webpage"""
    # Only relevant when authenticated and no ytcfg is available
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if 'authcheck' not in skipped and fatal:
        hint = (
            'If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check')
        raise ExtractorError(f'{msg}. {hint}', expected=True)
    self.report_warning(msg, only_once=True)
5061
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Get the response data for `url`, from the webpage first and from the API otherwise.

    @param ytcfg: used as-is when given; otherwise extracted from the webpage (unless the webpage is skipped)
    @param fatal: raise instead of warn for the home page redirect, auth check and API request
    @param webpage_fatal: `fatal` value for the webpage download only
    @returns: (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)

        # Reject webpage data if redirected to home page without explicitly requesting
        tab_renderers = self._extract_tab_renderers(data)
        selected_tab = self._extract_selected_tab(tab_renderers, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if data:
        return data, ytcfg

    # No usable webpage data: fall back to resolving the URL through the API
    self._report_playlist_authcheck(ytcfg, fatal=fatal)
    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
5080
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through 'navigation/resolve_url' and request the endpoint it maps to.

    A 'browseEndpoint' is requested from 'browse' and a 'watchEndpoint' from 'next';
    the browse endpoint is tried first.

    @returns: the endpoint response; when nothing resolves and fatal is False,
              a warning is reported and None is returned
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for ep_key, ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return None
    raise ExtractorError(err_note, expected=True)
5098
# Value sent as 'params' by _search_results() when the caller does not pass one;
# a falsy value means no 'params' field is added to the request
_SEARCH_PARAMS = None
5100
def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield search result entries for `query`, following continuations page by page.

    @param query: search query
    @param params: 'params' value for the request; defaults to self._SEARCH_PARAMS
    @param default_client: client used for the ytcfg, headers and requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # The distinct top-level keys of the content paths
    check_get_keys = tuple({path[0] for path in content_keys})
    if self.skip_webpage:
        ytcfg = {}
    else:
        ytcfg = self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # Single-slot list shared with _extract_entries; slot 0 is read back after each page
    continuation_list = [None]
    search = None
    page_num = 0
    while True:
        page_num += 1
        data.update(continuation_list[0] or {})
        visitor_data = self._extract_visitor_data(search)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=visitor_data, default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        renderer = {'contents': list(variadic(slr_contents))}
        yield from self._extract_entries(renderer, continuation_list)
        if not continuation_list[0]:
            return
5133
5134
5135 class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
5136 IE_DESC = 'YouTube Tabs'
5137 _VALID_URL = r'''(?x:
5138 https?://
5139 (?!consent\.)(?:\w+\.)?
5140 (?:
5141 youtube(?:kids)?\.com|
5142 %(invidious)s
5143 )/
5144 (?:
5145 (?P<channel_type>channel|c|user|browse)/|
5146 (?P<not_channel>
5147 feed/|hashtag/|
5148 (?:playlist|watch)\?.*?\blist=
5149 )|
5150 (?!(?:%(reserved_names)s)\b) # Direct URLs
5151 )
5152 (?P<id>[^/?\#&]+)
5153 )''' % {
5154 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
5155 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
5156 }
5157 IE_NAME = 'youtube:tab'
5158
5159 _TESTS = [{
5160 'note': 'playlists, multipage',
5161 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
5162 'playlist_mincount': 94,
5163 'info_dict': {
5164 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
5165 'title': 'Igor Kleiner - Playlists',
5166 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
5167 'uploader': 'Igor Kleiner',
5168 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5169 'channel': 'Igor Kleiner',
5170 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5171 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
5172 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5173 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5174 'channel_follower_count': int
5175 },
5176 }, {
5177 'note': 'playlists, multipage, different order',
5178 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
5179 'playlist_mincount': 94,
5180 'info_dict': {
5181 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
5182 'title': 'Igor Kleiner - Playlists',
5183 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
5184 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5185 'uploader': 'Igor Kleiner',
5186 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5187 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
5188 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5189 'channel': 'Igor Kleiner',
5190 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5191 'channel_follower_count': int
5192 },
5193 }, {
5194 'note': 'playlists, series',
5195 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
5196 'playlist_mincount': 5,
5197 'info_dict': {
5198 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5199 'title': '3Blue1Brown - Playlists',
5200 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
5201 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
5202 'uploader': '3Blue1Brown',
5203 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5204 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5205 'channel': '3Blue1Brown',
5206 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5207 'tags': ['Mathematics'],
5208 'channel_follower_count': int
5209 },
5210 }, {
5211 'note': 'playlists, singlepage',
5212 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
5213 'playlist_mincount': 4,
5214 'info_dict': {
5215 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5216 'title': 'ThirstForScience - Playlists',
5217 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
5218 'uploader': 'ThirstForScience',
5219 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5220 'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
5221 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
5222 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5223 'tags': 'count:13',
5224 'channel': 'ThirstForScience',
5225 'channel_follower_count': int
5226 }
5227 }, {
5228 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
5229 'only_matching': True,
5230 }, {
5231 'note': 'basic, single video playlist',
5232 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5233 'info_dict': {
5234 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5235 'uploader': 'Sergey M.',
5236 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5237 'title': 'youtube-dl public playlist',
5238 'description': '',
5239 'tags': [],
5240 'view_count': int,
5241 'modified_date': '20201130',
5242 'channel': 'Sergey M.',
5243 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5244 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5245 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5246 'availability': 'public',
5247 },
5248 'playlist_count': 1,
5249 }, {
5250 'note': 'empty playlist',
5251 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5252 'info_dict': {
5253 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5254 'uploader': 'Sergey M.',
5255 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5256 'title': 'youtube-dl empty playlist',
5257 'tags': [],
5258 'channel': 'Sergey M.',
5259 'description': '',
5260 'modified_date': '20160902',
5261 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5262 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5263 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5264 'availability': 'public',
5265 },
5266 'playlist_count': 0,
5267 }, {
5268 'note': 'Home tab',
5269 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
5270 'info_dict': {
5271 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5272 'title': 'lex will - Home',
5273 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5274 'uploader': 'lex will',
5275 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5276 'channel': 'lex will',
5277 'tags': ['bible', 'history', 'prophesy'],
5278 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5279 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5280 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5281 'channel_follower_count': int
5282 },
5283 'playlist_mincount': 2,
5284 }, {
5285 'note': 'Videos tab',
5286 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
5287 'info_dict': {
5288 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5289 'title': 'lex will - Videos',
5290 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5291 'uploader': 'lex will',
5292 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5293 'tags': ['bible', 'history', 'prophesy'],
5294 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5295 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5296 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5297 'channel': 'lex will',
5298 'channel_follower_count': int
5299 },
5300 'playlist_mincount': 975,
5301 }, {
5302 'note': 'Videos tab, sorted by popular',
5303 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
5304 'info_dict': {
5305 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5306 'title': 'lex will - Videos',
5307 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5308 'uploader': 'lex will',
5309 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5310 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5311 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5312 'channel': 'lex will',
5313 'tags': ['bible', 'history', 'prophesy'],
5314 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5315 'channel_follower_count': int
5316 },
5317 'playlist_mincount': 199,
5318 }, {
5319 'note': 'Playlists tab',
5320 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
5321 'info_dict': {
5322 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5323 'title': 'lex will - Playlists',
5324 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5325 'uploader': 'lex will',
5326 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5327 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5328 'channel': 'lex will',
5329 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5330 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5331 'tags': ['bible', 'history', 'prophesy'],
5332 'channel_follower_count': int
5333 },
5334 'playlist_mincount': 17,
5335 }, {
5336 'note': 'Community tab',
5337 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
5338 'info_dict': {
5339 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5340 'title': 'lex will - Community',
5341 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5342 'uploader': 'lex will',
5343 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5344 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5345 'channel': 'lex will',
5346 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5347 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5348 'tags': ['bible', 'history', 'prophesy'],
5349 'channel_follower_count': int
5350 },
5351 'playlist_mincount': 18,
5352 }, {
5353 'note': 'Channels tab',
5354 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
5355 'info_dict': {
5356 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5357 'title': 'lex will - Channels',
5358 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5359 'uploader': 'lex will',
5360 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5361 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5362 'channel': 'lex will',
5363 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5364 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5365 'tags': ['bible', 'history', 'prophesy'],
5366 'channel_follower_count': int
5367 },
5368 'playlist_mincount': 12,
5369 }, {
5370 'note': 'Search tab',
5371 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
5372 'playlist_mincount': 40,
5373 'info_dict': {
5374 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5375 'title': '3Blue1Brown - Search - linear algebra',
5376 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
5377 'uploader': '3Blue1Brown',
5378 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
5379 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5380 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5381 'tags': ['Mathematics'],
5382 'channel': '3Blue1Brown',
5383 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5384 'channel_follower_count': int
5385 },
5386 }, {
5387 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5388 'only_matching': True,
5389 }, {
5390 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5391 'only_matching': True,
5392 }, {
5393 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5394 'only_matching': True,
5395 }, {
5396 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
5397 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5398 'info_dict': {
5399 'title': '29C3: Not my department',
5400 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5401 'uploader': 'Christiaan008',
5402 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5403 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
5404 'tags': [],
5405 'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
5406 'view_count': int,
5407 'modified_date': '20150605',
5408 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5409 'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
5410 'channel': 'Christiaan008',
5411 'availability': 'public',
5412 },
5413 'playlist_count': 96,
5414 }, {
5415 'note': 'Large playlist',
5416 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
5417 'info_dict': {
5418 'title': 'Uploads from Cauchemar',
5419 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
5420 'uploader': 'Cauchemar',
5421 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
5422 'channel_url': 'https://www.youtube.com/c/Cauchemar89',
5423 'tags': [],
5424 'modified_date': r're:\d{8}',
5425 'channel': 'Cauchemar',
5426 'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
5427 'view_count': int,
5428 'description': '',
5429 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
5430 'availability': 'public',
5431 },
5432 'playlist_mincount': 1123,
5433 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5434 }, {
5435 'note': 'even larger playlist, 8832 videos',
5436 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
5437 'only_matching': True,
5438 }, {
5439 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
5440 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
5441 'info_dict': {
5442 'title': 'Uploads from Interstellar Movie',
5443 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
5444 'uploader': 'Interstellar Movie',
5445 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
5446 'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
5447 'tags': [],
5448 'view_count': int,
5449 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
5450 'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
5451 'channel': 'Interstellar Movie',
5452 'description': '',
5453 'modified_date': r're:\d{8}',
5454 'availability': 'public',
5455 },
5456 'playlist_mincount': 21,
5457 }, {
5458 'note': 'Playlist with "show unavailable videos" button',
5459 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
5460 'info_dict': {
5461 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
5462 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
5463 'uploader': 'Phim Siêu Nhân Nhật Bản',
5464 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5465 'view_count': int,
5466 'channel': 'Phim Siêu Nhân Nhật Bản',
5467 'tags': [],
5468 'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5469 'description': '',
5470 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5471 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5472 'modified_date': r're:\d{8}',
5473 'availability': 'public',
5474 },
5475 'playlist_mincount': 200,
5476 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5477 }, {
5478 'note': 'Playlist with unavailable videos in page 7',
5479 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5480 'info_dict': {
5481 'title': 'Uploads from BlankTV',
5482 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
5483 'uploader': 'BlankTV',
5484 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5485 'channel': 'BlankTV',
5486 'channel_url': 'https://www.youtube.com/c/blanktv',
5487 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5488 'view_count': int,
5489 'tags': [],
5490 'uploader_url': 'https://www.youtube.com/c/blanktv',
5491 'modified_date': r're:\d{8}',
5492 'description': '',
5493 'availability': 'public',
5494 },
5495 'playlist_mincount': 1000,
5496 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5497 }, {
5498 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5499 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5500 'info_dict': {
5501 'title': 'Data Analysis with Dr Mike Pound',
5502 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5503 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5504 'uploader': 'Computerphile',
5505 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
5506 'uploader_url': 'https://www.youtube.com/user/Computerphile',
5507 'tags': [],
5508 'view_count': int,
5509 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5510 'channel_url': 'https://www.youtube.com/user/Computerphile',
5511 'channel': 'Computerphile',
5512 'availability': 'public',
5513 'modified_date': '20190712',
5514 },
5515 'playlist_mincount': 11,
5516 }, {
5517 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5518 'only_matching': True,
5519 }, {
5520 'note': 'Playlist URL that does not actually serve a playlist',
5521 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5522 'info_dict': {
5523 'id': 'FqZTN594JQw',
5524 'ext': 'webm',
5525 'title': "Smiley's People 01 detective, Adventure Series, Action",
5526 'uploader': 'STREEM',
5527 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
5528 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
5529 'upload_date': '20150526',
5530 'license': 'Standard YouTube License',
5531 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5532 'categories': ['People & Blogs'],
5533 'tags': list,
5534 'view_count': int,
5535 'like_count': int,
5536 },
5537 'params': {
5538 'skip_download': True,
5539 },
5540 'skip': 'This video is not available.',
5541 'add_ie': [YoutubeIE.ie_key()],
5542 }, {
5543 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5544 'only_matching': True,
5545 }, {
5546 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5547 'only_matching': True,
5548 }, {
5549 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5550 'info_dict': {
5551 'id': 'Wq15eF5vCbI', # This will keep changing
5552 'ext': 'mp4',
5553 'title': str,
5554 'uploader': 'Sky News',
5555 'uploader_id': 'skynews',
5556 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
5557 'upload_date': r're:\d{8}',
5558 'description': str,
5559 'categories': ['News & Politics'],
5560 'tags': list,
5561 'like_count': int,
5562 'release_timestamp': int,
5563 'channel': 'Sky News',
5564 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5565 'age_limit': 0,
5566 'view_count': int,
5567 'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',
5568 'playable_in_embed': True,
5569 'release_date': r're:\d+',
5570 'availability': 'public',
5571 'live_status': 'is_live',
5572 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
5573 'channel_follower_count': int,
5574 'concurrent_view_count': int,
5575 },
5576 'params': {
5577 'skip_download': True,
5578 },
5579 'expected_warnings': ['Ignoring subtitle tracks found in '],
5580 }, {
5581 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5582 'info_dict': {
5583 'id': 'a48o2S1cPoo',
5584 'ext': 'mp4',
5585 'title': 'The Young Turks - Live Main Show',
5586 'uploader': 'The Young Turks',
5587 'uploader_id': 'TheYoungTurks',
5588 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
5589 'upload_date': '20150715',
5590 'license': 'Standard YouTube License',
5591 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5592 'categories': ['News & Politics'],
5593 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5594 'like_count': int,
5595 },
5596 'params': {
5597 'skip_download': True,
5598 },
5599 'only_matching': True,
5600 }, {
5601 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5602 'only_matching': True,
5603 }, {
5604 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5605 'only_matching': True,
5606 }, {
5607 'note': 'A channel that is not live. Should raise error',
5608 'url': 'https://www.youtube.com/user/numberphile/live',
5609 'only_matching': True,
5610 }, {
5611 'url': 'https://www.youtube.com/feed/trending',
5612 'only_matching': True,
5613 }, {
5614 'url': 'https://www.youtube.com/feed/library',
5615 'only_matching': True,
5616 }, {
5617 'url': 'https://www.youtube.com/feed/history',
5618 'only_matching': True,
5619 }, {
5620 'url': 'https://www.youtube.com/feed/subscriptions',
5621 'only_matching': True,
5622 }, {
5623 'url': 'https://www.youtube.com/feed/watch_later',
5624 'only_matching': True,
5625 }, {
5626 'note': 'Recommended - redirects to home page.',
5627 'url': 'https://www.youtube.com/feed/recommended',
5628 'only_matching': True,
5629 }, {
5630 'note': 'inline playlist with not always working continuations',
5631 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5632 'only_matching': True,
5633 }, {
5634 'url': 'https://www.youtube.com/course',
5635 'only_matching': True,
5636 }, {
5637 'url': 'https://www.youtube.com/zsecurity',
5638 'only_matching': True,
5639 }, {
5640 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5641 'only_matching': True,
5642 }, {
5643 'url': 'https://www.youtube.com/TheYoungTurks/live',
5644 'only_matching': True,
5645 }, {
5646 'url': 'https://www.youtube.com/hashtag/cctv9',
5647 'info_dict': {
5648 'id': 'cctv9',
5649 'title': '#cctv9',
5650 'tags': [],
5651 },
5652 'playlist_mincount': 300, # not consistent but should be over 300
5653 }, {
5654 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
5655 'only_matching': True,
5656 }, {
5657 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
5658 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5659 'only_matching': True
5660 }, {
5661 'note': '/browse/ should redirect to /channel/',
5662 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
5663 'only_matching': True
5664 }, {
5665 'note': 'VLPL, should redirect to playlist?list=PL...',
5666 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5667 'info_dict': {
5668 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5669 'uploader': 'NoCopyrightSounds',
5670 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
5671 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5672 'title': 'NCS : All Releases 💿',
5673 'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5674 'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5675 'modified_date': r're:\d{8}',
5676 'view_count': int,
5677 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5678 'tags': [],
5679 'channel': 'NoCopyrightSounds',
5680 'availability': 'public',
5681 },
5682 'playlist_mincount': 166,
5683 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5684 }, {
5685 'note': 'Topic, should redirect to playlist?list=UU...',
5686 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5687 'info_dict': {
5688 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5689 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5690 'title': 'Uploads from Royalty Free Music - Topic',
5691 'uploader': 'Royalty Free Music - Topic',
5692 'tags': [],
5693 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5694 'channel': 'Royalty Free Music - Topic',
5695 'view_count': int,
5696 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5697 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5698 'modified_date': r're:\d{8}',
5699 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5700 'description': '',
5701 'availability': 'public',
5702 },
5703 'playlist_mincount': 101,
5704 }, {
5705 # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
5706 # Treat as a general feed
5707 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
5708 'info_dict': {
5709 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
5710 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
5711 'tags': [],
5712 },
5713 'playlist_mincount': 9,
5714 }, {
5715 'note': 'Youtube music Album',
5716 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
5717 'info_dict': {
5718 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
5719 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
5720 'tags': [],
5721 'view_count': int,
5722 'description': '',
5723 'availability': 'unlisted',
5724 'modified_date': r're:\d{8}',
5725 },
5726 'playlist_count': 50,
5727 }, {
5728 'note': 'unlisted single video playlist',
5729 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5730 'info_dict': {
5731 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5732 'uploader': 'colethedj',
5733 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5734 'title': 'yt-dlp unlisted playlist test',
5735 'availability': 'unlisted',
5736 'tags': [],
5737 'modified_date': '20220418',
5738 'channel': 'colethedj',
5739 'view_count': int,
5740 'description': '',
5741 'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5742 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5743 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5744 },
5745 'playlist_count': 1,
5746 }, {
5747 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
5748 'url': 'https://www.youtube.com/feed/recommended',
5749 'info_dict': {
5750 'id': 'recommended',
5751 'title': 'recommended',
5752 'tags': [],
5753 },
5754 'playlist_mincount': 50,
5755 'params': {
5756 'skip_download': True,
5757 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5758 },
5759 }, {
5760 'note': 'API Fallback: /videos tab, sorted by oldest first',
5761 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
5762 'info_dict': {
5763 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5764 'title': 'Cody\'sLab - Videos',
5765 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
5766 'uploader': 'Cody\'sLab',
5767 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5768 'channel': 'Cody\'sLab',
5769 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5770 'tags': [],
5771 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5772 'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5773 'channel_follower_count': int
5774 },
5775 'playlist_mincount': 650,
5776 'params': {
5777 'skip_download': True,
5778 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5779 },
5780 'skip': 'Query for sorting no longer works',
5781 }, {
5782 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
5783 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5784 'info_dict': {
5785 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5786 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5787 'title': 'Uploads from Royalty Free Music - Topic',
5788 'uploader': 'Royalty Free Music - Topic',
5789 'modified_date': r're:\d{8}',
5790 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5791 'description': '',
5792 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5793 'tags': [],
5794 'channel': 'Royalty Free Music - Topic',
5795 'view_count': int,
5796 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5797 'availability': 'public',
5798 },
5799 'playlist_mincount': 101,
5800 'params': {
5801 'skip_download': True,
5802 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5803 },
5804 }, {
5805 'note': 'non-standard redirect to regional channel',
5806 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
5807 'only_matching': True
5808 }, {
5809 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
5810 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5811 'info_dict': {
5812 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5813 'modified_date': '20220407',
5814 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5815 'tags': [],
5816 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5817 'uploader': 'pukkandan',
5818 'availability': 'unlisted',
5819 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5820 'channel': 'pukkandan',
5821 'description': 'Test for collaborative playlist',
5822 'title': 'yt-dlp test - collaborative playlist',
5823 'view_count': int,
5824 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5825 },
5826 'playlist_mincount': 2
5827 }, {
5828 'note': 'translated tab name',
5829 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
5830 'info_dict': {
5831 'id': 'UCiu-3thuViMebBjw_5nWYrA',
5832 'tags': [],
5833 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
5834 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5835 'description': 'test description',
5836 'title': 'cole-dlp-test-acc - 再生リスト',
5837 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5838 'uploader': 'cole-dlp-test-acc',
5839 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
5840 'channel': 'cole-dlp-test-acc',
5841 },
5842 'playlist_mincount': 1,
5843 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
5844 'expected_warnings': ['Preferring "ja"'],
5845 }, {
5846 # XXX: this should really check flat playlist entries, but the test suite doesn't support that
5847 'note': 'preferred lang set with playlist with translated video titles',
5848 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
5849 'info_dict': {
5850 'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
5851 'tags': [],
5852 'view_count': int,
5853 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5854 'uploader': 'cole-dlp-test-acc',
5855 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
5856 'channel': 'cole-dlp-test-acc',
5857 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
5858 'description': 'test',
5859 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5860 'title': 'dlp test playlist',
5861 'availability': 'public',
5862 },
5863 'playlist_mincount': 1,
5864 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
5865 'expected_warnings': ['Preferring "ja"'],
5866 }, {
5867 # shorts audio pivot for 2GtVksBMYFM.
5868 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
5869 'info_dict': {
5870 'id': 'sfv_audio_pivot',
5871 'title': 'sfv_audio_pivot',
5872 'tags': [],
5873 },
5874 'playlist_mincount': 50,
5875
5876 }, {
5877 # Channel with a real live tab (not to be mistaken with streams tab)
5878 # Do not treat like it should redirect to live stream
5879 'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',
5880 'info_dict': {
5881 'id': 'UCEH7P7kyJIkS_gJf93VYbmg',
5882 'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',
5883 'tags': [],
5884 },
5885 'playlist_mincount': 20,
5886 }, {
5887 # Tab name is not the same as tab id
5888 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',
5889 'info_dict': {
5890 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
5891 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',
5892 'tags': [],
5893 },
5894 'playlist_mincount': 8,
5895 }, {
5896 # Home tab id is literally home. Not to get mistaken with featured
5897 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',
5898 'info_dict': {
5899 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
5900 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',
5901 'tags': [],
5902 },
5903 'playlist_mincount': 8,
5904 }, {
5905 # Should get three playlists for videos, shorts and streams tabs
5906 'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
5907 'info_dict': {
5908 'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
5909 'title': 'Polka Ch. 尾丸ポルカ',
5910 'channel_follower_count': int,
5911 'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
5912 'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
5913 'uploader': 'Polka Ch. 尾丸ポルカ',
5914 'description': 'md5:3b8df1ac5af337aa206e37ee3d181ec9',
5915 'channel': 'Polka Ch. 尾丸ポルカ',
5916 'tags': 'count:35',
5917 'uploader_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
5918 'uploader_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
5919 },
5920 'playlist_count': 3,
5921 }, {
5922 # Shorts tab with channel with handle
5923 'url': 'https://www.youtube.com/@NotJustBikes/shorts',
5924 'info_dict': {
5925 'id': 'UC0intLFzLaudFG-xAvUEO-A',
5926 'title': 'Not Just Bikes - Shorts',
5927 'tags': 'count:12',
5928 'uploader': 'Not Just Bikes',
5929 'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
5930 'description': 'md5:7513148b1f02b924783157d84c4ea555',
5931 'channel_follower_count': int,
5932 'uploader_id': 'UC0intLFzLaudFG-xAvUEO-A',
5933 'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
5934 'uploader_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
5935 'channel': 'Not Just Bikes',
5936 },
5937 'playlist_mincount': 10,
5938 }, {
5939 # Streams tab
5940 'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',
5941 'info_dict': {
5942 'id': 'UC3eYAvjCVwNHgkaGbXX3sig',
5943 'title': '中村悠一 - Live',
5944 'tags': 'count:7',
5945 'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
5946 'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
5947 'uploader_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
5948 'channel': '中村悠一',
5949 'uploader_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
5950 'channel_follower_count': int,
5951 'uploader': '中村悠一',
5952 'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
5953 },
5954 'playlist_mincount': 60,
5955 }, {
5956 # Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.
5957 # See test_youtube_lists
5958 'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',
5959 'only_matching': True,
5960 }, {
5961 # No uploads and no UCID given. Should fail with no uploads error
5962 # See test_youtube_lists
5963 'url': 'https://www.youtube.com/news',
5964 'only_matching': True
5965 }, {
5966 # No videos tab but has a shorts tab
5967 'url': 'https://www.youtube.com/c/TKFShorts',
5968 'info_dict': {
5969 'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
5970 'title': 'Shorts Break - Shorts',
5971 'tags': 'count:32',
5972 'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
5973 'channel': 'Shorts Break',
5974 'description': 'md5:a6c234cf3d50d878ef8721e34457cd11',
5975 'uploader': 'Shorts Break',
5976 'channel_follower_count': int,
5977 'uploader_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
5978 'uploader_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
5979 'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
5980 },
5981 'playlist_mincount': 30,
5982 }, {
5983 # Trending Now Tab. tab id is empty
5984 'url': 'https://www.youtube.com/feed/trending',
5985 'info_dict': {
5986 'id': 'trending',
5987 'title': 'trending - Now',
5988 'tags': [],
5989 },
5990 'playlist_mincount': 30,
5991 }, {
5992 # Trending Gaming Tab. tab id is empty
5993 'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
5994 'info_dict': {
5995 'id': 'trending',
5996 'title': 'trending - Gaming',
5997 'tags': [],
5998 },
5999 'playlist_mincount': 30,
6000 }, {
6001 # Shorts url result in shorts tab
6002 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
6003 'info_dict': {
6004 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6005 'title': 'cole-dlp-test-acc - Shorts',
6006 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
6007 'channel': 'cole-dlp-test-acc',
6008 'description': 'test description',
6009 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6010 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6011 'tags': [],
6012 'uploader': 'cole-dlp-test-acc',
6013 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6014
6015 },
6016 'playlist': [{
6017 'info_dict': {
6018 '_type': 'url',
6019 'ie_key': 'Youtube',
6020 'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',
6021 'id': 'sSM9J5YH_60',
6022 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6023 'title': 'SHORT short',
6024 'channel': 'cole-dlp-test-acc',
6025 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6026 'view_count': int,
6027 'thumbnails': list,
6028 }
6029 }],
6030 'params': {'extract_flat': True},
6031 }, {
6032 # Live video status should be extracted
6033 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
6034 'info_dict': {
6035 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6036 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO, should be Minecraft - Live or Minecraft - Topic - Live
6037 'tags': []
6038 },
6039 'playlist': [{
6040 'info_dict': {
6041 '_type': 'url',
6042 'ie_key': 'Youtube',
6043 'url': 'startswith:https://www.youtube.com/watch?v=',
6044 'id': str,
6045 'title': str,
6046 'live_status': 'is_live',
6047 'channel_id': str,
6048 'channel_url': str,
6049 'concurrent_view_count': int,
6050 'channel': str,
6051 }
6052 }],
6053 'params': {'extract_flat': True, 'playlist_items': '1'},
6054 'playlist_mincount': 1
6055 }, {
6056 # Channel renderer metadata. Contains number of videos on the channel
6057 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
6058 'info_dict': {
6059 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6060 'title': 'cole-dlp-test-acc - Channels',
6061 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
6062 'channel': 'cole-dlp-test-acc',
6063 'description': 'test description',
6064 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6065 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6066 'tags': [],
6067 'uploader': 'cole-dlp-test-acc',
6068 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6069
6070 },
6071 'playlist': [{
6072 'info_dict': {
6073 '_type': 'url',
6074 'ie_key': 'YoutubeTab',
6075 'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6076 'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6077 'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6078 'title': 'PewDiePie',
6079 'channel': 'PewDiePie',
6080 'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6081 'thumbnails': list,
6082 'channel_follower_count': int,
6083 'playlist_count': int
6084 }
6085 }],
6086 'params': {'extract_flat': True},
6087 }]
6088
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    Any URL accepted by ``YoutubeIE`` is declined here; every other URL is
    decided by the inherited ``suitable`` implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
6092
# Splits a URL into named parts:
#   pre  - everything matched by _VALID_URL
#   tab  - an optional single path segment ("/name"); only attempted when the
#          `not_channel` group (expected to come from _VALID_URL) did not match
#   post - whatever remains up to the end of the string
_URL_RE = re.compile(
    rf'(?P<pre>{_VALID_URL})'
    r'(?(not_channel)|(?P<tab>/[^?#/]+))?'
    r'(?P<post>.*)$')
6094
def _get_url_mobj(self, url):
    """Match ``url`` against ``self._URL_RE`` and return its named groups.

    The result is a dict keyed by group name. Groups that did not take part
    in the match are reported as empty strings rather than None.
    """
    # groupdict's `default` argument fills in the non-participating groups
    return self._URL_RE.match(url).groupdict('')
6099
def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
    """Determine the ``(tab_id, tab_name)`` pair for a tab renderer.

    ``tab_name`` is the lower-cased ``title`` of ``tab`` (empty string when
    missing). ``tab_id`` is taken, in order of preference, from the tab
    segment of the tab's endpoint URL (resolved against ``base_url``), from
    the ``tabIdentifier`` field, and finally from the tab name itself.
    """
    tab_name = (tab.get('title') or '').lower()
    endpoint_url = traverse_obj(
        tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'))
    tab_url = urljoin(base_url, endpoint_url)

    tab_id = None
    if tab_url:
        # The `tab` group includes its leading slash; drop it
        tab_id = self._get_url_mobj(tab_url)['tab'][1:]
    if not tab_id:
        tab_id = traverse_obj(tab, 'tabIdentifier', expected_type=str)

    if tab_id:
        known_ids = {
            'TAB_ID_SPONSORSHIPS': 'membership',
        }
        return known_ids.get(tab_id, tab_id), tab_name

    # Fallback to tab name if we cannot get the tab id.
    # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
    # Note that in the case of translated tab name this may result in an empty string, which we don't want.
    if tab_name:
        self.write_debug(f'Falling back to selected tab name: {tab_name}')
    known_names = {
        'home': 'featured',
        'live': 'streams',
    }
    return known_names.get(tab_name, tab_name), tab_name
6121
def _has_tab(self, tabs, tab_id):
    """Return True when at least one renderer in ``tabs`` has the id ``tab_id``.

    Ids are resolved with ``_extract_tab_id_and_name``; scanning stops at
    the first match.
    """
    for candidate in tabs:
        if self._extract_tab_id_and_name(candidate)[0] == tab_id:
            return True
    return False
6124
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Extract a channel tab, playlist, feed or similar list-like URL.

    Rough order of operations:
      1. Rewrite the host to www.youtube.com and split the URL with
         ``_get_url_mobj``.
      2. For music channel URLs, hand off VL/MP/browse ids to their
         playlist or channel equivalents.
      3. Resolve the video-vs-playlist ambiguity of watch URLs.
      4. Download the page data, follow a conditional redirect if present
         and, for channels, decide which tab(s) to extract.
      5. Build the result from the tab renderers, falling back to an inline
         playlist and finally to a single video.

    The ``no-youtube-channel-redirect`` and ``no-youtube-unavailable-videos``
    compat options disable the corresponding behaviours below.

    ``smuggled_data`` is only read via ``.get('is_music_url')``; presumably it
    is supplied by the ``passthrough_smuggled_data`` decorator - verify there.

    Returns whatever ``url_result``, ``playlist_result``,
    ``_extract_from_tabs`` or ``_extract_from_playlist`` produce.
    Raises ExtractorError (or UserNotLive for an unfulfilled ``/live``
    request) when nothing extractable is found.
    """
    item_id = self._match_id(url)
    # Only the netloc is replaced; the other URL components are kept as given
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    mobj = self._get_url_mobj(url)
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel']
    if is_channel and smuggled_data.get('is_music_url'):
        if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
            return self.url_result(
                f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:])
        elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                get_all=False, expected_type=str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, YoutubeTabIE)
        elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            return self.url_result(
                f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)

    # `tab` carries its leading slash (or is empty); the id is the part after it
    original_tab_id, display_id = tab[1:], f'{item_id}{tab}'
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # A channel URL without a tab is requested as its /videos tab
        url = f'{pre}/videos{post}'

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = [traverse_obj(qs, (key, 0)) for key in ('v', 'list')]
    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('A video URL was given without video ID', expected=True)
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)

    if not self._yes_playlist(playlist_id, video_id):
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    data, ytcfg = self._extract_data(url, display_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Keep the originally requested tab and trailing part on the redirect target
        redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post))
        self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
        return self.url_result(redirect_url, YoutubeTabIE)

    # `extra_tabs` collects URLs of further tabs to extract alongside the main one
    tabs, extra_tabs = self._extract_tab_renderers(data), []
    if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url)  # NB: Name may be translated
        self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')

        if not original_tab_id and selected_tab_name:
            self.to_screen('Downloading all uploads of the channel. '
                           'To download only the videos in a specific tab, pass the tab\'s URL')
            if self._has_tab(tabs, 'streams'):
                extra_tabs.append(''.join((pre, '/streams', post)))
            if self._has_tab(tabs, 'shorts'):
                extra_tabs.append(''.join((pre, '/shorts', post)))
            # XXX: Members-only tab should also be extracted

            if not extra_tabs and selected_tab_id != 'videos':
                # Channel does not have streams, shorts or videos tabs
                if item_id[:2] != 'UC':
                    raise ExtractorError('This channel has no uploads', expected=True)

                # Topic channels don't have /videos. Use the equivalent playlist instead
                pl_id = f'UU{item_id[2:]}'
                pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                try:
                    data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                except ExtractorError:
                    raise ExtractorError('This channel has no uploads', expected=True)
                else:
                    # From here on the uploads playlist stands in for the channel
                    item_id, url = pl_id, pl_url
                    self.to_screen(
                        f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')

            elif extra_tabs and selected_tab_id != 'videos':
                # When there are shorts/live tabs but not videos tab
                url, data = f'{pre}{post}', None

        elif (original_tab_id or 'videos') != selected_tab_id:
            if original_tab_id == 'live':
                # Live tab should have redirected to the video
                # Except in the case the channel has an actual live tab
                # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
                raise UserNotLive(video_id=item_id)
            elif selected_tab_name:
                raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)

            # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
            url = f'{pre}{post}'

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # Re-read the renderers: `data` may have been replaced (or set to None) above
    tabs, entries = self._extract_tab_renderers(data), []
    if tabs:
        entries = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
        entries[0].update({
            'extractor_key': YoutubeTabIE.ie_key(),
            'extractor': YoutubeTabIE.IE_NAME,
            'webpage_url': url,
        })
    if self.get_param('playlist_items') == '0':
        entries.extend(self.url_result(u, YoutubeTabIE) for u in extra_tabs)
    else:  # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
        entries.extend(map(self._real_extract, extra_tabs))

    if len(entries) == 1:
        return entries[0]
    elif entries:
        # More than one tab was extracted: wrap them in an outer playlist
        metadata = self._extract_metadata_from_tabs(item_id, data)
        uploads_url = 'the Uploads (UU) playlist URL'
        if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):
            uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
        self.to_screen(
            'Downloading as multiple playlists, separated by tabs. '
            f'To download as a single playlist instead, pass {uploads_url}')
        return self.playlist_result(entries, item_id, **metadata)

    # Inline playlist
    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Last resort: a video id from the page data, else the one from the query string
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if tab != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    raise ExtractorError('Unable to recognize tab page')
6274
6275
class YoutubePlaylistIE(InfoExtractor):
    """Match bare playlist IDs and playlist URLs and hand them to YoutubeTabIE."""
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/c/WickmanVT',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/c/WickmanVT',
            'availability': 'public',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle `url`.

        Declines any URL that YoutubeTabIE accepts, and any URL whose query
        string carries a non-empty `v` parameter; otherwise falls back to the
        regular `_VALID_URL` matching of the parent class.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # `parse_qs` is the module-level import from ..utils; no local import needed
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Rewrite the input into a canonical /playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Must be evaluated on the original URL, before it is rewritten below
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query string when there is one; a bare playlist ID
        # has none, so fall back to just `list=<id>`
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
6388
6389
class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a `list=` playlist parameter."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link to a full watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        # The result is keyed on the playlist ID, not the video ID
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
6439
6440
class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map `embed/live_stream?channel=<id>` URLs onto the channel's /live page."""
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel's /live URL."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
6454
6455
class YoutubeYtUserIE(InfoExtractor):
    """Resolve the `ytuser:<name>` pseudo-URL to the matching /user/ page."""
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the user's page URL."""
        user_id = self._match_id(url)
        user_url = f'https://www.youtube.com/user/{user_id}'
        return self.url_result(user_url, YoutubeTabIE, user_id)
6468
6469
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the `:ytfav` keyword (and its spelling variants) to the `LL` playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the input URL carries no further information."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
6487
6488
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Expose the notification menu as a playlist via the `:ytnotif` keyword."""
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield one entry per usable notification found in `response`.

        `continuation_list` is a one-element list used as an out-parameter:
        slot 0 is reset to None, then set to the `continuationItemRenderer`
        of any item that has one.
        """
        # Two alternative locations for the item list, tried in this order
        item_paths = (
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
        )
        menu_items = traverse_obj(response, *item_paths, expected_type=list) or []
        continuation_list[0] = None
        for menu_item in menu_items:
            parsed = self._extract_notification_renderer(menu_item.get('notificationRenderer'))
            if parsed:
                yield parsed
            next_page = menu_item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Turn one `notificationRenderer` into a `url` result dict.

        Returns None when the notification has neither a video ID nor a
        channel ID together with a community post ID.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            endpoint = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(endpoint, 'browseId', expected_type=str)
            base_url = traverse_obj(endpoint, 'canonicalBaseUrl', expected_type=str)
            post_id = self._search_regex(r'/post/(.+)', base_url, 'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        short_message = self._get_text(notification, 'shortMessage')
        if short_message:
            short_message = short_message.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title_pattern = rf'{re.escape(channel or "")}[^:]+: (.+)'
        title = self._search_regex(title_pattern, short_message, 'video title', default=None)

        # The sent time is only parsed when the `approximate_date` extractor arg is set
        timestamp = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
            timestamp = self._parse_time_text(self._get_text(notification, 'sentTimeText'))

        return {
            '_type': 'url',
            'url': url,
            'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'timestamp': timestamp,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield notification entries page by page until no continuation is left."""
        continuation_list = [None]
        response = None
        for page_num in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data comes from the previous page's response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page_num}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return the notifications as a playlist with id and title 'notifications'."""
        display_id = 'notifications'
        if self.skip_webpage:
            ytcfg = {}
        else:
            ytcfg = self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)
6578
6579
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Keyword search via the `ytsearch` prefix; only class-level configuration."""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]
6593
6594
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Keyword search via the `ytsearchdate` prefix; only class-level configuration."""
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]
6608
6609
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle /results and /search URLs, passing the `sp` parameter through to the search."""
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        # Channel results
        'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
        'info_dict': {
            'id': 'kurzgesagt',
            'title': 'kurzgesagt',
        },
        'playlist': [{
            'info_dict': {
                '_type': 'url',
                'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'ie_key': 'YoutubeTab',
                'channel': 'Kurzgesagt – In a Nutshell',
                'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
                'title': 'Kurzgesagt – In a Nutshell',
                'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'playlist_count': int,  # XXX: should have a way of saying > 1
                'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'thumbnails': list
            }
        }],
        'params': {'extract_flat': True, 'playlist_items': '1'},
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the search results as a playlist whose id and title are the query."""
        qs = parse_qs(url)
        # `search_query` takes precedence over `q`; _VALID_URL requires one of them
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        search_params = qs.get('sp', (None,))[0]
        return self.playlist_result(self._search_results(query, search_params), query, query)
6673
6674
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle music.youtube.com search URLs, optionally narrowed to one result section."""
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name (as written in the URL fragment) -> `sp` search params value
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return the search results as a playlist titled '<query>' or '<query> - <section>'.

        An explicit `sp` query parameter wins; otherwise the URL fragment is
        looked up in `_SECTIONS`.
        """
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]
        if params:
            # Reverse lookup: label a known `sp` value with its section name,
            # otherwise use the raw value itself as the label
            for name, value in self._SECTIONS.items():
                if value == params:
                    section = name
                    break
            else:
                section = params
        else:
            # Text between the first and second '#', or '' when there is no fragment
            pieces = url.split('#')
            fragment = pieces[1] if len(pieces) > 1 else ''
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None
        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)
6726
6727
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.

    Each subclass is expected to override _FEED_NAME, which determines both
    the IE_NAME (`youtube:<feed>`) and the /feed/<feed> URL delegated to.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    def _real_initialize(self):
        # Borrowed from YoutubeBaseInfoExtractor, which this class does not inherit from
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(cls):
        return f'youtube:{cls._FEED_NAME}'

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with this feed's URL; the input URL itself is unused."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
6746
6747
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the `:ytwatchlater` keyword to the `WL` playlist."""
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the input URL carries no further information."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
6760
6761
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `:ytrec` keyword and the bare youtube.com home URL."""
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    # Overrides the base class default of True
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
6777
6778
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `:ytsubs` keyword and its spelling variants."""
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
6790
6791
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `:ythis` / `:ythistory` keyword."""
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
6800
6801
class YoutubeStoriesIE(InfoExtractor):
    """Resolve `ytstories:UC<channel>` to the `RLTD<channel>` playlist, flagged as a story."""
    IE_NAME = 'youtube:stories'
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with `is_story` smuggled into the playlist URL."""
        # The matched id excludes the leading 'UC' of the channel ID
        playlist_id = f'RLTD{self._match_id(url)}'
        stories_url = smuggle_url(
            f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1', {'is_story': True})
        return self.url_result(stories_url, ie=YoutubeTabIE, video_id=playlist_id)
6816
6817
class YoutubeShortsAudioPivotIE(InfoExtractor):
    """Map /source/<video id>/shorts URLs onto the `sfv_audio_pivot` feed."""
    IE_NAME = 'youtube:shorts:pivot:audio'
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [{
        'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
        'only_matching': True,
    }]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Build the sfv_audio_pivot browse params for this video id

        The encoded video id is substituted three times into a fixed byte
        template; the result is base64-encoded and then URL-quoted.
        """
        raw_id = video_id.encode()
        pb_params = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % (raw_id, raw_id, raw_id)
        encoded = base64.b64encode(pb_params).decode()
        return urllib.parse.quote(encoded)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the generated feed URL."""
        video_id = self._match_id(url)
        browse_params = self._generate_audio_pivot_params(video_id)
        return self.url_result(
            f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}',
            ie=YoutubeTabIE)
6840
6841
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that have no video ID and explain the likely cause.

    Matches URLs that end right after a single non-video query parameter
    (or none at all), which is what is left when an unquoted `&` is eaten
    by the shell.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError advising the user to quote the URL."""
        # The advice names this project's own executable, so it can be copy-pasted
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)
6889
6890
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Resolve a /clip/ URL to its base video plus the clip's start/end section."""
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """Return a `url_transparent` result for the base video, limited to the clip section.

        Raises ExtractorError when the page data has no video ID, no clip
        `loopCommand`, or clip times that cannot be parsed as integers.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        # First dict found at this path; expected_type keeps non-dict matches out
        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False, expected_type=dict)
        if not clip_data:
            # Previously this fell through to a bare TypeError on `None[...]`
            raise ExtractorError('Unable to find clip data')

        start_ms = int_or_none(clip_data.get('startTimeMs'))
        end_ms = int_or_none(clip_data.get('endTimeMs'))
        if start_ms is None or end_ms is None:
            # Fail loudly rather than silently returning an unbounded section
            raise ExtractorError('Unable to parse clip start/end time')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            'section_start': start_ms / 1000,
            'section_end': end_ms / 1000,
        }
6947
6948
class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor):
    """Follow consent.youtube.com redirect pages to the URL in their `continue` parameter."""
    IE_NAME = 'youtube:consent'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://consent\.youtube\.com/m\?'
    _TESTS = [{
        'url': 'https://consent.youtube.com/m?continue=https%3A%2F%2Fwww.youtube.com%2Flive%2FqVv6vCqciTM%3Fcbrd%3D1&gl=NL&m=0&pc=yt&hl=en&src=1',
        'info_dict': {
            'id': 'qVv6vCqciTM',
            'ext': 'mp4',
            'age_limit': 0,
            'uploader_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
            'comment_count': int,
            'chapters': 'count:13',
            'upload_date': '20221223',
            'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
            'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
            'uploader_url': 'http://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
            'like_count': int,
            'release_date': '20221223',
            'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
            'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
            'view_count': int,
            'playable_in_embed': True,
            'duration': 4438,
            'availability': 'public',
            'channel_follower_count': int,
            'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
            'categories': ['Entertainment'],
            'live_status': 'was_live',
            'release_timestamp': 1671793345,
            'channel': 'さなちゃんねる',
            'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
            'uploader': 'さなちゃんねる',
        },
        'add_ie': ['Youtube'],
        'params': {'skip_download': 'Youtube'},
    }]

    def _real_extract(self, url):
        """Return a url result for the redirect target, or raise if it is missing/invalid."""
        # When `continue` appears more than once, the last occurrence is used
        continue_values = parse_qs(url).get('continue', [None])
        redirect_url = url_or_none(continue_values[-1])
        if redirect_url:
            return self.url_result(redirect_url)
        raise ExtractorError('Invalid cookie consent redirect URL', expected=True)
6992
6993
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose `v` value is only 1-10 characters long and report them."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)