]> jfr.im git - yt-dlp.git/blame_incremental - yt_dlp/extractor/youtube.py
[extractor/dplay] Add MotorTrendOnDemand extractor (#5151)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
... / ...
CommitLineData
1import base64
2import calendar
3import copy
4import datetime
5import enum
6import hashlib
7import itertools
8import json
9import math
10import os.path
11import random
12import re
13import sys
14import threading
15import time
16import traceback
17import urllib.error
18import urllib.parse
19
20from .common import InfoExtractor, SearchInfoExtractor
21from .openload import PhantomJSwrapper
22from ..compat import functools
23from ..jsinterp import JSInterpreter
24from ..utils import (
25 NO_DEFAULT,
26 ExtractorError,
27 LazyList,
28 UserNotLive,
29 bug_reports_message,
30 classproperty,
31 clean_html,
32 datetime_from_str,
33 dict_get,
34 filter_dict,
35 float_or_none,
36 format_field,
37 get_first,
38 int_or_none,
39 is_html,
40 join_nonempty,
41 js_to_json,
42 mimetype2ext,
43 network_exceptions,
44 orderedSet,
45 parse_codecs,
46 parse_count,
47 parse_duration,
48 parse_iso8601,
49 parse_qs,
50 qualities,
51 remove_start,
52 smuggle_url,
53 str_or_none,
54 str_to_int,
55 strftime_or_none,
56 traverse_obj,
57 try_get,
58 unescapeHTML,
59 unified_strdate,
60 unified_timestamp,
61 unsmuggle_url,
62 update_url_query,
63 url_or_none,
64 urljoin,
65 variadic,
66)
67
68# any clients starting with _ cannot be explicitly requested by the user
69INNERTUBE_CLIENTS = {
70 'web': {
71 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
72 'INNERTUBE_CONTEXT': {
73 'client': {
74 'clientName': 'WEB',
75 'clientVersion': '2.20220801.00.00',
76 }
77 },
78 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
79 },
80 'web_embedded': {
81 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
82 'INNERTUBE_CONTEXT': {
83 'client': {
84 'clientName': 'WEB_EMBEDDED_PLAYER',
85 'clientVersion': '1.20220731.00.00',
86 },
87 },
88 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
89 },
90 'web_music': {
91 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
92 'INNERTUBE_HOST': 'music.youtube.com',
93 'INNERTUBE_CONTEXT': {
94 'client': {
95 'clientName': 'WEB_REMIX',
96 'clientVersion': '1.20220727.01.00',
97 }
98 },
99 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
100 },
101 'web_creator': {
102 'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
103 'INNERTUBE_CONTEXT': {
104 'client': {
105 'clientName': 'WEB_CREATOR',
106 'clientVersion': '1.20220726.00.00',
107 }
108 },
109 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
110 },
111 'android': {
112 'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
113 'INNERTUBE_CONTEXT': {
114 'client': {
115 'clientName': 'ANDROID',
116 'clientVersion': '17.31.35',
117 'androidSdkVersion': 30,
118 'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
119 }
120 },
121 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
122 'REQUIRE_JS_PLAYER': False
123 },
124 'android_embedded': {
125 'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
126 'INNERTUBE_CONTEXT': {
127 'client': {
128 'clientName': 'ANDROID_EMBEDDED_PLAYER',
129 'clientVersion': '17.31.35',
130 'androidSdkVersion': 30,
131 'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
132 },
133 },
134 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
135 'REQUIRE_JS_PLAYER': False
136 },
137 'android_music': {
138 'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
139 'INNERTUBE_CONTEXT': {
140 'client': {
141 'clientName': 'ANDROID_MUSIC',
142 'clientVersion': '5.16.51',
143 'androidSdkVersion': 30,
144 'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
145 }
146 },
147 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
148 'REQUIRE_JS_PLAYER': False
149 },
150 'android_creator': {
151 'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
152 'INNERTUBE_CONTEXT': {
153 'client': {
154 'clientName': 'ANDROID_CREATOR',
155 'clientVersion': '22.30.100',
156 'androidSdkVersion': 30,
157 'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
158 },
159 },
160 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
161 'REQUIRE_JS_PLAYER': False
162 },
163 # iOS clients have HLS live streams. Setting device model to get 60fps formats.
164 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
165 'ios': {
166 'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
167 'INNERTUBE_CONTEXT': {
168 'client': {
169 'clientName': 'IOS',
170 'clientVersion': '17.33.2',
171 'deviceModel': 'iPhone14,3',
172 'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
173 }
174 },
175 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
176 'REQUIRE_JS_PLAYER': False
177 },
178 'ios_embedded': {
179 'INNERTUBE_CONTEXT': {
180 'client': {
181 'clientName': 'IOS_MESSAGES_EXTENSION',
182 'clientVersion': '17.33.2',
183 'deviceModel': 'iPhone14,3',
184 'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
185 },
186 },
187 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
188 'REQUIRE_JS_PLAYER': False
189 },
190 'ios_music': {
191 'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
192 'INNERTUBE_CONTEXT': {
193 'client': {
194 'clientName': 'IOS_MUSIC',
195 'clientVersion': '5.21',
196 'deviceModel': 'iPhone14,3',
197 'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
198 },
199 },
200 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
201 'REQUIRE_JS_PLAYER': False
202 },
203 'ios_creator': {
204 'INNERTUBE_CONTEXT': {
205 'client': {
206 'clientName': 'IOS_CREATOR',
207 'clientVersion': '22.33.101',
208 'deviceModel': 'iPhone14,3',
209 'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
210 },
211 },
212 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
213 'REQUIRE_JS_PLAYER': False
214 },
215 # mweb has 'ultralow' formats
216 # See: https://github.com/yt-dlp/yt-dlp/pull/557
217 'mweb': {
218 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
219 'INNERTUBE_CONTEXT': {
220 'client': {
221 'clientName': 'MWEB',
222 'clientVersion': '2.20220801.00.00',
223 }
224 },
225 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
226 },
227 # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
228 # See: https://github.com/zerodytrash/YouTube-Internal-Clients
229 'tv_embedded': {
230 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
231 'INNERTUBE_CONTEXT': {
232 'client': {
233 'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
234 'clientVersion': '2.0',
235 },
236 },
237 'INNERTUBE_CONTEXT_CLIENT_NAME': 85
238 },
239}
240
241
242def _split_innertube_client(client_name):
243 variant, *base = client_name.rsplit('.', 1)
244 if base:
245 return variant, base[0], variant
246 base, *variant = client_name.split('_', 1)
247 return client_name, base, variant[0] if variant else None
248
249
def build_innertube_clients():
    """
    Complete every entry of the module-level INNERTUBE_CLIENTS in place.

    Each client config receives default values for INNERTUBE_API_KEY,
    INNERTUBE_HOST, REQUIRE_JS_PLAYER and the context's 'hl', plus a
    'priority' derived from its base client. Clients without a variant
    additionally get a deep-copied "<name>_embedscreen" sibling entry.
    """
    embed_context = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_order = ('android', 'web', 'tv', 'ios', 'mweb')
    rank = qualities(base_order[::-1])
    fallbacks = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: new "*_embedscreen" entries are inserted below
    for name, cfg in list(INNERTUBE_CLIENTS.items()):
        for key, value in fallbacks:
            cfg.setdefault(key, value)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        base_name, variant = _split_innertube_client(name)[1:]
        cfg['priority'] = 10 * rank(base_name)

        if variant == 'embedded':
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            cfg['priority'] -= 2
        elif variant:
            cfg['priority'] -= 3
        else:
            screen_cfg = copy.deepcopy(cfg)
            screen_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            screen_cfg['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            screen_cfg['priority'] -= 3
            INNERTUBE_CLIENTS[f'{name}_embedscreen'] = screen_cfg
276
277
278build_innertube_clients()
279
280
class BadgeType(enum.Enum):
    """Kinds of badge that _extract_badges can report for a renderer."""

    # Values are the same 1..6 sequence that enum.auto() would assign
    AVAILABILITY_UNLISTED = 1
    AVAILABILITY_PRIVATE = 2
    AVAILABILITY_PUBLIC = 3
    AVAILABILITY_PREMIUM = 4
    AVAILABILITY_SUBSCRIPTION = 5
    LIVE_NOW = 6
288
289
290class YoutubeBaseInfoExtractor(InfoExtractor):
291 """Provide base functions for Youtube extractors"""
292
293 _RESERVED_NAMES = (
294 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
295 r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
296 r'browse|oembed|get_video_info|iframe_api|s/player|source|'
297 r'storefront|oops|index|account|t/terms|about|upload|signin|logout')
298
299 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
300
301 # _NETRC_MACHINE = 'youtube'
302
303 # If True it will raise an error if no login info is provided
304 _LOGIN_REQUIRED = False
305
306 _INVIDIOUS_SITES = (
307 # invidious-redirect websites
308 r'(?:www\.)?redirect\.invidious\.io',
309 r'(?:(?:www|dev)\.)?invidio\.us',
310 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
311 r'(?:www\.)?invidious\.pussthecat\.org',
312 r'(?:www\.)?invidious\.zee\.li',
313 r'(?:www\.)?invidious\.ethibox\.fr',
314 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
315 r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
316 r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
317 # youtube-dl invidious instances list
318 r'(?:(?:www|no)\.)?invidiou\.sh',
319 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
320 r'(?:www\.)?invidious\.kabi\.tk',
321 r'(?:www\.)?invidious\.mastodon\.host',
322 r'(?:www\.)?invidious\.zapashcanon\.fr',
323 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
324 r'(?:www\.)?invidious\.tinfoil-hat\.net',
325 r'(?:www\.)?invidious\.himiko\.cloud',
326 r'(?:www\.)?invidious\.reallyancient\.tech',
327 r'(?:www\.)?invidious\.tube',
328 r'(?:www\.)?invidiou\.site',
329 r'(?:www\.)?invidious\.site',
330 r'(?:www\.)?invidious\.xyz',
331 r'(?:www\.)?invidious\.nixnet\.xyz',
332 r'(?:www\.)?invidious\.048596\.xyz',
333 r'(?:www\.)?invidious\.drycat\.fr',
334 r'(?:www\.)?inv\.skyn3t\.in',
335 r'(?:www\.)?tube\.poal\.co',
336 r'(?:www\.)?tube\.connect\.cafe',
337 r'(?:www\.)?vid\.wxzm\.sx',
338 r'(?:www\.)?vid\.mint\.lgbt',
339 r'(?:www\.)?vid\.puffyan\.us',
340 r'(?:www\.)?yewtu\.be',
341 r'(?:www\.)?yt\.elukerio\.org',
342 r'(?:www\.)?yt\.lelux\.fi',
343 r'(?:www\.)?invidious\.ggc-project\.de',
344 r'(?:www\.)?yt\.maisputain\.ovh',
345 r'(?:www\.)?ytprivate\.com',
346 r'(?:www\.)?invidious\.13ad\.de',
347 r'(?:www\.)?invidious\.toot\.koeln',
348 r'(?:www\.)?invidious\.fdn\.fr',
349 r'(?:www\.)?watch\.nettohikari\.com',
350 r'(?:www\.)?invidious\.namazso\.eu',
351 r'(?:www\.)?invidious\.silkky\.cloud',
352 r'(?:www\.)?invidious\.exonip\.de',
353 r'(?:www\.)?invidious\.riverside\.rocks',
354 r'(?:www\.)?invidious\.blamefran\.net',
355 r'(?:www\.)?invidious\.moomoo\.de',
356 r'(?:www\.)?ytb\.trom\.tf',
357 r'(?:www\.)?yt\.cyberhost\.uk',
358 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
359 r'(?:www\.)?qklhadlycap4cnod\.onion',
360 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
361 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
362 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
363 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
364 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
365 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
366 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
367 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
368 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
369 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
370 # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
371 r'(?:www\.)?piped\.kavin\.rocks',
372 r'(?:www\.)?piped\.silkky\.cloud',
373 r'(?:www\.)?piped\.tokhmi\.xyz',
374 r'(?:www\.)?piped\.moomoo\.me',
375 r'(?:www\.)?il\.ax',
376 r'(?:www\.)?piped\.syncpundit\.com',
377 r'(?:www\.)?piped\.mha\.fi',
378 r'(?:www\.)?piped\.mint\.lgbt',
379 r'(?:www\.)?piped\.privacy\.com\.de',
380 )
381
382 # extracted from account/account_menu ep
383 # XXX: These are the supported YouTube UI and API languages,
384 # which is slightly different from languages supported for translation in YouTube studio
385 _SUPPORTED_LANG_CODES = [
386 'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
387 'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
388 'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
389 'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
390 'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
391 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
392 ]
393
394 _IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
395
@functools.cached_property
def _preferred_lang(self):
    """
    Returns a language code supported by YouTube for the user preferred language.
    Returns None if no preferred language set.
    Raises ExtractorError when the requested code is not in _SUPPORTED_LANG_CODES.
    """
    lang = self._configuration_arg(
        'lang', ie_key='Youtube', casesense=True, default=[''])[0]
    if not lang:
        return None
    if lang in self._SUPPORTED_LANG_CODES:
        if lang != 'en':
            self.report_warning(
                f'Preferring "{lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
        return lang
    raise ExtractorError(
        f'Unsupported language code: {lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
        expected=True)
413
def _initialize_consent(self):
    """
    Set a 'YES' CONSENT cookie for .youtube.com unless one is unnecessary.

    Nothing is done when a '__Secure-3PSID' cookie exists or the existing
    CONSENT cookie already contains 'YES'. The id of a 'PENDING+<id>' cookie
    is reused when present; otherwise a random 3-digit id is generated.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return

    pending_id = None
    consent_cookie = jar.get('CONSENT')
    if consent_cookie:
        if 'YES' in consent_cookie.value:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', consent_cookie.value, 'consent', default=None)

    suffix = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', f'YES+cb.20210328-17-p0.en+FX+{suffix}')
428
def _initialize_pref(self):
    """
    Rewrite the PREF cookie so that 'hl' is the preferred language (or 'en')
    and 'tz' is 'UTC', keeping any other values of an existing PREF cookie.
    """
    pref_cookie = self._get_cookies('https://www.youtube.com/').get('PREF')
    settings = {}
    if pref_cookie:
        try:
            settings.update(urllib.parse.parse_qsl(pref_cookie.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
    settings['hl'] = self._preferred_lang or 'en'
    settings['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(settings))
440
def _real_initialize(self):
    """Run the PREF cookie, consent cookie and login-required steps, in that order."""
    for step in (self._initialize_pref, self._initialize_consent, self._check_login_required):
        step()
445
def _check_login_required(self):
    """Call raise_login_required when _LOGIN_REQUIRED is set and no cookies were passed."""
    if not self._LOGIN_REQUIRED or self._cookies_passed:
        return
    self.raise_login_required('Login details are needed to download this content', method='cookies')
449
450 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
451 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
452
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the INNERTUBE_CLIENTS entry for `client`."""
    template = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(template)
455
def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' value of the INNERTUBE_CLIENTS entry for `client`."""
    client_cfg = INNERTUBE_CLIENTS[client]
    return client_cfg['INNERTUBE_HOST']
458
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get on `ytcfg`, falling back to the same lookup on the default
    ytcfg of `default_client` when the first result is falsy.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
463
def _extract_client_name(self, ytcfg, default_client='web'):
    """
    Look up the client name as a str: 'INNERTUBE_CLIENT_NAME' first, then the
    context's 'clientName', with fallback to the default client config.
    """
    getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_NAME'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)
468
def _extract_client_version(self, ytcfg, default_client='web'):
    """
    Look up the client version as a str: 'INNERTUBE_CLIENT_VERSION' first, then
    the context's 'clientVersion', with fallback to the default client config.
    """
    getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)
473
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """
    Pick the API hostname: the 'innertube_host' extractor argument wins, then
    `req_api_hostname`, then the host of `default_client` (or of 'web').
    """
    override = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if override:
        return override
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
477
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return 'INNERTUBE_API_KEY' (a str) from `ytcfg`, else from the default client config."""
    key_getter = lambda cfg: cfg['INNERTUBE_API_KEY']
    return self._ytcfg_get_safe(ytcfg, key_getter, str, default_client)
480
def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the first dict-valued 'INNERTUBE_CONTEXT' found in `ytcfg` or in the
    default config of `default_client`, after writing language and timezone
    values into its 'client' dict (when that dict exists).
    """
    sources = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(sources, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_section = traverse_obj(context, 'client', expected_type=dict, default={})
    client_section['hl'] = self._preferred_lang or 'en'
    client_section['timeZone'] = 'UTC'
    client_section['utcOffsetMinutes'] = 0
    return context
488
489 _SAPISID = None
490
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """
    Build the 'SAPISIDHASH <time>_<sha1>' authorization value for `origin`.

    The SAPISID value is read from the cookies on first use and remembered in
    self._SAPISID (False when no usable cookie exists). Returns None when no
    SAPISID is available.
    """
    timestamp = round(time.time())
    if self._SAPISID is None:
        jar = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        cookie = dict_get(jar, ('__Secure-3PAPISID', 'SAPISID'))
        if not (cookie and cookie.value):
            self._SAPISID = False
        else:
            self._SAPISID = cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not jar.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=timestamp + 3600)

    sapisid = self._SAPISID
    if not sapisid:
        return None
    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    digest = hashlib.sha1(f'{timestamp} {sapisid} {origin}'.encode()).hexdigest()
    return f'SAPISIDHASH {timestamp}_{digest}'
515
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST `query` (merged with the innertube context) as JSON to the
    /youtubei/v1/<ep> endpoint and return the result of _download_json.

    A falsy `context` is replaced by the context of `default_client`. The
    'innertube_key' extractor argument takes precedence over `api_key`,
    which takes precedence over the key of `default_client`.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    key = self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    if not key:
        key = api_key or self._extract_api_key(default_client=default_client)

    host = self._select_api_hostname(api_hostname, default_client)
    return self._download_json(
        f'https://{host}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': key, 'prettyPrint': 'false'})
533
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search `webpage` for the JSON that follows the _YT_INITIAL_DATA_RE assignment."""
    return self._search_json(
        self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id,
        fatal=fatal)
536
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first 'SESSION_INDEX' among `data` that converts to an int,
    or None when there is none.
    """
    indices = (
        int_or_none(try_get(cfg, lambda x: x['SESSION_INDEX']))
        for cfg in data)
    return next((index for index in indices if index is not None), None)
547
548 # Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """
    Return the str 'ID_TOKEN' of `ytcfg` when present and non-empty; otherwise
    search `webpage` for an ID_TOKEN assignment. Returns None if neither yields a token.
    """
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
558
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated:
            return delegated

        datasync_id = try_get(source, datasync_getters, str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]
577
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    # Alternative locations of the value, tried on each argument
    candidate_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [candidate_paths], expected_type=str)
587
@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
591
def extract_ytcfg(self, video_id, webpage):
    """
    Parse the object passed to the first `ytcfg.set({...});` call in `webpage`.
    Returns {} when the page is empty, nothing matches or parsing fails.
    """
    if not webpage:
        return {}
    raw_cfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', default='{}')
    parsed = self._parse_json(raw_cfg, video_id, fatal=False)
    return parsed or {}
599
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an innertube API request.

    Explicit keyword arguments take precedence over values found in `ytcfg`.
    The result is passed through filter_dict before being returned.
    """
    origin = 'https://' + self._select_api_hostname(api_hostname, default_client)
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    user_agent = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)

    headers = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
        'User-Agent': user_agent,
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers.update({'Authorization': auth, 'X-Origin': origin})
    return filter_dict(headers)
625
def _download_ytcfg(self, client, video_id):
    """
    Download the page belonging to `client` ('web', 'web_music' or
    'web_embedded') and return the ytcfg extracted from it.
    Returns {} for any other client or when nothing could be extracted.
    """
    if client == 'web':
        url = 'https://www.youtube.com'
    elif client == 'web_music':
        url = 'https://music.youtube.com'
    elif client == 'web_embedded':
        url = f'https://www.youtube.com/embed/{video_id}?html5=1'
    else:
        return {}

    display_name = client.replace("_", " ").strip()
    webpage = self._download_webpage(
        url, video_id, fatal=False, note=f'Downloading {display_name} client config')
    return self.extract_ytcfg(video_id, webpage) or {}
637
638 @staticmethod
639 def _build_api_continuation_query(continuation, ctp=None):
640 query = {
641 'continuation': continuation
642 }
643 # TODO: Inconsistency with clickTrackingParams.
644 # Currently we have a fixed ctp contained within context (from ytcfg)
645 # and a ctp in root query for continuation.
646 if ctp:
647 query['clickTracking'] = {'clickTrackingParams': ctp}
648 return query
649
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the renderer's 'nextContinuationData' or
    'reloadContinuationData' dict. Returns None when no token is found.
    """
    getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    payload = try_get(renderer, getters, dict)
    token = payload.get('continuation') if payload else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, payload.get('clickTrackingParams'))
662
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from an endpoint's 'continuationCommand' token.
    Returns None for non-dict input or when the token is missing.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_ep.get('clickTrackingParams'))
672
@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for `renderer`: the next/reload continuation
    data when present, otherwise the first continuationItemRenderer endpoint
    found under 'contents', 'items' or 'rows'.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    item_path = (
        ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
        ('continuationEndpoint', ('button', 'buttonRenderer', 'command'))
    )
    return traverse_obj(
        renderer, item_path, get_all=False,
        expected_type=cls._extract_continuation_ep_data)
683
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (type, message) for every entry of data['alerts'] that has both a
    'type' and a non-empty text. Non-dict list items are skipped.
    """
    alert_list = try_get(data, lambda x: x['alerts'], list) or []
    for entry in alert_list:
        if not isinstance(entry, dict):
            continue
        for renderer in entry.values():
            kind = renderer.get('type')
            text = cls._get_text(renderer, 'text') if kind else None
            if text:
                yield kind, text
696
def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """
    Report (type, message) alerts.

    With `fatal` set, alerts whose type is 'error' (case-insensitive) are
    errors: the last one is raised as ExtractorError and the others are
    reported as warnings. All remaining alerts are reported as warnings
    unless their message is in _IGNORED_WARNINGS.
    """
    fatal_alerts, other_alerts = [], []
    for kind, message in alerts:
        if kind.lower() == 'error' and fatal:
            fatal_alerts.append((kind, message))
        elif message not in self._IGNORED_WARNINGS:
            other_alerts.append((kind, message))

    for kind, message in itertools.chain(other_alerts, fatal_alerts[:-1]):
        self.report_warning(f'YouTube said: {kind} - {message}', only_once=only_once)
    if not fatal_alerts:
        return
    raise ExtractorError('YouTube said: %s' % fatal_alerts[-1][1], expected=expected)
709
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts of `data` and hand them to _report_alerts with the given options."""
    found_alerts = self._extract_alerts(data)
    return self._report_alerts(found_alerts, *args, **kwargs)
712
def _extract_badges(self, renderer: dict):
    """
    Collect the badges of `renderer` as a list of {'type': BadgeType} dicts.

    Each 'metadataBadgeRenderer' under renderer['badges'] is classified by its
    icon type, then by its style, and finally by searching its label for a
    known English keyword. At most one entry is added per badge; badges that
    match nothing are skipped.
    """
    privacy_icon_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM
    }

    badges = []
    for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer'), default=[]):
        badge_type = (
            privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
            or badge_style_map.get(traverse_obj(badge, 'style'))
        )
        if badge_type:
            badges.append({'type': badge_type})
            continue

        # fallback, won't work in some languages
        label = traverse_obj(badge, 'label', expected_type=str, default='').lower()
        for match, label_badge_type in label_map.items():
            if match in label:
                # Record the type found via the label (badge_type is falsy here)
                # and stop at the first keyword so a badge is reported only once
                badges.append({'type': label_badge_type})
                break

    return badges
752
@staticmethod
def _has_badge(badges, badge_type):
    """Return True if any entry of `badges` has its 'type' equal to `badge_type`."""
    matching = traverse_obj(badges, lambda _, badge: badge['type'] == badge_type)
    return bool(matching)
756
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found in `data`.

    @param path_list: paths (for traverse_obj) to try in order; `data` itself
                      is inspected when no path is given
    @param max_runs:  when truthy, only this many leading 'runs' are joined
    Returns None (implicitly) when no candidate yields text.
    """
    for path in path_list or [None]:
        if path is None:
            obj = [data]
        else:
            obj = traverse_obj(data, path, default=[])
            # Wrap the result unless the path contains `...` or a list/tuple key;
            # presumably such paths already make traverse_obj return a list - TODO confirm
            if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                obj = [obj]
        for item in obj:
            # A 'simpleText' string takes precedence over 'runs'
            text = try_get(item, lambda x: x['simpleText'], str)
            if text:
                return text
            runs = try_get(item, lambda x: x['runs'], list) or []
            # The item may itself be the list of runs
            if not runs and isinstance(item, list):
                runs = item

            runs = runs[:min(len(runs), max_runs or len(runs))]
            text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
            if text:
                return text
778
def _get_count(self, data, *path_list):
    """
    Read a count from the text found via _get_text.

    parse_count is tried first; if it returns None, the leading run of digits
    and commas (after removing all whitespace) is converted with str_to_int.
    """
    raw_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(raw_text)
    if parsed is not None:
        return parsed
    leading_digits = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', raw_text), 'count', default=None)
    return str_to_int(leading_digits)
786
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    Returns a list of {'url', 'height', 'width'} dicts; entries without a valid URL are dropped.
    """
    found = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in entries:
            url = url_or_none(entry.get('url'))
            if url:
                # Sometimes youtube gives a wrong thumbnail URL. See:
                # https://github.com/yt-dlp/yt-dlp/issues/233
                # https://github.com/ytdl-org/youtube-dl/issues/28023
                if 'maxresdefault' in url:
                    url = url.partition('?')[0]
                found.append({
                    'url': url,
                    'height': int_or_none(entry.get('height')),
                    'width': int_or_none(entry.get('width')),
                })
    return found
810
@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'
    Returns None when the text does not match or the conversion raises ValueError.
    """
    match = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if match is None:
        return None

    keyword = match.group('start')
    if keyword:
        return datetime_from_str(keyword)

    amount, unit = match.group('time', 'unit')
    try:
        return datetime_from_str(f'now-{amount}{unit}')
    except ValueError:
        return None
826
def _parse_time_text(self, text):
    """
    Convert a time text to a timestamp.

    Relative texts ("6 days ago") are handled via extract_relative_time;
    anything else goes through unified_timestamp, first on the whole text and
    then on a date-looking fragment of it. A warning is emitted when nothing
    could be parsed and the preferred language is unset or 'en'.
    """
    if not text:
        return

    relative = self.extract_relative_time(text)
    if isinstance(relative, datetime.datetime):
        timestamp = calendar.timegm(relative.timetuple())
    else:
        date_patterns = (
            r'([a-z]+\s*\d{1,2},?\s*20\d{2})',
            r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)')
        timestamp = unified_timestamp(text) or unified_timestamp(
            self._search_regex(date_patterns, text.lower(), 'time text', default=None))

    if timestamp is None and self._preferred_lang in (None, 'en'):
        self.report_warning(
            f'Cannot parse localized time text "{text}"', only_once=True)
    return timestamp
846
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` through _call_api, retrying via self.RetryManager().

    The attempt is retried on network errors other than HTTP 403/429, on alerts
    whose message contains 'unknown error', and when the lookup of
    `check_get_keys` in the response is falsy ("Incomplete data received").
    Every other failure is passed to _error_or_warning together with `fatal`,
    and that call's result is returned. On success the response is returned.
    """
    for retry in self.RetryManager():
        try:
            # Always fatal here so failures surface as ExtractorError and can be classified below
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            if not isinstance(e.cause, network_exceptions):
                return self._error_or_warning(e, fatal=fatal)
            elif not isinstance(e.cause, urllib.error.HTTPError):
                retry.error = e
                continue

            # Peek at the error body: a non-HTML body may be JSON carrying an error message
            first_bytes = e.cause.read(512)
            if not is_html(first_bytes):
                yt_error = try_get(
                    self._parse_json(
                        self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if e.cause.code not in (403, 429):
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            retry.error = ExtractorError('Incomplete data received', expected=True)
            continue

        return response
898
899 @staticmethod
900 def is_music_url(url):
901 return re.match(r'https?://music\.youtube\.com/', url) is not None
902
def _extract_video(self, renderer):
    """
    Builds a 'url' type result pointing at YoutubeIE from a video renderer dict

    The duration is taken from the length text, falling back to the title's accessibility label.
    The URL uses the /shorts/ form when the overlay style is 'SHORTS'
    or the navigation URL contains '/shorts/'; otherwise the watch form.
    upload_date is only filled in when the 'approximate_date' argument
    is configured for 'youtubetab'.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    length_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(length_text)
    if duration is None:
        # No usable length text; look for the duration in the accessibility label instead
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    time_text = self._get_text(renderer, 'publishedTimeText') or ''
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    navigation_path = traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)
    navigation_url = urljoin('https://www.youtube.com/', navigation_path) or ''
    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    # The time text is only parsed into a date when the user asked for approximate dates
    upload_date = None
    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(self._parse_time_text(time_text), '%Y%m%d')

    # First matching condition wins: upcoming, then finished stream, then currently live
    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW):
        live_status = 'is_live'
    else:
        live_status = None

    if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC):
        availability = 'public'
    else:
        # A missing badge is passed as None rather than False
        availability = self._availability(
            is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
            needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
            needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
            is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
    }
963
964
965class YoutubeIE(YoutubeBaseInfoExtractor):
966 IE_DESC = 'YouTube'
967 _VALID_URL = r"""(?x)^
968 (
969 (?:https?://|//) # http(s):// or protocol-independent URL
970 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
971 (?:www\.)?deturl\.com/www\.youtube\.com|
972 (?:www\.)?pwnyoutube\.com|
973 (?:www\.)?hooktube\.com|
974 (?:www\.)?yourepeat\.com|
975 tube\.majestyc\.net|
976 %(invidious)s|
977 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
978 (?:.*?\#/)? # handle anchor (#/) redirect urls
979 (?: # the various things that can precede the ID:
980 (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
981 |(?: # or the v= param in all its forms
982 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
983 (?:\?|\#!?) # the params delimiter ? or # or #!
984 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
985 v=
986 )
987 ))
988 |(?:
989 youtu\.be| # just youtu.be/xxxx
990 vid\.plus| # or vid.plus/xxxx
991 zwearz\.com/watch| # or zwearz.com/watch/xxxx
992 %(invidious)s
993 )/
994 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
995 )
996 )? # all until now is optional -> you can pass the naked ID
997 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
998 (?(1).+)? # if we found the ID, everything can follow
999 (?:\#|$)""" % {
1000 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
1001 }
1002 _EMBED_REGEX = [
1003 r'''(?x)
1004 (?:
1005 <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
1006 data-video-url=|
1007 <embed[^>]+?src=|
1008 embedSWF\(?:\s*|
1009 <object[^>]+data=|
1010 new\s+SWFObject\(
1011 )
1012 (["\'])
1013 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1014 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1015 \1''',
1016 # https://wordpress.org/plugins/lazy-load-for-videos/
1017 r'''(?xs)
1018 <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
1019 \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
1020 ]
1021
1022 _PLAYER_INFO_RE = (
1023 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
1024 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
1025 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
1026 )
1027 _formats = {
1028 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1029 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1030 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
1031 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
1032 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
1033 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1034 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1035 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1036 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
1037 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1038 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1039 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1040 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1041 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1042 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1043 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1044 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1045 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1046
1047
1048 # 3D videos
1049 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1050 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1051 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1052 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1053 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1054 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1055 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1056
1057 # Apple HTTP Live Streaming
1058 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1059 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1060 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1061 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1062 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1063 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1064 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1065 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
1066
1067 # DASH mp4 video
1068 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1069 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1070 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1071 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1072 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
1073 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
1074 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1075 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1076 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1077 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1078 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1079 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
1080
1081 # Dash mp4 audio
1082 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1083 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1084 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1085 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1086 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1087 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1088 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
1089
1090 # Dash webm
1091 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1092 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1093 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1094 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1095 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1096 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1097 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1098 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1099 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1100 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1101 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1102 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1103 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1104 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1105 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1106 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1107 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1108 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1109 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1110 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1111 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1112 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1113
1114 # Dash webm audio
1115 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1116 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1117
1118 # Dash webm audio with opus inside
1119 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1120 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1121 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1122
1123 # RTMP (unnamed)
1124 '_rtmp': {'protocol': 'rtmp'},
1125
1126 # av01 video only formats sometimes served with "unknown" codecs
1127 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1128 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1129 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1130 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1131 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1132 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1133 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1134 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1135 }
1136 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
1137
1138 _GEO_BYPASS = False
1139
1140 IE_NAME = 'youtube'
1141 _TESTS = [
1142 {
1143 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1144 'info_dict': {
1145 'id': 'BaW_jenozKc',
1146 'ext': 'mp4',
1147 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1148 'uploader': 'Philipp Hagemeister',
1149 'uploader_id': 'phihag',
1150 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1151 'channel': 'Philipp Hagemeister',
1152 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1153 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1154 'upload_date': '20121002',
1155 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1156 'categories': ['Science & Technology'],
1157 'tags': ['youtube-dl'],
1158 'duration': 10,
1159 'view_count': int,
1160 'like_count': int,
1161 'availability': 'public',
1162 'playable_in_embed': True,
1163 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1164 'live_status': 'not_live',
1165 'age_limit': 0,
1166 'start_time': 1,
1167 'end_time': 9,
1168 'comment_count': int,
1169 'channel_follower_count': int
1170 }
1171 },
1172 {
1173 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1174 'note': 'Embed-only video (#1746)',
1175 'info_dict': {
1176 'id': 'yZIXLfi8CZQ',
1177 'ext': 'mp4',
1178 'upload_date': '20120608',
1179 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1180 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1181 'uploader': 'SET India',
1182 'uploader_id': 'setindia',
1183 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1184 'age_limit': 18,
1185 },
1186 'skip': 'Private video',
1187 },
1188 {
1189 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1190 'note': 'Use the first video ID in the URL',
1191 'info_dict': {
1192 'id': 'BaW_jenozKc',
1193 'ext': 'mp4',
1194 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1195 'uploader': 'Philipp Hagemeister',
1196 'uploader_id': 'phihag',
1197 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1198 'channel': 'Philipp Hagemeister',
1199 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1200 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1201 'upload_date': '20121002',
1202 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1203 'categories': ['Science & Technology'],
1204 'tags': ['youtube-dl'],
1205 'duration': 10,
1206 'view_count': int,
1207 'like_count': int,
1208 'availability': 'public',
1209 'playable_in_embed': True,
1210 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1211 'live_status': 'not_live',
1212 'age_limit': 0,
1213 'comment_count': int,
1214 'channel_follower_count': int
1215 },
1216 'params': {
1217 'skip_download': True,
1218 },
1219 },
1220 {
1221 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1222 'note': '256k DASH audio (format 141) via DASH manifest',
1223 'info_dict': {
1224 'id': 'a9LDPn-MO4I',
1225 'ext': 'm4a',
1226 'upload_date': '20121002',
1227 'uploader_id': '8KVIDEO',
1228 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1229 'description': '',
1230 'uploader': '8KVIDEO',
1231 'title': 'UHDTV TEST 8K VIDEO.mp4'
1232 },
1233 'params': {
1234 'youtube_include_dash_manifest': True,
1235 'format': '141',
1236 },
1237 'skip': 'format 141 not served anymore',
1238 },
1239 # DASH manifest with encrypted signature
1240 {
1241 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1242 'info_dict': {
1243 'id': 'IB3lcPjvWLA',
1244 'ext': 'm4a',
1245 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1246 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1247 'duration': 244,
1248 'uploader': 'AfrojackVEVO',
1249 'uploader_id': 'AfrojackVEVO',
1250 'upload_date': '20131011',
1251 'abr': 129.495,
1252 'like_count': int,
1253 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1254 'playable_in_embed': True,
1255 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1256 'view_count': int,
1257 'track': 'The Spark',
1258 'live_status': 'not_live',
1259 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1260 'channel': 'Afrojack',
1261 'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
1262 'tags': 'count:19',
1263 'availability': 'public',
1264 'categories': ['Music'],
1265 'age_limit': 0,
1266 'alt_title': 'The Spark',
1267 'channel_follower_count': int
1268 },
1269 'params': {
1270 'youtube_include_dash_manifest': True,
1271 'format': '141/bestaudio[ext=m4a]',
1272 },
1273 },
1274 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1275 {
1276 'note': 'Embed allowed age-gate video',
1277 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1278 'info_dict': {
1279 'id': 'HtVdAasjOgU',
1280 'ext': 'mp4',
1281 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1282 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1283 'duration': 142,
1284 'uploader': 'The Witcher',
1285 'uploader_id': 'WitcherGame',
1286 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1287 'upload_date': '20140605',
1288 'age_limit': 18,
1289 'categories': ['Gaming'],
1290 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1291 'availability': 'needs_auth',
1292 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1293 'like_count': int,
1294 'channel': 'The Witcher',
1295 'live_status': 'not_live',
1296 'tags': 'count:17',
1297 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1298 'playable_in_embed': True,
1299 'view_count': int,
1300 'channel_follower_count': int
1301 },
1302 },
1303 {
1304 'note': 'Age-gate video with embed allowed in public site',
1305 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1306 'info_dict': {
1307 'id': 'HsUATh_Nc2U',
1308 'ext': 'mp4',
1309 'title': 'Godzilla 2 (Official Video)',
1310 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1311 'upload_date': '20200408',
1312 'uploader_id': 'FlyingKitty900',
1313 'uploader': 'FlyingKitty',
1314 'age_limit': 18,
1315 'availability': 'needs_auth',
1316 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1317 'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
1318 'channel': 'FlyingKitty',
1319 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1320 'view_count': int,
1321 'categories': ['Entertainment'],
1322 'live_status': 'not_live',
1323 'tags': ['Flyingkitty', 'godzilla 2'],
1324 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1325 'like_count': int,
1326 'duration': 177,
1327 'playable_in_embed': True,
1328 'channel_follower_count': int
1329 },
1330 },
1331 {
1332 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1333 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1334 'info_dict': {
1335 'id': 'Tq92D6wQ1mg',
1336 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1337 'ext': 'mp4',
1338 'upload_date': '20191228',
1339 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1340 'uploader': 'Projekt Melody',
1341 'description': 'md5:17eccca93a786d51bc67646756894066',
1342 'age_limit': 18,
1343 'like_count': int,
1344 'availability': 'needs_auth',
1345 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1346 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1347 'view_count': int,
1348 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1349 'channel': 'Projekt Melody',
1350 'live_status': 'not_live',
1351 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1352 'playable_in_embed': True,
1353 'categories': ['Entertainment'],
1354 'duration': 106,
1355 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1356 'comment_count': int,
1357 'channel_follower_count': int
1358 },
1359 },
1360 {
1361 'note': 'Non-Agegated non-embeddable video',
1362 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1363 'info_dict': {
1364 'id': 'MeJVWBSsPAY',
1365 'ext': 'mp4',
1366 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1367 'uploader': 'Herr Lurik',
1368 'uploader_id': 'st3in234',
1369 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1370 'upload_date': '20130730',
1371 'track': 'Such mich find mich',
1372 'age_limit': 0,
1373 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1374 'like_count': int,
1375 'playable_in_embed': False,
1376 'creator': 'OOMPH!',
1377 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1378 'view_count': int,
1379 'alt_title': 'Such mich find mich',
1380 'duration': 210,
1381 'channel': 'Herr Lurik',
1382 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1383 'categories': ['Music'],
1384 'availability': 'public',
1385 'uploader_url': 'http://www.youtube.com/user/st3in234',
1386 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1387 'live_status': 'not_live',
1388 'artist': 'OOMPH!',
1389 'channel_follower_count': int
1390 },
1391 },
1392 {
1393 'note': 'Non-bypassable age-gated video',
1394 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1395 'only_matching': True,
1396 },
1397 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1398 # YouTube Red ad is not captured for creator
1399 {
1400 'url': '__2ABJjxzNo',
1401 'info_dict': {
1402 'id': '__2ABJjxzNo',
1403 'ext': 'mp4',
1404 'duration': 266,
1405 'upload_date': '20100430',
1406 'uploader_id': 'deadmau5',
1407 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1408 'creator': 'deadmau5',
1409 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1410 'uploader': 'deadmau5',
1411 'title': 'Deadmau5 - Some Chords (HD)',
1412 'alt_title': 'Some Chords',
1413 'availability': 'public',
1414 'tags': 'count:14',
1415 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1416 'view_count': int,
1417 'live_status': 'not_live',
1418 'channel': 'deadmau5',
1419 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1420 'like_count': int,
1421 'track': 'Some Chords',
1422 'artist': 'deadmau5',
1423 'playable_in_embed': True,
1424 'age_limit': 0,
1425 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1426 'categories': ['Music'],
1427 'album': 'Some Chords',
1428 'channel_follower_count': int
1429 },
1430 'expected_warnings': [
1431 'DASH manifest missing',
1432 ]
1433 },
1434 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1435 {
1436 'url': 'lqQg6PlCWgI',
1437 'info_dict': {
1438 'id': 'lqQg6PlCWgI',
1439 'ext': 'mp4',
1440 'duration': 6085,
1441 'upload_date': '20150827',
1442 'uploader_id': 'olympic',
1443 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1444 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
1445 'uploader': 'Olympics',
1446 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1447 'like_count': int,
1448 'release_timestamp': 1343767800,
1449 'playable_in_embed': True,
1450 'categories': ['Sports'],
1451 'release_date': '20120731',
1452 'channel': 'Olympics',
1453 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1454 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1455 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1456 'age_limit': 0,
1457 'availability': 'public',
1458 'live_status': 'was_live',
1459 'view_count': int,
1460 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
1461 'channel_follower_count': int
1462 },
1463 'params': {
1464 'skip_download': 'requires avconv',
1465 }
1466 },
1467 # Non-square pixels
1468 {
1469 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1470 'info_dict': {
1471 'id': '_b-2C3KPAM0',
1472 'ext': 'mp4',
1473 'stretched_ratio': 16 / 9.,
1474 'duration': 85,
1475 'upload_date': '20110310',
1476 'uploader_id': 'AllenMeow',
1477 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1478 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1479 'uploader': '孫ᄋᄅ',
1480 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1481 'playable_in_embed': True,
1482 'channel': '孫ᄋᄅ',
1483 'age_limit': 0,
1484 'tags': 'count:11',
1485 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1486 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1487 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1488 'view_count': int,
1489 'categories': ['People & Blogs'],
1490 'like_count': int,
1491 'live_status': 'not_live',
1492 'availability': 'unlisted',
1493 'comment_count': int,
1494 'channel_follower_count': int
1495 },
1496 },
1497 # url_encoded_fmt_stream_map is empty string
1498 {
1499 'url': 'qEJwOuvDf7I',
1500 'info_dict': {
1501 'id': 'qEJwOuvDf7I',
1502 'ext': 'webm',
1503 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1504 'description': '',
1505 'upload_date': '20150404',
1506 'uploader_id': 'spbelect',
1507 'uploader': 'Наблюдатели Петербурга',
1508 },
1509 'params': {
1510 'skip_download': 'requires avconv',
1511 },
1512 'skip': 'This live event has ended.',
1513 },
1514 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1515 {
1516 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1517 'info_dict': {
1518 'id': 'FIl7x6_3R5Y',
1519 'ext': 'webm',
1520 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1521 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1522 'duration': 220,
1523 'upload_date': '20150625',
1524 'uploader_id': 'dorappi2000',
1525 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1526 'uploader': 'dorappi2000',
1527 'formats': 'mincount:31',
1528 },
1529 'skip': 'not actual anymore',
1530 },
1531 # DASH manifest with segment_list
1532 {
1533 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1534 'md5': '8ce563a1d667b599d21064e982ab9e31',
1535 'info_dict': {
1536 'id': 'CsmdDsKjzN8',
1537 'ext': 'mp4',
1538 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1539 'uploader': 'Airtek',
1540 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1541 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1542 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1543 },
1544 'params': {
1545 'youtube_include_dash_manifest': True,
1546 'format': '135', # bestvideo
1547 },
1548 'skip': 'This live event has ended.',
1549 },
1550 {
1551 # Multifeed videos (multiple cameras), URL is for Main Camera
1552 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1553 'info_dict': {
1554 'id': 'jvGDaLqkpTg',
1555 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1556 'description': 'md5:e03b909557865076822aa169218d6a5d',
1557 },
1558 'playlist': [{
1559 'info_dict': {
1560 'id': 'jvGDaLqkpTg',
1561 'ext': 'mp4',
1562 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1563 'description': 'md5:e03b909557865076822aa169218d6a5d',
1564 'duration': 10643,
1565 'upload_date': '20161111',
1566 'uploader': 'Team PGP',
1567 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1568 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1569 },
1570 }, {
1571 'info_dict': {
1572 'id': '3AKt1R1aDnw',
1573 'ext': 'mp4',
1574 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1575 'description': 'md5:e03b909557865076822aa169218d6a5d',
1576 'duration': 10991,
1577 'upload_date': '20161111',
1578 'uploader': 'Team PGP',
1579 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1580 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1581 },
1582 }, {
1583 'info_dict': {
1584 'id': 'RtAMM00gpVc',
1585 'ext': 'mp4',
1586 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1587 'description': 'md5:e03b909557865076822aa169218d6a5d',
1588 'duration': 10995,
1589 'upload_date': '20161111',
1590 'uploader': 'Team PGP',
1591 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1592 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1593 },
1594 }, {
1595 'info_dict': {
1596 'id': '6N2fdlP3C5U',
1597 'ext': 'mp4',
1598 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1599 'description': 'md5:e03b909557865076822aa169218d6a5d',
1600 'duration': 10990,
1601 'upload_date': '20161111',
1602 'uploader': 'Team PGP',
1603 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1604 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1605 },
1606 }],
1607 'params': {
1608 'skip_download': True,
1609 },
1610 'skip': 'Not multifeed anymore',
1611 },
1612 {
1613 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1614 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1615 'info_dict': {
1616 'id': 'gVfLd0zydlo',
1617 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1618 },
1619 'playlist_count': 2,
1620 'skip': 'Not multifeed anymore',
1621 },
1622 {
1623 'url': 'https://vid.plus/FlRa-iH7PGw',
1624 'only_matching': True,
1625 },
1626 {
1627 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1628 'only_matching': True,
1629 },
1630 {
1631 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1632 # Also tests cut-off URL expansion in video description (see
1633 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1634 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1635 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1636 'info_dict': {
1637 'id': 'lsguqyKfVQg',
1638 'ext': 'mp4',
1639 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1640 'alt_title': 'Dark Walk',
1641 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1642 'duration': 133,
1643 'upload_date': '20151119',
1644 'uploader_id': 'IronSoulElf',
1645 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1646 'uploader': 'IronSoulElf',
1647 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1648 'track': 'Dark Walk',
1649 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1650 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1651 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1652 'categories': ['Film & Animation'],
1653 'view_count': int,
1654 'live_status': 'not_live',
1655 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1656 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1657 'tags': 'count:13',
1658 'availability': 'public',
1659 'channel': 'IronSoulElf',
1660 'playable_in_embed': True,
1661 'like_count': int,
1662 'age_limit': 0,
1663 'channel_follower_count': int
1664 },
1665 'params': {
1666 'skip_download': True,
1667 },
1668 },
1669 {
1670 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1671 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1672 'only_matching': True,
1673 },
1674 {
1675 # Video with yt:stretch=17:0
1676 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1677 'info_dict': {
1678 'id': 'Q39EVAstoRM',
1679 'ext': 'mp4',
1680 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1681 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1682 'upload_date': '20151107',
1683 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1684 'uploader': 'CH GAMER DROID',
1685 },
1686 'params': {
1687 'skip_download': True,
1688 },
1689 'skip': 'This video does not exist.',
1690 },
1691 {
1692 # Video with incomplete 'yt:stretch=16:'
1693 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1694 'only_matching': True,
1695 },
1696 {
1697 # Video licensed under Creative Commons
1698 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1699 'info_dict': {
1700 'id': 'M4gD1WSo5mA',
1701 'ext': 'mp4',
1702 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1703 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1704 'duration': 721,
1705 'upload_date': '20150128',
1706 'uploader_id': 'BerkmanCenter',
1707 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1708 'uploader': 'The Berkman Klein Center for Internet & Society',
1709 'license': 'Creative Commons Attribution license (reuse allowed)',
1710 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1711 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1712 'like_count': int,
1713 'age_limit': 0,
1714 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1715 'channel': 'The Berkman Klein Center for Internet & Society',
1716 'availability': 'public',
1717 'view_count': int,
1718 'categories': ['Education'],
1719 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1720 'live_status': 'not_live',
1721 'playable_in_embed': True,
1722 'comment_count': int,
1723 'channel_follower_count': int
1724 },
1725 'params': {
1726 'skip_download': True,
1727 },
1728 },
1729 {
1730 # Channel-like uploader_url
1731 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1732 'info_dict': {
1733 'id': 'eQcmzGIKrzg',
1734 'ext': 'mp4',
1735 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1736 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1737 'duration': 4060,
1738 'upload_date': '20151120',
1739 'uploader': 'Bernie Sanders',
1740 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1741 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1742 'license': 'Creative Commons Attribution license (reuse allowed)',
1743 'playable_in_embed': True,
1744 'tags': 'count:12',
1745 'like_count': int,
1746 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1747 'age_limit': 0,
1748 'availability': 'public',
1749 'categories': ['News & Politics'],
1750 'channel': 'Bernie Sanders',
1751 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1752 'view_count': int,
1753 'live_status': 'not_live',
1754 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1755 'comment_count': int,
1756 'channel_follower_count': int
1757 },
1758 'params': {
1759 'skip_download': True,
1760 },
1761 },
1762 {
1763 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1764 'only_matching': True,
1765 },
1766 {
1767 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1768 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1769 'only_matching': True,
1770 },
1771 {
1772 # Rental video preview
1773 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1774 'info_dict': {
1775 'id': 'uGpuVWrhIzE',
1776 'ext': 'mp4',
1777 'title': 'Piku - Trailer',
1778 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1779 'upload_date': '20150811',
1780 'uploader': 'FlixMatrix',
1781 'uploader_id': 'FlixMatrixKaravan',
1782 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1783 'license': 'Standard YouTube License',
1784 },
1785 'params': {
1786 'skip_download': True,
1787 },
1788 'skip': 'This video is not available.',
1789 },
1790 {
1791 # YouTube Red video with episode data
1792 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1793 'info_dict': {
1794 'id': 'iqKdEhx-dD4',
1795 'ext': 'mp4',
1796 'title': 'Isolation - Mind Field (Ep 1)',
1797 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1798 'duration': 2085,
1799 'upload_date': '20170118',
1800 'uploader': 'Vsauce',
1801 'uploader_id': 'Vsauce',
1802 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1803 'series': 'Mind Field',
1804 'season_number': 1,
1805 'episode_number': 1,
1806 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1807 'tags': 'count:12',
1808 'view_count': int,
1809 'availability': 'public',
1810 'age_limit': 0,
1811 'channel': 'Vsauce',
1812 'episode': 'Episode 1',
1813 'categories': ['Entertainment'],
1814 'season': 'Season 1',
1815 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1816 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1817 'like_count': int,
1818 'playable_in_embed': True,
1819 'live_status': 'not_live',
1820 'channel_follower_count': int
1821 },
1822 'params': {
1823 'skip_download': True,
1824 },
1825 'expected_warnings': [
1826 'Skipping DASH manifest',
1827 ],
1828 },
1829 {
1830 # The following content has been identified by the YouTube community
1831 # as inappropriate or offensive to some audiences.
1832 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1833 'info_dict': {
1834 'id': '6SJNVb0GnPI',
1835 'ext': 'mp4',
1836 'title': 'Race Differences in Intelligence',
1837 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1838 'duration': 965,
1839 'upload_date': '20140124',
1840 'uploader': 'New Century Foundation',
1841 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1842 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1843 },
1844 'params': {
1845 'skip_download': True,
1846 },
1847 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1848 },
1849 {
1850 # itag 212
1851 'url': '1t24XAntNCY',
1852 'only_matching': True,
1853 },
1854 {
1855 # geo restricted to JP
1856 'url': 'sJL6WA-aGkQ',
1857 'only_matching': True,
1858 },
1859 {
1860 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1861 'only_matching': True,
1862 },
1863 {
1864 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1865 'only_matching': True,
1866 },
1867 {
1868 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1869 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1870 'only_matching': True,
1871 },
1872 {
1873 # DRM protected
1874 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1875 'only_matching': True,
1876 },
1877 {
1878 # Video with unsupported adaptive stream type formats
1879 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1880 'info_dict': {
1881 'id': 'Z4Vy8R84T1U',
1882 'ext': 'mp4',
1883 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1884 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1885 'duration': 433,
1886 'upload_date': '20130923',
1887 'uploader': 'Amelia Putri Harwita',
1888 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1889 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1890 'formats': 'maxcount:10',
1891 },
1892 'params': {
1893 'skip_download': True,
1894 'youtube_include_dash_manifest': False,
1895 },
1896 'skip': 'not actual anymore',
1897 },
1898 {
1899 # Youtube Music Auto-generated description
1900 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1901 'info_dict': {
1902 'id': 'MgNrAu2pzNs',
1903 'ext': 'mp4',
1904 'title': 'Voyeur Girl',
1905 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1906 'upload_date': '20190312',
1907 'uploader': 'Stephen - Topic',
1908 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1909 'artist': 'Stephen',
1910 'track': 'Voyeur Girl',
1911 'album': 'it\'s too much love to know my dear',
1912 'release_date': '20190313',
1913 'release_year': 2019,
1914 'alt_title': 'Voyeur Girl',
1915 'view_count': int,
1916 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1917 'playable_in_embed': True,
1918 'like_count': int,
1919 'categories': ['Music'],
1920 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1921 'channel': 'Stephen',
1922 'availability': 'public',
1923 'creator': 'Stephen',
1924 'duration': 169,
1925 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
1926 'age_limit': 0,
1927 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1928 'tags': 'count:11',
1929 'live_status': 'not_live',
1930 'channel_follower_count': int
1931 },
1932 'params': {
1933 'skip_download': True,
1934 },
1935 },
1936 {
1937 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1938 'only_matching': True,
1939 },
1940 {
1941 # invalid -> valid video id redirection
1942 'url': 'DJztXj2GPfl',
1943 'info_dict': {
1944 'id': 'DJztXj2GPfk',
1945 'ext': 'mp4',
1946 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1947 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1948 'upload_date': '20090125',
1949 'uploader': 'Prochorowka',
1950 'uploader_id': 'Prochorowka',
1951 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1952 'artist': 'Panjabi MC',
1953 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1954 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1955 },
1956 'params': {
1957 'skip_download': True,
1958 },
1959 'skip': 'Video unavailable',
1960 },
1961 {
1962 # empty description results in an empty string
1963 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1964 'info_dict': {
1965 'id': 'x41yOUIvK2k',
1966 'ext': 'mp4',
1967 'title': 'IMG 3456',
1968 'description': '',
1969 'upload_date': '20170613',
1970 'uploader_id': 'ElevageOrVert',
1971 'uploader': 'ElevageOrVert',
1972 'view_count': int,
1973 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
1974 'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
1975 'like_count': int,
1976 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
1977 'tags': [],
1978 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
1979 'availability': 'public',
1980 'age_limit': 0,
1981 'categories': ['Pets & Animals'],
1982 'duration': 7,
1983 'playable_in_embed': True,
1984 'live_status': 'not_live',
1985 'channel': 'ElevageOrVert',
1986 'channel_follower_count': int
1987 },
1988 'params': {
1989 'skip_download': True,
1990 },
1991 },
1992 {
1993 # with '};' inside yt initial data (see [1])
1994 # see [2] for an example with '};' inside ytInitialPlayerResponse
1995 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1996 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1997 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1998 'info_dict': {
1999 'id': 'CHqg6qOn4no',
2000 'ext': 'mp4',
2001 'title': 'Part 77 Sort a list of simple types in c#',
2002 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
2003 'upload_date': '20130831',
2004 'uploader_id': 'kudvenkat',
2005 'uploader': 'kudvenkat',
2006 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
2007 'like_count': int,
2008 'uploader_url': 'http://www.youtube.com/user/kudvenkat',
2009 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
2010 'live_status': 'not_live',
2011 'categories': ['Education'],
2012 'availability': 'public',
2013 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
2014 'tags': 'count:12',
2015 'playable_in_embed': True,
2016 'age_limit': 0,
2017 'view_count': int,
2018 'duration': 522,
2019 'channel': 'kudvenkat',
2020 'comment_count': int,
2021 'channel_follower_count': int
2022 },
2023 'params': {
2024 'skip_download': True,
2025 },
2026 },
2027 {
2028 # another example of '};' in ytInitialData
2029 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
2030 'only_matching': True,
2031 },
2032 {
2033 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
2034 'only_matching': True,
2035 },
2036 {
2037 # https://github.com/ytdl-org/youtube-dl/pull/28094
2038 'url': 'OtqTfy26tG0',
2039 'info_dict': {
2040 'id': 'OtqTfy26tG0',
2041 'ext': 'mp4',
2042 'title': 'Burn Out',
2043 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
2044 'upload_date': '20141120',
2045 'uploader': 'The Cinematic Orchestra - Topic',
2046 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2047 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2048 'artist': 'The Cinematic Orchestra',
2049 'track': 'Burn Out',
2050 'album': 'Every Day',
2051 'like_count': int,
2052 'live_status': 'not_live',
2053 'alt_title': 'Burn Out',
2054 'duration': 614,
2055 'age_limit': 0,
2056 'view_count': int,
2057 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2058 'creator': 'The Cinematic Orchestra',
2059 'channel': 'The Cinematic Orchestra',
2060 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
2061 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2062 'availability': 'public',
2063 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
2064 'categories': ['Music'],
2065 'playable_in_embed': True,
2066 'channel_follower_count': int
2067 },
2068 'params': {
2069 'skip_download': True,
2070 },
2071 },
2072 {
2073 # controversial video, only works with bpctr when authenticated with cookies
2074 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
2075 'only_matching': True,
2076 },
2077 {
2078 # controversial video, requires bpctr/contentCheckOk
2079 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
2080 'info_dict': {
2081 'id': 'SZJvDhaSDnc',
2082 'ext': 'mp4',
2083 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2084 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
2085 'uploader': 'CBS Mornings',
2086 'uploader_id': 'CBSThisMorning',
2087 'upload_date': '20140716',
2088 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2089 'duration': 170,
2090 'categories': ['News & Politics'],
2091 'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
2092 'view_count': int,
2093 'channel': 'CBS Mornings',
2094 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2095 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2096 'age_limit': 18,
2097 'availability': 'needs_auth',
2098 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2099 'like_count': int,
2100 'live_status': 'not_live',
2101 'playable_in_embed': True,
2102 'channel_follower_count': int
2103 }
2104 },
2105 {
2106 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2107 'url': 'cBvYw8_A0vQ',
2108 'info_dict': {
2109 'id': 'cBvYw8_A0vQ',
2110 'ext': 'mp4',
2111 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2112 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2113 'upload_date': '20201120',
2114 'uploader': 'Walk around Japan',
2115 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2116 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2117 'duration': 1456,
2118 'categories': ['Travel & Events'],
2119 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2120 'view_count': int,
2121 'channel': 'Walk around Japan',
2122 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2123 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2124 'age_limit': 0,
2125 'availability': 'public',
2126 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2127 'live_status': 'not_live',
2128 'playable_in_embed': True,
2129 'channel_follower_count': int
2130 },
2131 'params': {
2132 'skip_download': True,
2133 },
2134 }, {
2135 # Has multiple audio streams
2136 'url': 'WaOKSUlf4TM',
2137 'only_matching': True
2138 }, {
2139 # Requires Premium: has format 141 when requested using YTM url
2140 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2141 'only_matching': True
2142 }, {
2143 # multiple subtitles with same lang_code
2144 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2145 'only_matching': True,
2146 }, {
2147 # Force use android client fallback
2148 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2149 'info_dict': {
2150 'id': 'YOelRv7fMxY',
2151 'title': 'DIGGING A SECRET TUNNEL Part 1',
2152 'ext': '3gp',
2153 'upload_date': '20210624',
2154 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2155 'uploader': 'colinfurze',
2156 'uploader_id': 'colinfurze',
2157 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
2158 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2159 'duration': 596,
2160 'categories': ['Entertainment'],
2161 'uploader_url': 'http://www.youtube.com/user/colinfurze',
2162 'view_count': int,
2163 'channel': 'colinfurze',
2164 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2165 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2166 'age_limit': 0,
2167 'availability': 'public',
2168 'like_count': int,
2169 'live_status': 'not_live',
2170 'playable_in_embed': True,
2171 'channel_follower_count': int
2172 },
2173 'params': {
2174 'format': '17', # 3gp format available on android
2175 'extractor_args': {'youtube': {'player_client': ['android']}},
2176 },
2177 },
2178 {
2179 # Skip download of additional client configs (remix client config in this case)
2180 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2181 'only_matching': True,
2182 'params': {
2183 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2184 },
2185 }, {
2186 # shorts
2187 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2188 'only_matching': True,
2189 }, {
2190 'note': 'Storyboards',
2191 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2192 'info_dict': {
2193 'id': '5KLPxDtMqe8',
2194 'ext': 'mhtml',
2195 'format_id': 'sb0',
2196 'title': 'Your Brain is Plastic',
2197 'uploader_id': 'scishow',
2198 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2199 'upload_date': '20140324',
2200 'uploader': 'SciShow',
2201 'like_count': int,
2202 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2203 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2204 'view_count': int,
2205 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2206 'playable_in_embed': True,
2207 'tags': 'count:12',
2208 'uploader_url': 'http://www.youtube.com/user/scishow',
2209 'availability': 'public',
2210 'channel': 'SciShow',
2211 'live_status': 'not_live',
2212 'duration': 248,
2213 'categories': ['Education'],
2214 'age_limit': 0,
2215 'channel_follower_count': int
2216 }, 'params': {'format': 'mhtml', 'skip_download': True}
2217 }, {
2218 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2219 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2220 'info_dict': {
2221 'id': '2NUZ8W2llS4',
2222 'ext': 'mp4',
2223 'title': 'The NP that test your phone performance 🙂',
2224 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2225 'uploader': 'Leon Nguyen',
2226 'uploader_id': 'VNSXIII',
2227 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2228 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2229 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2230 'duration': 21,
2231 'view_count': int,
2232 'age_limit': 0,
2233 'categories': ['Gaming'],
2234 'tags': 'count:23',
2235 'playable_in_embed': True,
2236 'live_status': 'not_live',
2237 'upload_date': '20220103',
2238 'like_count': int,
2239 'availability': 'public',
2240 'channel': 'Leon Nguyen',
2241 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2242 'comment_count': int,
2243 'channel_follower_count': int
2244 }
2245 }, {
2246 # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
2247 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2248 'info_dict': {
2249 'id': '2NUZ8W2llS4',
2250 'ext': 'mp4',
2251 'title': 'The NP that test your phone performance 🙂',
2252 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2253 'uploader': 'Leon Nguyen',
2254 'uploader_id': 'VNSXIII',
2255 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2256 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2257 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2258 'duration': 21,
2259 'view_count': int,
2260 'age_limit': 0,
2261 'categories': ['Gaming'],
2262 'tags': 'count:23',
2263 'playable_in_embed': True,
2264 'live_status': 'not_live',
2265 'upload_date': '20220102',
2266 'like_count': int,
2267 'availability': 'public',
2268 'channel': 'Leon Nguyen',
2269 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2270 'comment_count': int,
2271 'channel_follower_count': int
2272 },
2273 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
2274 }, {
2275 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2276 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2277 'info_dict': {
2278 'id': 'mzZzzBU6lrM',
2279 'ext': 'mp4',
2280 'title': 'I Met GeorgeNotFound In Real Life...',
2281 'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
2282 'uploader': 'Quackity',
2283 'uploader_id': 'QuackityHQ',
2284 'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
2285 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2286 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2287 'duration': 955,
2288 'view_count': int,
2289 'age_limit': 0,
2290 'categories': ['Entertainment'],
2291 'tags': 'count:26',
2292 'playable_in_embed': True,
2293 'live_status': 'not_live',
2294 'release_timestamp': 1641172509,
2295 'release_date': '20220103',
2296 'upload_date': '20220103',
2297 'like_count': int,
2298 'availability': 'public',
2299 'channel': 'Quackity',
2300 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2301 'channel_follower_count': int
2302 }
2303 },
2304 { # continuous livestream. Microformat upload date should be preferred.
2305 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2306 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2307 'info_dict': {
2308 'id': 'kgx4WGK0oNU',
2309 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2310 'ext': 'mp4',
2311 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2312 'availability': 'public',
2313 'age_limit': 0,
2314 'release_timestamp': 1637975704,
2315 'upload_date': '20210619',
2316 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2317 'live_status': 'is_live',
2318 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
2319 'uploader': '阿鲍Abao',
2320 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2321 'channel': 'Abao in Tokyo',
2322 'channel_follower_count': int,
2323 'release_date': '20211127',
2324 'tags': 'count:39',
2325 'categories': ['People & Blogs'],
2326 'like_count': int,
2327 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2328 'view_count': int,
2329 'playable_in_embed': True,
2330 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
2331 },
2332 'params': {'skip_download': True}
2333 }, {
2334 # Story. Requires specific player params to work.
2335 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
2336 'info_dict': {
2337 'id': 'vv8qTUWmulI',
2338 'ext': 'mp4',
2339 'availability': 'unlisted',
2340 'view_count': int,
2341 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
2342 'upload_date': '20220526',
2343 'categories': ['Education'],
2344 'title': 'Story',
2345 'channel': 'IT\'S HISTORY',
2346 'description': '',
2347 'uploader_id': 'BlastfromthePast',
2348 'duration': 12,
2349 'uploader': 'IT\'S HISTORY',
2350 'playable_in_embed': True,
2351 'age_limit': 0,
2352 'live_status': 'not_live',
2353 'tags': [],
2354 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
2355 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
2356 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
2357 },
2358 'skip': 'stories get removed after some period of time',
2359 }, {
2360 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2361 'info_dict': {
2362 'id': 'tjjjtzRLHvA',
2363 'ext': 'mp4',
2364 'title': 'ハッシュタグ無し };if window.ytcsi',
2365 'upload_date': '20220323',
2366 'like_count': int,
2367 'availability': 'unlisted',
2368 'channel': 'nao20010128nao',
2369 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
2370 'age_limit': 0,
2371 'uploader': 'nao20010128nao',
2372 'uploader_id': 'nao20010128nao',
2373 'categories': ['Music'],
2374 'view_count': int,
2375 'description': '',
2376 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2377 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2378 'live_status': 'not_live',
2379 'playable_in_embed': True,
2380 'channel_follower_count': int,
2381 'duration': 6,
2382 'tags': [],
2383 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
2384 }
2385 }, {
2386 # Prefer primary title+description language metadata by default
2387 # Do not prefer translated description if primary is empty
2388 'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
2389 'info_dict': {
2390 'id': 'el3E4MbxRqQ',
2391 'ext': 'mp4',
2392 'title': 'dlp test video 2 - primary sv no desc',
2393 'description': '',
2394 'channel': 'cole-dlp-test-acc',
2395 'tags': [],
2396 'view_count': int,
2397 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2398 'like_count': int,
2399 'playable_in_embed': True,
2400 'availability': 'unlisted',
2401 'thumbnail': 'https://i.ytimg.com/vi_webp/el3E4MbxRqQ/maxresdefault.webp',
2402 'age_limit': 0,
2403 'duration': 5,
2404 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
2405 'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2406 'live_status': 'not_live',
2407 'upload_date': '20220908',
2408 'categories': ['People & Blogs'],
2409 'uploader': 'cole-dlp-test-acc',
2410 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2411 },
2412 'params': {'skip_download': True}
2413 }, {
2414 # Extractor argument: prefer translated title+description
2415 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
2416 'info_dict': {
2417 'id': 'gHKT4uU8Zng',
2418 'ext': 'mp4',
2419 'channel': 'cole-dlp-test-acc',
2420 'tags': [],
2421 'duration': 5,
2422 'live_status': 'not_live',
2423 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2424 'upload_date': '20220728',
2425 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
2426 'view_count': int,
2427 'categories': ['People & Blogs'],
2428 'thumbnail': 'https://i.ytimg.com/vi_webp/gHKT4uU8Zng/maxresdefault.webp',
2429 'title': 'dlp test video title translated (fr)',
2430 'availability': 'public',
2431 'uploader': 'cole-dlp-test-acc',
2432 'age_limit': 0,
2433 'description': 'dlp test video description translated (fr)',
2434 'playable_in_embed': True,
2435 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2436 'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2437 },
2438 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
2439 'expected_warnings': [r'Preferring "fr" translated fields'],
2440 }, {
2441 'note': '6 channel audio',
2442 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2443 'only_matching': True,
2444 }
2445 ]
2446
# Test definitions for pages that are not YouTube URLs themselves but embed a
# YouTube video. Each entry gives the page URL, the expected fields of the
# extracted video and the parameters to run with.
# NOTE(review): presumably consumed by the project's test harness - confirm.
_WEBPAGE_TESTS = [
    # YouTube <object> embed
    {
        'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
        'md5': '873c81d308b979f0e23ee7e620b312a3',
        'info_dict': {
            'id': 'msN87y-iEx0',
            'ext': 'mp4',
            'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
            'upload_date': '20080526',
            'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
            'uploader': 'Christopher Sykes',
            'uploader_id': 'ChristopherJSykes',
            'age_limit': 0,
            'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
            'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
            'like_count': int,
            'comment_count': int,
            'channel': 'Christopher Sykes',
            'live_status': 'not_live',
            'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
            'availability': 'public',
            'duration': 195,
            'view_count': int,
            'categories': ['Science & Technology'],
            'channel_follower_count': int,
            'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
        },
        'params': {
            'skip_download': True,
        }
    },
]
2482
@classmethod
def suitable(cls, url):
    """Return False for URLs with a non-empty ``list`` query parameter.

    Every other URL is judged by the parent class' ``suitable``.
    """
    # NOTE(review): the function-scope import is kept from the original;
    # presumably it keeps this method self-contained - confirm before hoisting.
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    has_playlist = bool(list_values[0])
    return False if has_playlist else super().suitable(url)
2491
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create two empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player JS (filled by _load_player)
    # _player_cache: cache-id tuple -> memoised result or exception (filled by _cached)
    self._code_cache, self._player_cache = {}, {}
2496
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
    """Attach fragment generators to every format flagged ``is_from_start``.

    The matching format dicts are modified in place: ``is_live`` and
    ``fragments`` are set (plus ``protocol`` while live) and the
    ``is_from_start`` marker is removed. Nothing is returned.
    """
    # Serialises manifest refreshes requested through mpd_feed
    lock = threading.Lock()
    # Time of the most recent manifest fetch; updated by refetch_manifest
    start_time = time.time()
    formats = [f for f in formats if f.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        # Re-download the player responses and re-list the formats, but only
        # once more than `delay` seconds have passed since the last fetch.
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(
            prs, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        is_live = live_status == 'is_live'
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        with lock:
            refetch_manifest(format_id, delay)

        # Look the format up in the (possibly refreshed) format list
        f = next((f for f in formats if f['format_id'] == format_id), None)
        if not f:
            if not is_live:
                self.to_screen(f'{video_id}: Video is no longer live')
            else:
                self.report_warning(
                    f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
            return None
        return f['manifest_url'], f['manifest_stream_number'], is_live

    for f in formats:
        f['is_live'] = is_live
        # When not live, a copy of the format is passed as `manifestless_orig_fmt`;
        # while live that argument is False
        gen = functools.partial(self._live_dash_fragments, video_id, f['format_id'],
                                live_start_time, mpd_feed, not is_live and f.copy())
        if is_live:
            # The partial itself is stored; it is called later with a ctx dict
            f['fragments'] = gen
            f['protocol'] = 'http_dash_segments_generator'
        else:
            f['fragments'] = LazyList(gen({}))
        del f['is_from_start']
2544
def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
    """Generate fragment dicts (``url``, ``fragment_count``) for a live DASH format.

    @param mpd_feed               callable(format_id, delay) returning
                                  (manifest_url, manifest_stream_number, is_live) or None
    @param manifestless_orig_fmt  falsy while live; otherwise a format dict whose
                                  'fragments' and 'fragment_base_url' are used instead
                                  of downloading a manifest (only one pass is made)
    @param ctx                    dict; 'start' (optional) is read and 'last_error'
                                  is popped on every manifest check
    """
    # FETCH_SPAN: minimum seconds between two polling iterations
    # MAX_DURATION: 432000 seconds == 120 hours (see the warning below)
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # Must be bound before the helper below can run: the helper returns the
    # enclosing `last_seq` on its failure paths, and reading an unassigned
    # closure variable raises NameError (previously hit when the very first
    # manifest download failed)
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        # Returns (should_continue, last_sequence_number)
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        if manifestless_orig_fmt:
            fmt_info = manifestless_orig_fmt
        else:
            try:
                fmts, _ = self._extract_mpd_formats_and_subtitles(
                    mpd_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                fmts = None
            if not fmts:
                no_fragment_score += 2
                return False, last_seq
            fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
    while is_live:
        fetch_time = time.time()
        # Give up once too many consecutive iterations produced nothing
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # Caught just below; used only to restart the outer loop
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                    'fragment_count': last_seq,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        if manifestless_orig_fmt:
            # Stop at the first iteration if running for post-live manifestless;
            # fragment count no longer increase since it starts
            break

        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2655
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in the given ytcfgs, or None.

    Looks for 'PLAYER_JS_URL' first, then for
    'WEB_PLAYER_CONTEXT_CONFIGS' -> * -> 'jsUrl'. `webpage` is not used.
    """
    candidate = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', candidate) if candidate else None
2663
def _download_player_url(self, video_id, fatal=False):
    """Build a player JS URL from the version found in the iframe API script.

    Returns None when the script cannot be downloaded or no version is found
    (with ``fatal=False``).
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None

    version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not version:
        return None
    return f'https://www.youtube.com/s/player/{version}/player_ias.vflset/en_US/base.js'
2673
2674 def _signature_cache_id(self, example_sig):
2675 """ Return a string representation of a signature """
2676 return '.'.join(str(len(part)) for part in example_sig.split('.'))
2677
@classmethod
def _extract_player_info(cls, player_url):
    """Return the 'id' group of the first `_PLAYER_INFO_RE` pattern matching the URL.

    Raises ExtractorError when none of the patterns matches.
    """
    # Lazy: patterns are tried in order and the search stops at the first hit
    attempts = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    found = next(filter(None, attempts), None)
    if found is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return found.group('id')
2687
def _load_player(self, video_id, player_url, fatal=True):
    """Return the player JS for `player_url`, downloading it at most once per player id.

    Returns None when the download yields nothing (only possible with
    ``fatal=False``); nothing is cached in that case.
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache[player_id]

    source = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    if not source:
        return None
    self._code_cache[player_id] = source
    return source
2698
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that rearranges a signature string.

    The rearrangement is stored as a list of source indices. It is read from
    the 'youtube-sigfuncs' cache or, if absent, derived by running the
    player's signature function on a probe string and then stored.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
    assert os.path.basename(func_id) == func_id

    self.write_debug(f'Extracting signature function {func_id}')
    index_map = self.cache.load('youtube-sigfuncs', func_id)

    if not index_map:
        player_js = self._load_player(video_id, player_url)
        if player_js:
            transform = self._parse_sig_js(player_js)
            # Probe whose n-th character has code point n, so the output
            # characters reveal which input position each one came from
            probe = ''.join(chr(n) for n in range(len(example_sig)))
            index_map = [ord(ch) for ch in transform(probe)]
            self.cache.store('youtube-sigfuncs', func_id, index_map)

    def rearrange(s):
        return ''.join(s[i] for i in index_map)

    return rearrange
2718
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to signature function `func`.

    Does nothing unless the 'youtube_print_sig_code' param is set. `func` is
    run on a probe string as long as `example_sig` and the resulting index
    list is rendered as a sum of ``s[...]`` index/slice expressions.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        # Yields 's[i]' for isolated indices and 's[a:b:step]' for runs whose
        # neighbours differ by a constant +1 or -1
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            # An end bound that would be negative is left open (':')
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return f's[{starts}{ends}{steps}]'

        # step is None while no run is in progress
        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                # The run ended at `prev`
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Emit whatever is pending after the last pair.
        # NOTE(review): `i` is unbound if idxs has fewer than two items - confirm
        # callers never pass such a signature.
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    # Probe whose n-th character has code point n
    test_string = ''.join(map(chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            '    return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
2760
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and return it as a callable.

    The returned callable takes the signature string and passes it to the
    interpreted JS function as its only argument.
    """
    # Tried in order; every pattern captures the function name as group 'sig'
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    js_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature(s):
        return js_function([s])

    return run_signature
2784
2785 def _cached(self, func, *cache_id):
2786 def inner(*args, **kwargs):
2787 if cache_id not in self._player_cache:
2788 try:
2789 self._player_cache[cache_id] = func(*args, **kwargs)
2790 except ExtractorError as e:
2791 self._player_cache[cache_id] = e
2792 except Exception as e:
2793 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
2794
2795 ret = self._player_cache[cache_id]
2796 if isinstance(ret, Exception):
2797 raise ret
2798 return ret
2799 return inner
2800
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    # One memoised extractor per (player URL, signature shape)
    cache_key = ('sig', player_url, self._signature_cache_id(s))
    build_func = self._cached(self._extract_signature_function, *cache_key)
    sig_func = build_func(video_id, player_url, s)
    self._print_sig_code(sig_func, s)
    return sig_func(s)
2808
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature

    Raises ExtractorError if `player_url` is None or the nsig function code
    cannot be extracted. If the native interpreter raises
    JSInterpreter.Exception, PhantomJS is tried; when the PhantomJS wrapper
    cannot be created, the original interpreter error is raised.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as e:
        raise ExtractorError('Unable to extract nsig function code', cause=e)
    if self.get_param('youtube_print_sig_code'):
        # func_code is an (argument names, function body) pair; print the body
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        # The built function is memoised per player URL
        extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        ret = extract_nsig(jsi, func_code)(s)
    except JSInterpreter.Exception as e:
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            # No PhantomJS fallback available: surface the interpreter error
            raise e
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(e)

        # Rebuild the JS function from its parts and run it on `s`
        args, func_body = func_code
        ret = jsi.execute(
            f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));',
            video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {ret}')
    return ret
2842
def _extract_n_function_name(self, jscode):
    """Return the name of the player's "n" function.

    The player references it either directly (``b=name(x)``) or through an
    array (``b=name[idx](x)``); in the array case the array literal is parsed
    and the entry at `idx` is returned.
    """
    name, array_index = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
        jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    if not array_index:
        return name

    array_literal = self._search_regex(
        rf'var {re.escape(name)}\s*=\s*(\[.+?\]);', jscode,
        f'Initial JS player n function list ({name}.{array_index})')
    candidates = json.loads(js_to_json(array_literal))
    return candidates[int(array_index)]
2853
def _extract_n_function_code(self, video_id, player_url):
    """Return ``(jsi, player_id, func_code)`` for the player's "n" function.

    `func_code` is an (argument names, function body) pair. It is loaded from
    the 'youtube-nsig' cache when available; otherwise it is extracted from
    the player JS and stored in that cache.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    # NOTE(review): on a cache hit the interpreter is constructed from the
    # cached func_code rather than from player JS - kept as in the original
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # The name is escaped because it may contain "$", which would otherwise
    # act as a regex anchor and make this pattern never match
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
            # NB: The end of the regex is intentionally kept strict
            {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % re.escape(func_name),
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
2879
def _extract_n_function_from_code(self, jsi, func_code):
    """Build the "n" function from `func_code` and return a checked wrapper.

    The wrapper raises JSInterpreter.Exception on any failure, including when
    the JS function returns a string starting with 'enhanced_except_'.
    """
    js_function = jsi.extract_function_from_code(*func_code)

    def extract_nsig(s):
        try:
            result = js_function([s])
        except Exception as e:
            # Interpreter errors pass through untouched; anything else is wrapped
            if isinstance(e, JSInterpreter.Exception):
                raise
            raise JSInterpreter.Exception(traceback.format_exc(), cause=e)

        if not result.startswith('enhanced_except_'):
            return result
        raise JSInterpreter.Exception('Signature function returned an exception')

    return extract_nsig
2896
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    Taken from ytcfg['STS'] when present and non-zero, otherwise searched for
    in the player JS. Without a player URL this raises (fatal) or warns and
    returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
2920
def _mark_watched(self, video_id, player_responses):
    """Request the playback and watch-time tracking URLs from the player responses.

    Stops with a warning as soon as one of the two tracking URLs is missing.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')

    # is_full is 0 for the playback URL and 1 for the watch-time URL
    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return

        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))

        # # more consistent results setting it to right before the end
        reported_length = (qs.get('len') or ['1.5'])[0]
        video_length = [str(float(reported_length) - 1)]

        extra_params = {
            'ver': ['2'],
            'cpn': [cpn],
            'cmt': video_length,
            'el': 'detailpage',  # otherwise defaults to "shorts"
        }
        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            extra_params['st'] = video_length
            extra_params['et'] = video_length
        qs.update(extra_params)

        new_query = urllib.parse.urlencode(qs, True)
        url = urllib.parse.urlunparse(parsed_url._replace(query=new_query))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
2961
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield url_results for YouTube videos referenced by a third-party page.

    An "alternate" link to a YouTube watch URL short-circuits everything else
    by raising `cls.StopExtraction`. Otherwise the parent class' results come
    first, then lazyYT and "YouTube Video Importer" embeds.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate:
        yield cls.url_result(alternate.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    for lazy in re.finditer(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        raw_id = lazy.group(1)
        yield cls.url_result(unescapeHTML(raw_id), cls, raw_id)

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    for embed in re.finditer(importer_re, webpage):
        # Group 3 is the video id (groups 1 and 2 are the quote characters)
        embedded_id = embed.group(3)
        yield cls.url_result(embedded_id, cls, embedded_id)
2985
@classmethod
def extract_id(cls, url):
    """Return the video id for `url`; raise ExtractorError if none can be derived."""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
2992
def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the player-bar section of `data`.

    Start times are read from 'timeRangeStartMillis' (scaled to seconds) and
    titles from 'title' -> 'simpleText' of each 'chapterRenderer'.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )

    def start_seconds(chapter):
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    raw_chapters = traverse_obj(data, chapters_path, expected_type=list)
    return self._extract_chapters(
        raw_chapters,
        chapter_time=start_seconds,
        chapter_title=title_of,
        duration=duration)
3007
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return chapters from the first engagement panel that yields any, else []."""
    panels = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in panels:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
3020
def _extract_chapters_from_description(self, description, duration):
    """Extract chapters from description lines that begin with a timestamp.

    Entries are passed on in non-strict mode, so they get sorted by start time.
    """
    # Each hit is a (timestamp, title) pair
    stamped_lines = re.findall(
        r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or '')

    def start_of(entry):
        return parse_duration(entry[0])

    def title_of(entry):
        return entry[1]

    return self._extract_chapters(
        stamped_lines, chapter_time=start_of, chapter_title=title_of,
        duration=duration, strict=False)
3026
3027 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
3028 if not duration:
3029 return
3030 chapter_list = [{
3031 'start_time': chapter_time(chapter),
3032 'title': chapter_title(chapter),
3033 } for chapter in chapter_list or []]
3034 if not strict:
3035 chapter_list.sort(key=lambda c: c['start_time'] or 0)
3036
3037 chapters = [{'start_time': 0}]
3038 for idx, chapter in enumerate(chapter_list):
3039 if chapter['start_time'] is None:
3040 self.report_warning(f'Incomplete chapter {idx}')
3041 elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
3042 chapters.append(chapter)
3043 elif chapter not in chapters:
3044 self.report_warning(
3045 f'Invalid start time ({chapter["start_time"]} < {chapters[-1]["start_time"]}) for chapter "{chapter["title"]}"')
3046 return chapters[1:]
3047
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer into a comment dict.

    Returns None when the renderer has no 'commentId'. 'parent' is 'root'
    when no parent id is given.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return None

    text = self._get_text(comment_renderer, 'contentText')

    # Timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    timestamp = self._parse_time_text(time_text)

    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(
        comment_renderer, lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)

    # Like count comes from 'voteCount' text or, failing that, 'likeCount'
    vote_getters = (lambda x: x['voteCount']['simpleText'], lambda x: x['likeCount'])
    like_count = parse_count(try_get(comment_renderer, vote_getters, str)) or 0

    author_thumbnail = try_get(
        comment_renderer, lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)
    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)

    action_buttons = try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}
    is_favorited = 'creatorHeart' in action_buttons

    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': like_count,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
3084
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generate comment dicts by following comment-section continuations.

    Called with parent=None for the top-level comment section; it calls
    itself with `parent` set to a comment id to fetch that comment's replies.
    `tracker` is a dict of running counters shared across those recursive
    calls and is created on the first call.
    """

    # First value of an extractor argument, or '' if it is not set
    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        # Reads the expected comment count and returns the continuation for
        # the requested sort order, or None if no header provides one
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        # Yields comment dicts; a bare `yield` (None) tells the caller that
        # the parent-comment limit was reached and it should stop
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Capped by the per-thread limit and by what is left of the
                # overall reply limit
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # The 'max_comments' argument holds up to four limits; missing or
    # non-integer entries fall back to sys.maxsize
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        # Stays None (ending the page loop) unless a section supplies a new one
        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first top-level response is only mined for its header
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                # A falsy entry is extract_thread's "limit reached" signal
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    # Surface YouTube's own message when a top-level call produced no comments
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
3230
3231 @staticmethod
3232 def _generate_comment_continuation(video_id):
3233 """
3234 Generates initial comment section continuation token from given video id
3235 """
3236 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3237 return base64.b64encode(token.encode()).decode()
3238
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""
    def _real_comment_extract(contents):
        # Use the first item section identified as the comment section, if any
        renderer = None
        for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if item.get('sectionIdentifier') == 'comment-item-section':
                renderer = item
                break
        yield from self._comment_entries(renderer, ytcfg, video_id)

    # First value of the 'max_comments' argument caps the total; None means no cap
    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
    comment_stream = _real_comment_extract(contents)
    return itertools.islice(comment_stream, 0, max_comments)
3249
3250 @staticmethod
3251 def _get_checkok_params():
3252 return {'contentCheckOk': True, 'racyCheckOk': True}
3253
@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback-context part of a player request.

    'signatureTimestamp' is included only when `sts` is not None; the
    check-ok flags from `_get_checkok_params` are merged at the top level.
    """
    playback = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback['signatureTimestamp'] = sts

    result = {'playbackContext': {'contentPlaybackContext': playback}}
    result.update(cls._get_checkok_params())
    return result
3267
@staticmethod
def _is_agegated(player_response):
    """
    Tell whether a player response indicates an age-gated video

    A response counts as age-gated when its playability status carries a
    `desktopLegacyAgeGateReason`, or when its `status`/`reason` text contains
    one of the known age-gate markers (matched case-insensitively).
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # The markers are lower case, but status values appear to be upper-case codes
    # (e.g. 'AGE_VERIFICATION_REQUIRED'), so a case-sensitive substring test could
    # never match them. Normalize before comparing and ignore non-string values.
    normalized = [reason.lower() for reason in reasons if isinstance(reason, str)]
    return any(expected in reason for expected in AGE_GATE_REASONS for reason in normalized)
3279
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of the response is exactly 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
3283
# Value sent as `params` in player API queries for stories and for the android client,
# and as the `pp` query parameter when downloading the watch page of a story
_STORY_PLAYER_PARAMS = '8AEB'
3285
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """
    Request the player API response of `video_id` for a single client

    The signature timestamp is only looked up when a `player_url` is given.
    Returns the response, or None when the response is empty.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id}
    wants_story_params = smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android'
    if wants_story_params:
        yt_query['params'] = self._STORY_PLAYER_PARAMS
    yt_query.update(self._generate_player_context(sts))

    readable_client = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % readable_client)
    return response or None
3307
def _get_requested_clients(self, url, smuggled_data):
    """
    Resolve the `player_client` extractor argument into an ordered client list

    Recognised values are the names of the user-selectable innertube clients
    (those not starting with "_"), `default` and `all`; anything else is
    skipped with a warning. When nothing valid was requested, the default
    clients are used. For music URLs the `<client>_music` variant of every
    requested client is added when such a client exists.

    @returns    The requested clients with duplicates removed, order preserved
    """
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy so that later additions never touch the defaults list
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the variants first: extending the list from a generator that is
        # iterating over that same list would also visit the newly added entries
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
3331
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """
    Collect player responses for every requested client

    Clients are tried in the given order; depending on the responses, extra
    fallback clients (embedded/creator variants) may be queued and are then
    tried before the remaining requested ones.

    @returns    (list of player responses, player URL or None)
    @raises     The last ExtractorError if no player response was obtained at all;
                otherwise errors are only reported as warnings
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed so that `clients.pop()` below yields the clients in their original order
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    # Appended to the end, so it is the next one popped by the loop below
                    clients.append(client_name)
                    all_clients.add(actual_client)
                return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        # Only the web client reuses the ytcfg of the watch page
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        # Once found, the player URL is kept for all following clients
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The separate player URL download is attempted at most once per video
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            # The web client reuses the response embedded in the webpage when available
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            # Only the most recent error is kept; the previous one is downgraded to a warning
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        # Fatal only when there is nothing at all to work with
        if not len(prs):
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
3413
def _needs_live_processing(self, live_status, duration):
    """
    Return `live_status` if the video needs live processing, otherwise None

    That is the case for a live video when `live_from_start` is enabled, and
    for a post-live video whose duration exceeds 4 hours.
    """
    if live_status == 'is_live':
        if self.get_param('live_from_start'):
            return live_status
    elif live_status == 'post_live':
        if (duration or 0) > 4 * 3600:
            return live_status
    return None
3418
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
    """
    Generate the formats found in `streaming_data`, followed by the subtitles

    Yields one format dict per usable https format, then one per format of the
    HLS/DASH manifests that are not skipped. The LAST item yielded is not a
    format but the dict of subtitles collected from the manifests.

    @param streaming_data   List of `streamingData` dicts of the player responses
    @param player_url       Player URL used for signature and nsig decryption; may be None
    @param live_status      Live status of the video (e.g. 'is_live', 'post_live')
    @param duration         Duration in seconds, or None if unknown
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        # The same itag may appear once per audio track, hence the combined id
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `audioQuality` may be missing or None; keep the previous value in that case
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: it has to be rebuilt from the signature cipher
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                        f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
                        video_id=video_id, only_once=True)
                # The format is still returned, but marked and deprioritized
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault') and 10
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # `quality` is None when the format declares neither `quality` nor `audioQuality`
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        # For single-stream formats the total bitrate is that of the one stream
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    needs_live_processing = self._needs_live_processing(live_status, duration)
    skip_bad_formats = not self._configuration_arg('include_incomplete_formats')

    skip_manifests = set(self._configuration_arg('skip'))
    if (not self.get_param('youtube_include_hls_manifest', True)
            or needs_live_processing == 'is_live'  # These will be filtered out by YoutubeDL anyway
            or needs_live_processing and skip_bad_formats):
        skip_manifests.add('hls')

    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    if self._configuration_arg('include_live_dash'):
        self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
                                            'Use include_incomplete_formats extractor argument instead')
    elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
        skip_manifests.add('dash')

    def process_manifest_format(f, proto, itag):
        """Assign format id and quality to a manifest format; False if it is a duplicate"""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            # Same itag already seen with another protocol: disambiguate the id
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Fall back to the quality recorded for the closest known height
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        return True

    subtitles = {}
    for sd in streaming_data:
        hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if needs_live_processing:
                        f['is_from_start'] = True

                    yield f
    # Callers must split this final item off from the formats
    yield subtitles
3622
def _extract_storyboard(self, player_responses, duration):
    """
    Generate one `mhtml` storyboard format per entry of the storyboard spec

    The spec is a "|"-separated string: its first field is the URL template,
    every following field describes one storyboard as 8 "#"-separated values.
    Malformed entries are skipped with a warning.
    """
    raw_spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='')
    # Reversed, so that the URL template (originally first) can be popped off the end
    entries = raw_spec.split('|')[::-1]
    base_url = url_or_none(urljoin('https://i.ytimg.com/', entries.pop() or None))
    if not base_url:
        return

    last_index = len(entries) - 1
    for idx, entry in enumerate(entries):
        fields = entry.split('#')
        counts = [int_or_none(value) for value in fields[:5]]
        if len(fields) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {idx}: {"#".join(fields)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        name, sigh = fields[6:]

        # Entries are visited in reverse, hence the mirrored value for $L
        url = base_url.replace('$L', str(last_index - idx)).replace('$N', name) + f'&sigh={sigh}'
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        fragments = []
        for page in range(math.ceil(fragment_count)):
            fragments.append({
                'url': url.replace('$M', str(page)),
                # The last fragment may be shorter than the others
                'duration': min(fragment_duration, duration - (page * fragment_duration)),
            })
        yield {
            'format_id': f'sb{idx}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': fragments,
        }
3660
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """
    Download the watch page (unless skipped) and the player responses

    @returns    (webpage or None, master ytcfg, list of player responses, player URL)
    """
    webpage = None
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    if not skip_webpage:
        query = {'bpctr': '9999999999', 'has_verified': '1'}
        if smuggled_data.get('is_story'):
            query['pp'] = self._STORY_PLAYER_PARAMS
        webpage = self._download_webpage(webpage_url, video_id, fatal=False, query=query)

    # Fall back to the built-in default config when none can be extracted
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url
3677
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """
    Determine the live status of the video and extract its formats

    @returns    (live broadcast details, live status, streaming data, formats, subtitles)
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')

    # Fill in the flags that can be deduced from the other ones
    if (is_live is None and is_upcoming) or live_content is False:
        is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False
    post_live = get_first(video_details, 'isPostLiveDvr')

    if post_live:
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif is_upcoming:
        live_status = 'is_upcoming'
    elif None in (is_live, is_upcoming, live_content):
        # Not enough information to tell
        live_status = None
    elif live_content:
        live_status = 'was_live'
    else:
        live_status = 'not_live'

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    # The last item generated is the subtitles dict, everything before it is a format
    *formats, subtitles = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)

    return live_broadcast_details, live_status, streaming_data, formats, subtitles
3700
3701 def _real_extract(self, url):
3702 url, smuggled_data = unsmuggle_url(url, {})
3703 video_id = self._match_id(url)
3704
3705 base_url = self.http_scheme() + '//www.youtube.com/'
3706 webpage_url = base_url + 'watch?v=' + video_id
3707
3708 webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
3709
3710 playability_statuses = traverse_obj(
3711 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
3712
3713 trailer_video_id = get_first(
3714 playability_statuses,
3715 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
3716 expected_type=str)
3717 if trailer_video_id:
3718 return self.url_result(
3719 trailer_video_id, self.ie_key(), trailer_video_id)
3720
3721 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
3722 if webpage else (lambda x: None))
3723
3724 video_details = traverse_obj(
3725 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
3726 microformats = traverse_obj(
3727 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
3728 expected_type=dict, default=[])
3729
3730 translated_title = self._get_text(microformats, (..., 'title'))
3731 video_title = (self._preferred_lang and translated_title
3732 or get_first(video_details, 'title') # primary
3733 or translated_title
3734 or search_meta(['og:title', 'twitter:title', 'title']))
3735 translated_description = self._get_text(microformats, (..., 'description'))
3736 original_description = get_first(video_details, 'shortDescription')
3737 video_description = (
3738 self._preferred_lang and translated_description
3739 # If original description is blank, it will be an empty string.
3740 # Do not prefer translated description in this case.
3741 or original_description if original_description is not None else translated_description)
3742
3743 multifeed_metadata_list = get_first(
3744 player_responses,
3745 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
3746 expected_type=str)
3747 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
3748 if self.get_param('noplaylist'):
3749 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
3750 else:
3751 entries = []
3752 feed_ids = []
3753 for feed in multifeed_metadata_list.split(','):
3754 # Unquote should take place before split on comma (,) since textual
3755 # fields may contain comma as well (see
3756 # https://github.com/ytdl-org/youtube-dl/issues/8536)
3757 feed_data = urllib.parse.parse_qs(
3758 urllib.parse.unquote_plus(feed))
3759
3760 def feed_entry(name):
3761 return try_get(
3762 feed_data, lambda x: x[name][0], str)
3763
3764 feed_id = feed_entry('id')
3765 if not feed_id:
3766 continue
3767 feed_title = feed_entry('title')
3768 title = video_title
3769 if feed_title:
3770 title += ' (%s)' % feed_title
3771 entries.append({
3772 '_type': 'url_transparent',
3773 'ie_key': 'Youtube',
3774 'url': smuggle_url(
3775 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
3776 {'force_singlefeed': True}),
3777 'title': title,
3778 })
3779 feed_ids.append(feed_id)
3780 self.to_screen(
3781 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
3782 % (', '.join(feed_ids), video_id))
3783 return self.playlist_result(
3784 entries, video_id, video_title, video_description)
3785
3786 duration = int_or_none(
3787 get_first(video_details, 'lengthSeconds')
3788 or get_first(microformats, 'lengthSeconds')
3789 or parse_duration(search_meta('duration'))) or None
3790
3791 live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \
3792 self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)
3793 if live_status == 'post_live':
3794 self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')
3795
3796 if not formats:
3797 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
3798 self.report_drm(video_id)
3799 pemr = get_first(
3800 playability_statuses,
3801 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
3802 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
3803 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
3804 if subreason:
3805 if subreason == 'The uploader has not made this video available in your country.':
3806 countries = get_first(microformats, 'availableCountries')
3807 if not countries:
3808 regions_allowed = search_meta('regionsAllowed')
3809 countries = regions_allowed.split(',') if regions_allowed else None
3810 self.raise_geo_restricted(subreason, countries, metadata_available=True)
3811 reason += f'. {subreason}'
3812 if reason:
3813 self.raise_no_formats(reason, expected=True)
3814
3815 keywords = get_first(video_details, 'keywords', expected_type=list) or []
3816 if not keywords and webpage:
3817 keywords = [
3818 unescapeHTML(m.group('content'))
3819 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
3820 for keyword in keywords:
3821 if keyword.startswith('yt:stretch='):
3822 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
3823 if mobj:
3824 # NB: float is intentional for forcing float division
3825 w, h = (float(v) for v in mobj.groups())
3826 if w > 0 and h > 0:
3827 ratio = w / h
3828 for f in formats:
3829 if f.get('vcodec') != 'none':
3830 f['stretched_ratio'] = ratio
3831 break
3832 thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
3833 thumbnail_url = search_meta(['og:image', 'twitter:image'])
3834 if thumbnail_url:
3835 thumbnails.append({
3836 'url': thumbnail_url,
3837 })
3838 original_thumbnails = thumbnails.copy()
3839
3840 # The best resolution thumbnails sometimes does not appear in the webpage
3841 # See: https://github.com/yt-dlp/yt-dlp/issues/340
3842 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
3843 thumbnail_names = [
3844 # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
3845 # in resolution, these are not the custom thumbnail. So de-prioritize them
3846 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
3847 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
3848 ]
3849 n_thumbnail_names = len(thumbnail_names)
3850 thumbnails.extend({
3851 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
3852 video_id=video_id, name=name, ext=ext,
3853 webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),
3854 } for name in thumbnail_names for ext in ('webp', 'jpg'))
3855 for thumb in thumbnails:
3856 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
3857 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
3858 self._remove_duplicate_formats(thumbnails)
3859 self._downloader._sort_thumbnails(original_thumbnails)
3860
3861 category = get_first(microformats, 'category') or search_meta('genre')
3862 channel_id = str_or_none(
3863 get_first(video_details, 'channelId')
3864 or get_first(microformats, 'externalChannelId')
3865 or search_meta('channelId'))
3866 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
3867
3868 live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
3869 live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
3870 if not duration and live_end_time and live_start_time:
3871 duration = live_end_time - live_start_time
3872
3873 needs_live_processing = self._needs_live_processing(live_status, duration)
3874
3875 def is_bad_format(fmt):
3876 if needs_live_processing and not fmt.get('is_from_start'):
3877 return True
3878 elif (live_status == 'is_live' and needs_live_processing != 'is_live'
3879 and fmt.get('protocol') == 'http_dash_segments'):
3880 return True
3881
3882 for fmt in filter(is_bad_format, formats):
3883 fmt['preference'] = (fmt.get('preference') or -1) - 10
3884 fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ')
3885
3886 if needs_live_processing:
3887 self._prepare_live_from_start_formats(
3888 formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')
3889
3890 formats.extend(self._extract_storyboard(player_responses, duration))
3891
3892 # source_preference is lower for throttled/potentially damaged formats
3893 self._sort_formats(formats, (
3894 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))
3895
3896 info = {
3897 'id': video_id,
3898 'title': video_title,
3899 'formats': formats,
3900 'thumbnails': thumbnails,
3901 # The best thumbnail that we are sure exists. Prevents unnecessary
3902 # URL checking if user don't care about getting the best possible thumbnail
3903 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
3904 'description': video_description,
3905 'uploader': get_first(video_details, 'author'),
3906 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
3907 'uploader_url': owner_profile_url,
3908 'channel_id': channel_id,
3909 'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),
3910 'duration': duration,
3911 'view_count': int_or_none(
3912 get_first((video_details, microformats), (..., 'viewCount'))
3913 or search_meta('interactionCount')),
3914 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
3915 'age_limit': 18 if (
3916 get_first(microformats, 'isFamilySafe') is False
3917 or search_meta('isFamilyFriendly') == 'false'
3918 or search_meta('og:restrictions:age') == '18+') else 0,
3919 'webpage_url': webpage_url,
3920 'categories': [category] if category else None,
3921 'tags': keywords,
3922 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
3923 'live_status': live_status,
3924 'release_timestamp': live_start_time,
3925 }
3926
3927 subtitles = {}
3928 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
3929 if pctr:
3930 def get_lang_code(track):
3931 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
3932 or track.get('languageCode'))
3933
3934 # Converted into dicts to remove duplicates
3935 captions = {
3936 get_lang_code(sub): sub
3937 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
3938 translation_languages = {
3939 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
3940 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
3941
3942 def process_language(container, base_url, lang_code, sub_name, query):
3943 lang_subs = container.setdefault(lang_code, [])
3944 for fmt in self._SUBTITLE_FORMATS:
3945 query.update({
3946 'fmt': fmt,
3947 })
3948 lang_subs.append({
3949 'ext': fmt,
3950 'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
3951 'name': sub_name,
3952 })
3953
3954 # NB: Constructing the full subtitle dictionary is slow
3955 get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
3956 self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
3957 for lang_code, caption_track in captions.items():
3958 base_url = caption_track.get('baseUrl')
3959 orig_lang = parse_qs(base_url).get('lang', [None])[-1]
3960 if not base_url:
3961 continue
3962 lang_name = self._get_text(caption_track, 'name', max_runs=1)
3963 if caption_track.get('kind') != 'asr':
3964 if not lang_code:
3965 continue
3966 process_language(
3967 subtitles, base_url, lang_code, lang_name, {})
3968 if not caption_track.get('isTranslatable'):
3969 continue
3970 for trans_code, trans_name in translation_languages.items():
3971 if not trans_code:
3972 continue
3973 orig_trans_code = trans_code
3974 if caption_track.get('kind') != 'asr':
3975 if not get_translated_subs:
3976 continue
3977 trans_code += f'-{lang_code}'
3978 trans_name += format_field(lang_name, None, ' from %s')
3979 # Add an "-orig" label to the original language so that it can be distinguished.
3980 # The subs are returned without "-orig" as well for compatibility
3981 if lang_code == f'a-{orig_trans_code}':
3982 process_language(
3983 automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
3984 # Setting tlang=lang returns damaged subtitles.
3985 process_language(automatic_captions, base_url, trans_code, trans_name,
3986 {} if orig_lang == orig_trans_code else {'tlang': trans_code})
3987
3988 info['automatic_captions'] = automatic_captions
3989 info['subtitles'] = subtitles
3990
3991 parsed_url = urllib.parse.urlparse(url)
3992 for component in [parsed_url.fragment, parsed_url.query]:
3993 query = urllib.parse.parse_qs(component)
3994 for k, v in query.items():
3995 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
3996 d_k += '_time'
3997 if d_k not in info and k in s_ks:
3998 info[d_k] = parse_duration(query[k][0])
3999
4000 # Youtube Music Auto-generated description
4001 if video_description:
4002 mobj = re.search(
4003 r'''(?xs)
4004 (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
4005 (?P<album>[^\n]+)
4006 (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
4007 (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
4008 (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
4009 .+\nAuto-generated\ by\ YouTube\.\s*$
4010 ''', video_description)
4011 if mobj:
4012 release_year = mobj.group('release_year')
4013 release_date = mobj.group('release_date')
4014 if release_date:
4015 release_date = release_date.replace('-', '')
4016 if not release_year:
4017 release_year = release_date[:4]
4018 info.update({
4019 'album': mobj.group('album'.strip()),
4020 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
4021 'track': mobj.group('track').strip(),
4022 'release_date': release_date,
4023 'release_year': int_or_none(release_year),
4024 })
4025
4026 initial_data = None
4027 if webpage:
4028 initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
4029 if not initial_data:
4030 query = {'videoId': video_id}
4031 query.update(self._get_checkok_params())
4032 initial_data = self._extract_response(
4033 item_id=video_id, ep='next', fatal=False,
4034 ytcfg=master_ytcfg, query=query,
4035 headers=self.generate_api_headers(ytcfg=master_ytcfg),
4036 note='Downloading initial data API JSON')
4037
4038 info['comment_count'] = traverse_obj(initial_data, (
4039 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
4040 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'
4041 ), (
4042 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
4043 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'
4044 ), expected_type=int_or_none, get_all=False)
4045
4046 try: # This will error if there is no livechat
4047 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
4048 except (KeyError, IndexError, TypeError):
4049 pass
4050 else:
4051 info.setdefault('subtitles', {})['live_chat'] = [{
4052 # url is needed to set cookies
4053 'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
4054 'video_id': video_id,
4055 'ext': 'json',
4056 'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')
4057 else 'youtube_live_chat_replay'),
4058 }]
4059
4060 if initial_data:
4061 info['chapters'] = (
4062 self._extract_chapters_from_json(initial_data, duration)
4063 or self._extract_chapters_from_engagement_panel(initial_data, duration)
4064 or self._extract_chapters_from_description(video_description, duration)
4065 or None)
4066
4067 contents = traverse_obj(
4068 initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
4069 expected_type=list, default=[])
4070
4071 vpir = get_first(contents, 'videoPrimaryInfoRenderer')
4072 if vpir:
4073 stl = vpir.get('superTitleLink')
4074 if stl:
4075 stl = self._get_text(stl)
4076 if try_get(
4077 vpir,
4078 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
4079 info['location'] = stl
4080 else:
4081 mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
4082 if mobj:
4083 info.update({
4084 'series': mobj.group(1),
4085 'season_number': int(mobj.group(2)),
4086 'episode_number': int(mobj.group(3)),
4087 })
4088 for tlb in (try_get(
4089 vpir,
4090 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
4091 list) or []):
4092 tbrs = variadic(
4093 traverse_obj(
4094 tlb, 'toggleButtonRenderer',
4095 ('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer'),
4096 default=[]))
4097 for tbr in tbrs:
4098 for getter, regex in [(
4099 lambda x: x['defaultText']['accessibility']['accessibilityData'],
4100 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
4101 lambda x: x['accessibility'],
4102 lambda x: x['accessibilityData']['accessibilityData'],
4103 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
4104 label = (try_get(tbr, getter, dict) or {}).get('label')
4105 if label:
4106 mobj = re.match(regex, label)
4107 if mobj:
4108 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
4109 break
4110 sbr_tooltip = try_get(
4111 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
4112 if sbr_tooltip:
4113 like_count, dislike_count = sbr_tooltip.split(' / ')
4114 info.update({
4115 'like_count': str_to_int(like_count),
4116 'dislike_count': str_to_int(dislike_count),
4117 })
4118 vsir = get_first(contents, 'videoSecondaryInfoRenderer')
4119 if vsir:
4120 vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
4121 info.update({
4122 'channel': self._get_text(vor, 'title'),
4123 'channel_follower_count': self._get_count(vor, 'subscriberCountText')})
4124
4125 rows = try_get(
4126 vsir,
4127 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
4128 list) or []
4129 multiple_songs = False
4130 for row in rows:
4131 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
4132 multiple_songs = True
4133 break
4134 for row in rows:
4135 mrr = row.get('metadataRowRenderer') or {}
4136 mrr_title = mrr.get('title')
4137 if not mrr_title:
4138 continue
4139 mrr_title = self._get_text(mrr, 'title')
4140 mrr_contents_text = self._get_text(mrr, ('contents', 0))
4141 if mrr_title == 'License':
4142 info['license'] = mrr_contents_text
4143 elif not multiple_songs:
4144 if mrr_title == 'Album':
4145 info['album'] = mrr_contents_text
4146 elif mrr_title == 'Artist':
4147 info['artist'] = mrr_contents_text
4148 elif mrr_title == 'Song':
4149 info['track'] = mrr_contents_text
4150
4151 fallbacks = {
4152 'channel': 'uploader',
4153 'channel_id': 'uploader_id',
4154 'channel_url': 'uploader_url',
4155 }
4156
4157 # The upload date for scheduled, live and past live streams / premieres in microformats
4158 # may be different from the stream date. Although not in UTC, we will prefer it in this case.
4159 # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
4160 upload_date = (
4161 unified_strdate(get_first(microformats, 'uploadDate'))
4162 or unified_strdate(search_meta('uploadDate')))
4163 if not upload_date or (
4164 live_status in ('not_live', None)
4165 and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
4166 ):
4167 upload_date = strftime_or_none(
4168 self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date
4169 info['upload_date'] = upload_date
4170
4171 for to, frm in fallbacks.items():
4172 if not info.get(to):
4173 info[to] = info.get(frm)
4174
4175 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
4176 v = info.get(s_k)
4177 if v:
4178 info[d_k] = v
4179
4180 badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))
4181
4182 is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
4183 or get_first(video_details, 'isPrivate', expected_type=bool))
4184
4185 info['availability'] = (
4186 'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
4187 else self._availability(
4188 is_private=is_private,
4189 needs_premium=(
4190 self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)
4191 or False if initial_data and is_private is not None else None),
4192 needs_subscription=(
4193 self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)
4194 or False if initial_data and is_private is not None else None),
4195 needs_auth=info['age_limit'] >= 18,
4196 is_unlisted=None if is_private is None else (
4197 self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
4198 or get_first(microformats, 'isUnlisted', expected_type=bool))))
4199
4200 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
4201
4202 self.mark_watched(video_id, player_responses)
4203
4204 return info
4205
4206
4207class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
4208
@staticmethod
def passthrough_smuggled_data(func):
    """Decorate an extraction method so that smuggled data survives into its entries.

    The wrapped `func` is called as `func(self, url, smuggled_data)` with the
    unsmuggled URL. If any smuggled data is present and the returned info dict
    has entries, every entry URL is re-smuggled with that data (lazily).
    """

    def _resmuggle(items, payload):
        # TODO: Convert URL to music.youtube instead.
        # Do we need to passthrough any other smuggled_data?
        for item in items:
            item['url'] = smuggle_url(item['url'], payload)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        plain_url, payload = unsmuggle_url(url, {})
        if self.is_music_url(plain_url):
            payload['is_music_url'] = True
        result = func(self, plain_url, payload)
        if not (payload and result.get('entries')):
            return result
        result['entries'] = _resmuggle(result['entries'], payload)
        return result

    return wrapper
4228
4229 def _extract_channel_id(self, webpage):
4230 channel_id = self._html_search_meta(
4231 'channelId', webpage, 'channel id', default=None)
4232 if channel_id:
4233 return channel_id
4234 channel_url = self._html_search_meta(
4235 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
4236 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
4237 'twitter:app:url:googleplay'), webpage, 'channel url')
4238 return self._search_regex(
4239 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
4240 channel_url, 'channel id')
4241
4242 @staticmethod
4243 def _extract_basic_item_renderer(item):
4244 # Modified from _extract_grid_item_renderer
4245 known_basic_renderers = (
4246 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
4247 )
4248 for key, renderer in item.items():
4249 if not isinstance(renderer, dict):
4250 continue
4251 elif key in known_basic_renderers:
4252 return renderer
4253 elif key.startswith('grid') and key.endswith('Renderer'):
4254 return renderer
4255
4256 def _grid_entries(self, grid_renderer):
4257 for item in grid_renderer['items']:
4258 if not isinstance(item, dict):
4259 continue
4260 renderer = self._extract_basic_item_renderer(item)
4261 if not isinstance(renderer, dict):
4262 continue
4263 title = self._get_text(renderer, 'title')
4264
4265 # playlist
4266 playlist_id = renderer.get('playlistId')
4267 if playlist_id:
4268 yield self.url_result(
4269 'https://www.youtube.com/playlist?list=%s' % playlist_id,
4270 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4271 video_title=title)
4272 continue
4273 # video
4274 video_id = renderer.get('videoId')
4275 if video_id:
4276 yield self._extract_video(renderer)
4277 continue
4278 # channel
4279 channel_id = renderer.get('channelId')
4280 if channel_id:
4281 yield self.url_result(
4282 'https://www.youtube.com/channel/%s' % channel_id,
4283 ie=YoutubeTabIE.ie_key(), video_title=title)
4284 continue
4285 # generic endpoint URL support
4286 ep_url = urljoin('https://www.youtube.com/', try_get(
4287 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
4288 str))
4289 if ep_url:
4290 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
4291 if ie.suitable(ep_url):
4292 yield self.url_result(
4293 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
4294 break
4295
def _music_reponsive_list_entry(self, renderer):
    """Build a URL result for a music list item, or return None if it has no usable target.

    Tried in order: the item's own video ID, the playlist of its watch endpoint
    (with that endpoint's video ID when present) and finally its browse ID.
    """
    def nav(*path):
        return traverse_obj(renderer, ('navigationEndpoint', *path))

    item_video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if item_video_id:
        return self.url_result(f'https://music.youtube.com/watch?v={item_video_id}',
                               ie=YoutubeIE.ie_key(), video_id=item_video_id)

    playlist_id = nav('watchEndpoint', 'playlistId')
    if playlist_id:
        video_id = nav('watchEndpoint', 'videoId')
        if video_id:
            target = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            target = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = nav('browseEndpoint', 'browseId')
    if browse_id:
        return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
                               ie=YoutubeTabIE.ie_key(), video_id=browse_id)
    return None
4313
4314 def _shelf_entries_from_content(self, shelf_renderer):
4315 content = shelf_renderer.get('content')
4316 if not isinstance(content, dict):
4317 return
4318 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
4319 if renderer:
4320 # TODO: add support for nested playlists so each shelf is processed
4321 # as separate playlist
4322 # TODO: this includes only first N items
4323 yield from self._grid_entries(renderer)
4324 renderer = content.get('horizontalListRenderer')
4325 if renderer:
4326 # TODO
4327 pass
4328
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf's own page (if it links to one), then its inline entries.

    With `skip_channels` set, a shelf whose URL contains `/channels?` produces
    nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels by looking at the URL itself; checking
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        links_to_channels = '/channels?' in shelf_url
        if skip_channels and links_to_channels:
            return
        shelf_title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
4344
4345 def _playlist_entries(self, video_list_renderer):
4346 for content in video_list_renderer['contents']:
4347 if not isinstance(content, dict):
4348 continue
4349 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
4350 if not isinstance(renderer, dict):
4351 continue
4352 video_id = renderer.get('videoId')
4353 if not video_id:
4354 continue
4355 yield self._extract_video(renderer)
4356
def _rich_entries(self, rich_grid_renderer):
    """Yield the video wrapped by a rich item (`videoRenderer` or `reelItemRenderer`), if any."""
    renderer = traverse_obj(
        rich_grid_renderer, ('content', ('videoRenderer', 'reelItemRenderer')), get_all=False) or {}
    if renderer.get('videoId'):
        yield self._extract_video(renderer)
4364
4365 def _video_entry(self, video_renderer):
4366 video_id = video_renderer.get('videoId')
4367 if video_id:
4368 return self._extract_video(video_renderer)
4369
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a tab URL result for a hashtag tile, or None if the tile has no tap URL."""
    tap_path = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_path)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4376
def _post_thread_entries(self, post_thread_renderer):
    """Yield everything playable referenced by a community post.

    In order: the attached video, the attached playlist, and every video URL
    linked from the post text (except a link to the attached video itself).
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return

    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = video_renderer.get('videoId')
    attached_video = self._extract_video(video_renderer) if video_id else None
    if attached_video:
        yield attached_video

    # playlist attachment
    playlist_id = try_get(
        post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not (ep_url and YoutubeIE.suitable(ep_url)):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # The attached video was already yielded above
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
4412
4413 def _post_thread_continuation_entries(self, post_thread_continuation):
4414 contents = post_thread_continuation.get('contents')
4415 if not isinstance(contents, list):
4416 return
4417 for content in contents:
4418 renderer = content.get('backstagePostThreadRenderer')
4419 if isinstance(renderer, dict):
4420 yield from self._post_thread_entries(renderer)
4421 continue
4422 renderer = content.get('videoRenderer')
4423 if isinstance(renderer, dict):
4424 yield self._video_entry(renderer)
4425
4426 r''' # unused
4427 def _rich_grid_entries(self, contents):
4428 for content in contents:
4429 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4430 if video_renderer:
4431 entry = self._video_entry(video_renderer)
4432 if entry:
4433 yield entry
4434 '''
4435
def _report_history_entries(self, renderer):
    """Yield a video URL result for every link found in the cells of a report-history table."""
    urls = traverse_obj(renderer, (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'))
    # NOTE(review): traverse_obj presumably returns None when nothing matches
    # (other callers in this file pass default=[] explicitly), so fall back to
    # an empty list rather than iterating the result directly - confirm
    for url in urls or []:
        yield self.url_result(urljoin('https://www.youtube.com', url), YoutubeIE)
4442
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found under `parent_renderer['contents']`.

    `continuation_list` is an out-parameter: it is reset to `[None]` and its
    single slot is overwritten with the continuation extracted from the
    innermost renderer that provides one, falling back to the section renderer
    and finally to `parent_renderer`.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Maps a renderer key to a callable returning an iterable of entries
    handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)],
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        is_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)

        if not is_renderer:
            # Not a section: only rich items and the report-history table are understood here
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        for isr_content in try_get(is_renderer, lambda x: x['contents'], list) or []:
            if not isinstance(isr_content, dict):
                continue
            # Only the first recognised renderer of each item is processed
            key = next((name for name in isr_content if name in handlers), None)
            if key is None:
                continue
            renderer = isr_content[key]
            for entry in handlers[key](renderer):
                if entry:
                    yield entry
            continuation_list[0] = self._extract_continuation(renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
4495
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of a tab, following continuations page by page.

    The first page comes from the tab's `sectionListRenderer` or
    `richGridRenderer`. Each further page is requested with the current
    continuation until there is no continuation, no response, or a response
    in which no known renderer is found.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # Reads `continuation_list` at call time, so it always writes into the
        # list most recently bound below
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # renderer key -> (handler, key under which the handler expects the items; None = pass as-is)
    known_renderers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
        'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
        'playlistVideoListContinuation': (self._playlist_entries, None),
        'gridContinuation': (self._grid_entries, None),
        'itemSectionContinuation': (self._post_thread_continuation_entries, None),
        'sectionListContinuation': (extract_entries, None),  # for feeds
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems'
        ), 'continuationContents', get_all=False)
        # The first item (or the payload itself) decides which handler applies
        continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for key in continuation_item:
            if key in known_renderers:
                func, parent_key = known_renderers[key]
                # NOTE(review): with parent_key None the whole continuation payload is
                # handed to func unchanged - confirm the handlers expect that shape
                video_items_renderer = {parent_key: continuation_items} if parent_key else continuation_items
                continuation_list = [None]
                yield from func(video_items_renderer)
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        if not video_items_renderer:
            break
4558
4559 @staticmethod
4560 def _extract_selected_tab(tabs, fatal=True):
4561 for tab in tabs:
4562 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
4563 if renderer.get('selected') is True:
4564 return renderer
4565 else:
4566 if fatal:
4567 raise ExtractorError('Unable to find selected tab')
4568
def _extract_uploader(self, data):
    """Return the uploader name, ID and URL taken from the secondary playlist sidebar.

    The result is passed through `filter_dict`; it is built from an empty
    dict when the sidebar names no owner.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)

    details = {}
    if owner:
        owner_text = owner.get('text')
        # Owner text of the form "by X and N others" is reduced to just "X"
        details['uploader'] = self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text)
        details['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], str)
        base_url = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], str)
        details['uploader_url'] = urljoin('https://www.youtube.com/', base_url)
    return filter_dict(details)
4584
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for a tabbed page (channel tab, playlist, hashtag, ...).

    Metadata comes from the channel metadata renderer or, failing that, the
    playlist metadata renderer, plus the primary sidebar and the header. The
    entries are produced lazily by `_entries` for the selected tab.
    """
    channel_name = channel_url = channel_id = None

    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    channel_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if channel_renderer:
        channel_name = channel_renderer.get('title')
        channel_url = channel_renderer.get('channelUrl')
        channel_id = channel_renderer.get('externalId')
    renderer = channel_renderer or try_get(
        data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
    else:
        title = description = playlist_id = None
        tags = []

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _add_uncropped(thumbnails, thumbnail_id, preference):
        if not thumbnails:
            return
        uncropped = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped:
            thumbnails.append({
                'url': uncropped,
                'id': thumbnail_id,
                'preference': preference,
            })

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    _add_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _add_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix = self._parse_time_text(self._get_text(playlist_stats, 2))

    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    # Append the selected tab's title / expanded text where available
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        # No channel metadata: take whatever uploader details the sidebar offers
        metadata.update(self._extract_uploader(data))
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)
4676
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of an inline (watch page) playlist, requesting further pages as needed.

    Each page is the playlist panel returned by the `next` endpoint for the
    last video seen so far. Only videos after the previously yielded last
    video are yielded from a page. Iteration stops when a page has no
    playlist, has no videos, or adds nothing new.
    """
    last_id = response = None
    for page_num in itertools.count(1):
        # A follow-up response may carry no playlist panel at all
        if not playlist:
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last video of the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The last item is not necessarily a panel video renderer, so the
        # endpoint may be missing; the fallbacks below cover that case
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4707
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Return a result for a playlist embedded in a watch page.

    If the playlist links to a page of its own that differs from `url` and
    the ID is not a known-unviewable one, extraction is delegated to that
    page. Otherwise the inline playlist is extracted directly.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    playlist_url = urljoin(url, endpoint_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if not delegate:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
4730
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}

    player_header_privacy = traverse_obj(
        data, ('header', 'playlistHeaderRenderer', 'privacy'), expected_type=str)

    badges = self._extract_badges(renderer)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_setting_icon = traverse_obj(
        renderer, (
            'privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
            lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        get_all=False, expected_type=str)

    def _privacy_flag(badge_type, header_value, icon_value):
        # With a header privacy value: badge or matching header.
        # Otherwise: the dropdown icon alone, or None if that is missing too.
        # NOTE(review): the badge is not consulted when the header privacy is
        # absent - confirm this is intended
        if player_header_privacy is not None:
            return self._has_badge(badges, badge_type) or player_header_privacy == header_value
        if privacy_setting_icon is not None:
            return privacy_setting_icon == icon_value
        return None

    is_public = (
        self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
        or player_header_privacy == 'PUBLIC'
        or privacy_setting_icon == 'PRIVACY_PUBLIC')
    if is_public:
        return 'public'

    return self._availability(
        is_private=_privacy_flag(BadgeType.AVAILABILITY_PRIVATE, 'PRIVATE', 'PRIVACY_PRIVATE'),
        is_unlisted=_privacy_flag(BadgeType.AVAILABILITY_UNLISTED, 'UNLISTED', 'PRIVACY_UNLISTED'),
        needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
        needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
        needs_auth=False)
4768
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first truthy `info_renderer` value among the playlist sidebar items, else None."""
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next(filter(None, candidates), None)
4777
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Re-request the playlist through the API so that unavailable videos are included.

    The browse endpoint of the 'show unavailable videos' menu entry is used when
    the sidebar has one; otherwise default `params`/`browseId` values are sent.
    Nothing is requested (None is returned) when the primary sidebar renderer is missing.
    @param item_id: playlist id
    @param data: response
    @param ytcfg: ytcfg used for the headers and the request
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return None

    browse_endpoint = {}
    menu_items = try_get(sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for entry in menu_items:
        if not isinstance(entry, dict):
            continue
        nav_renderer = entry.get('menuNavigationItemRenderer')
        label = try_get(nav_renderer, lambda x: x['text']['simpleText'], str)
        if label and label.lower() == 'show unavailable videos':
            # Only the first matching menu entry is considered
            browse_endpoint = try_get(
                nav_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    api_headers = self.generate_api_headers(
        ytcfg=ytcfg,
        account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    api_query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id,
    }
    return self._extract_response(
        item_id=item_id, headers=api_headers, query=api_query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
4813
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the `skip` extractor argument of YoutubeTabIE"""
    skip_args = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skip_args
4817
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its ytInitialData, retrying through RetryManager.
    @param url: URL of the page to download
    @param item_id: id used when downloading and reporting
    @param fatal: passed on to RetryManager, the data extraction and error reporting
    @returns: (webpage, data) tuple; both stay None if the first download fails
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as err:
            cause = err.cause
            # Network errors are retried, unless they are HTTP 403 or 429 responses
            if isinstance(cause, network_exceptions) and (
                    not isinstance(cause, urllib.error.HTTPError)
                    or cause.code not in (403, 429)):
                retry.error = err
                continue
            self._error_or_warning(err, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as err:
            self._error_or_warning(err, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        has_content = traverse_obj(
            data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not has_content:
            # Setting the error asks the retry manager for another attempt
            retry.error = ExtractorError('Incomplete yt initial data received')

    return webpage, data
4845
def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """
    Use if failed to extract ytcfg (and data) from initial webpage.

    Does nothing when a ytcfg is available or the session is not authenticated.
    Otherwise raises ExtractorError if `fatal` is set and 'authcheck' is not in
    the `skip` extractor argument, and only warns (once) in all other cases.
    """
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    skipped_checks = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if 'authcheck' not in skipped_checks and fatal:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)
4857
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Get the initial data for a tab/playlist URL, from the webpage or else from the API.

    The webpage is tried first unless it is skipped; if that yields no data,
    the data is requested through `_extract_tab_endpoint` instead.
    @param webpage_fatal: `fatal` value used for the webpage download only
    @returns: (data, ytcfg) tuple
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)

        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'),
            expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        # No usable webpage data: check authentication, then fall back to the API
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(
            url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
4877
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the 'navigation/resolve_url' API endpoint and request the result.

    The resolved 'browseEndpoint' is tried before 'watchEndpoint'; the first one
    present is requested from the 'browse' or 'next' API endpoint respectively.
    @returns: the API response; None (after a warning) if nothing resolved and not `fatal`
    @raises ExtractorError: if nothing resolved and `fatal` is set
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers,
        ytcfg=ytcfg, fatal=fatal, ep='navigation/resolve_url',
        note='Downloading API parameters API JSON', default_client=default_client)

    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_query = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_query:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_query, ep=api_ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return None
    raise ExtractorError(err_note, expected=True)
4895
# Value sent as `params` in search requests when the caller does not pass one
_SEARCH_PARAMS = None

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of a search, requesting further pages while a continuation is available.
    @param query: search query
    @param params: value of the request's `params` field; defaults to `_SEARCH_PARAMS`
                   and is left out of the request when falsy
    @param default_client: client used for the ytcfg, the headers and the requests
    """
    request_body = {'query': query}
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    if params:
        request_body['params'] = params

    # Paths where the result items may be found in a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # Distinct top-level keys of the paths above
    check_get_keys = tuple({path[0] for path in content_keys})
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # One-element list that is handed to _extract_entries and read back after each page
    continuation_list = [None]
    response = None
    for page_num in itertools.count(1):
        request_body.update(continuation_list[0] or {})
        visitor_data = self._extract_visitor_data(response)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=visitor_data, default_client=default_client)
        response = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=request_body,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        page_contents = traverse_obj(response, *content_keys)
        yield from self._extract_entries(
            {'contents': list(variadic(page_contents))}, continuation_list)
        if not continuation_list[0]:
            break
4930
4931
4932class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
4933 IE_DESC = 'YouTube Tabs'
4934 _VALID_URL = r'''(?x:
4935 https?://
4936 (?:\w+\.)?
4937 (?:
4938 youtube(?:kids)?\.com|
4939 %(invidious)s
4940 )/
4941 (?:
4942 (?P<channel_type>channel|c|user|browse)/|
4943 (?P<not_channel>
4944 feed/|hashtag/|
4945 (?:playlist|watch)\?.*?\blist=
4946 )|
4947 (?!(?:%(reserved_names)s)\b) # Direct URLs
4948 )
4949 (?P<id>[^/?\#&]+)
4950 )''' % {
4951 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
4952 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
4953 }
4954 IE_NAME = 'youtube:tab'
4955
4956 _TESTS = [{
4957 'note': 'playlists, multipage',
4958 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
4959 'playlist_mincount': 94,
4960 'info_dict': {
4961 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
4962 'title': 'Igor Kleiner - Playlists',
4963 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
4964 'uploader': 'Igor Kleiner',
4965 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4966 'channel': 'Igor Kleiner',
4967 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4968 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4969 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4970 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4971 'channel_follower_count': int
4972 },
4973 }, {
4974 'note': 'playlists, multipage, different order',
4975 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
4976 'playlist_mincount': 94,
4977 'info_dict': {
4978 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
4979 'title': 'Igor Kleiner - Playlists',
4980 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
4981 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4982 'uploader': 'Igor Kleiner',
4983 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4984 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4985 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4986 'channel': 'Igor Kleiner',
4987 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4988 'channel_follower_count': int
4989 },
4990 }, {
4991 'note': 'playlists, series',
4992 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
4993 'playlist_mincount': 5,
4994 'info_dict': {
4995 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4996 'title': '3Blue1Brown - Playlists',
4997 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4998 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4999 'uploader': '3Blue1Brown',
5000 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5001 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5002 'channel': '3Blue1Brown',
5003 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5004 'tags': ['Mathematics'],
5005 'channel_follower_count': int
5006 },
5007 }, {
5008 'note': 'playlists, singlepage',
5009 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
5010 'playlist_mincount': 4,
5011 'info_dict': {
5012 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5013 'title': 'ThirstForScience - Playlists',
5014 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
5015 'uploader': 'ThirstForScience',
5016 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5017 'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
5018 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
5019 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5020 'tags': 'count:13',
5021 'channel': 'ThirstForScience',
5022 'channel_follower_count': int
5023 }
5024 }, {
5025 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
5026 'only_matching': True,
5027 }, {
5028 'note': 'basic, single video playlist',
5029 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5030 'info_dict': {
5031 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5032 'uploader': 'Sergey M.',
5033 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5034 'title': 'youtube-dl public playlist',
5035 'description': '',
5036 'tags': [],
5037 'view_count': int,
5038 'modified_date': '20201130',
5039 'channel': 'Sergey M.',
5040 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5041 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5042 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5043 'availability': 'public',
5044 },
5045 'playlist_count': 1,
5046 }, {
5047 'note': 'empty playlist',
5048 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5049 'info_dict': {
5050 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5051 'uploader': 'Sergey M.',
5052 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5053 'title': 'youtube-dl empty playlist',
5054 'tags': [],
5055 'channel': 'Sergey M.',
5056 'description': '',
5057 'modified_date': '20160902',
5058 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5059 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5060 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5061 'availability': 'public',
5062 },
5063 'playlist_count': 0,
5064 }, {
5065 'note': 'Home tab',
5066 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
5067 'info_dict': {
5068 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5069 'title': 'lex will - Home',
5070 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5071 'uploader': 'lex will',
5072 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5073 'channel': 'lex will',
5074 'tags': ['bible', 'history', 'prophesy'],
5075 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5076 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5077 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5078 'channel_follower_count': int
5079 },
5080 'playlist_mincount': 2,
5081 }, {
5082 'note': 'Videos tab',
5083 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
5084 'info_dict': {
5085 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5086 'title': 'lex will - Videos',
5087 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5088 'uploader': 'lex will',
5089 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5090 'tags': ['bible', 'history', 'prophesy'],
5091 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5092 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5093 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5094 'channel': 'lex will',
5095 'channel_follower_count': int
5096 },
5097 'playlist_mincount': 975,
5098 }, {
5099 'note': 'Videos tab, sorted by popular',
5100 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
5101 'info_dict': {
5102 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5103 'title': 'lex will - Videos',
5104 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5105 'uploader': 'lex will',
5106 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5107 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5108 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5109 'channel': 'lex will',
5110 'tags': ['bible', 'history', 'prophesy'],
5111 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5112 'channel_follower_count': int
5113 },
5114 'playlist_mincount': 199,
5115 }, {
5116 'note': 'Playlists tab',
5117 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
5118 'info_dict': {
5119 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5120 'title': 'lex will - Playlists',
5121 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5122 'uploader': 'lex will',
5123 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5124 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5125 'channel': 'lex will',
5126 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5127 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5128 'tags': ['bible', 'history', 'prophesy'],
5129 'channel_follower_count': int
5130 },
5131 'playlist_mincount': 17,
5132 }, {
5133 'note': 'Community tab',
5134 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
5135 'info_dict': {
5136 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5137 'title': 'lex will - Community',
5138 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5139 'uploader': 'lex will',
5140 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5141 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5142 'channel': 'lex will',
5143 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5144 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5145 'tags': ['bible', 'history', 'prophesy'],
5146 'channel_follower_count': int
5147 },
5148 'playlist_mincount': 18,
5149 }, {
5150 'note': 'Channels tab',
5151 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
5152 'info_dict': {
5153 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5154 'title': 'lex will - Channels',
5155 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5156 'uploader': 'lex will',
5157 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5158 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5159 'channel': 'lex will',
5160 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5161 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5162 'tags': ['bible', 'history', 'prophesy'],
5163 'channel_follower_count': int
5164 },
5165 'playlist_mincount': 12,
5166 }, {
5167 'note': 'Search tab',
5168 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
5169 'playlist_mincount': 40,
5170 'info_dict': {
5171 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5172 'title': '3Blue1Brown - Search - linear algebra',
5173 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
5174 'uploader': '3Blue1Brown',
5175 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
5176 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5177 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5178 'tags': ['Mathematics'],
5179 'channel': '3Blue1Brown',
5180 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5181 'channel_follower_count': int
5182 },
5183 }, {
5184 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5185 'only_matching': True,
5186 }, {
5187 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5188 'only_matching': True,
5189 }, {
5190 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5191 'only_matching': True,
5192 }, {
5193 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
5194 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5195 'info_dict': {
5196 'title': '29C3: Not my department',
5197 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5198 'uploader': 'Christiaan008',
5199 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5200 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
5201 'tags': [],
5202 'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
5203 'view_count': int,
5204 'modified_date': '20150605',
5205 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5206 'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
5207 'channel': 'Christiaan008',
5208 'availability': 'public',
5209 },
5210 'playlist_count': 96,
5211 }, {
5212 'note': 'Large playlist',
5213 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
5214 'info_dict': {
5215 'title': 'Uploads from Cauchemar',
5216 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
5217 'uploader': 'Cauchemar',
5218 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
5219 'channel_url': 'https://www.youtube.com/c/Cauchemar89',
5220 'tags': [],
5221 'modified_date': r're:\d{8}',
5222 'channel': 'Cauchemar',
5223 'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
5224 'view_count': int,
5225 'description': '',
5226 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
5227 'availability': 'public',
5228 },
5229 'playlist_mincount': 1123,
5230 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5231 }, {
5232 'note': 'even larger playlist, 8832 videos',
5233 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
5234 'only_matching': True,
5235 }, {
5236 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
5237 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
5238 'info_dict': {
5239 'title': 'Uploads from Interstellar Movie',
5240 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
5241 'uploader': 'Interstellar Movie',
5242 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
5243 'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
5244 'tags': [],
5245 'view_count': int,
5246 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
5247 'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
5248 'channel': 'Interstellar Movie',
5249 'description': '',
5250 'modified_date': r're:\d{8}',
5251 'availability': 'public',
5252 },
5253 'playlist_mincount': 21,
5254 }, {
5255 'note': 'Playlist with "show unavailable videos" button',
5256 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
5257 'info_dict': {
5258 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
5259 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
5260 'uploader': 'Phim Siêu Nhân Nhật Bản',
5261 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5262 'view_count': int,
5263 'channel': 'Phim Siêu Nhân Nhật Bản',
5264 'tags': [],
5265 'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5266 'description': '',
5267 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5268 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5269 'modified_date': r're:\d{8}',
5270 'availability': 'public',
5271 },
5272 'playlist_mincount': 200,
5273 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5274 }, {
5275 'note': 'Playlist with unavailable videos in page 7',
5276 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5277 'info_dict': {
5278 'title': 'Uploads from BlankTV',
5279 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
5280 'uploader': 'BlankTV',
5281 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5282 'channel': 'BlankTV',
5283 'channel_url': 'https://www.youtube.com/c/blanktv',
5284 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5285 'view_count': int,
5286 'tags': [],
5287 'uploader_url': 'https://www.youtube.com/c/blanktv',
5288 'modified_date': r're:\d{8}',
5289 'description': '',
5290 'availability': 'public',
5291 },
5292 'playlist_mincount': 1000,
5293 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5294 }, {
5295 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5296 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5297 'info_dict': {
5298 'title': 'Data Analysis with Dr Mike Pound',
5299 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5300 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5301 'uploader': 'Computerphile',
5302 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
5303 'uploader_url': 'https://www.youtube.com/user/Computerphile',
5304 'tags': [],
5305 'view_count': int,
5306 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5307 'channel_url': 'https://www.youtube.com/user/Computerphile',
5308 'channel': 'Computerphile',
5309 'availability': 'public',
5310 },
5311 'playlist_mincount': 11,
5312 }, {
5313 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5314 'only_matching': True,
5315 }, {
5316 'note': 'Playlist URL that does not actually serve a playlist',
5317 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5318 'info_dict': {
5319 'id': 'FqZTN594JQw',
5320 'ext': 'webm',
5321 'title': "Smiley's People 01 detective, Adventure Series, Action",
5322 'uploader': 'STREEM',
5323 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
5324 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
5325 'upload_date': '20150526',
5326 'license': 'Standard YouTube License',
5327 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5328 'categories': ['People & Blogs'],
5329 'tags': list,
5330 'view_count': int,
5331 'like_count': int,
5332 },
5333 'params': {
5334 'skip_download': True,
5335 },
5336 'skip': 'This video is not available.',
5337 'add_ie': [YoutubeIE.ie_key()],
5338 }, {
5339 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5340 'only_matching': True,
5341 }, {
5342 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5343 'only_matching': True,
5344 }, {
5345 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5346 'info_dict': {
5347 'id': 'Wq15eF5vCbI', # This will keep changing
5348 'ext': 'mp4',
5349 'title': str,
5350 'uploader': 'Sky News',
5351 'uploader_id': 'skynews',
5352 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
5353 'upload_date': r're:\d{8}',
5354 'description': str,
5355 'categories': ['News & Politics'],
5356 'tags': list,
5357 'like_count': int,
5358 'release_timestamp': 1642502819,
5359 'channel': 'Sky News',
5360 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5361 'age_limit': 0,
5362 'view_count': int,
5363 'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',
5364 'playable_in_embed': True,
5365 'release_date': '20220118',
5366 'availability': 'public',
5367 'live_status': 'is_live',
5368 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
5369 'channel_follower_count': int
5370 },
5371 'params': {
5372 'skip_download': True,
5373 },
5374 'expected_warnings': ['Ignoring subtitle tracks found in '],
5375 }, {
5376 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5377 'info_dict': {
5378 'id': 'a48o2S1cPoo',
5379 'ext': 'mp4',
5380 'title': 'The Young Turks - Live Main Show',
5381 'uploader': 'The Young Turks',
5382 'uploader_id': 'TheYoungTurks',
5383 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
5384 'upload_date': '20150715',
5385 'license': 'Standard YouTube License',
5386 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5387 'categories': ['News & Politics'],
5388 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5389 'like_count': int,
5390 },
5391 'params': {
5392 'skip_download': True,
5393 },
5394 'only_matching': True,
5395 }, {
5396 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5397 'only_matching': True,
5398 }, {
5399 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5400 'only_matching': True,
5401 }, {
5402 'note': 'A channel that is not live. Should raise error',
5403 'url': 'https://www.youtube.com/user/numberphile/live',
5404 'only_matching': True,
5405 }, {
5406 'url': 'https://www.youtube.com/feed/trending',
5407 'only_matching': True,
5408 }, {
5409 'url': 'https://www.youtube.com/feed/library',
5410 'only_matching': True,
5411 }, {
5412 'url': 'https://www.youtube.com/feed/history',
5413 'only_matching': True,
5414 }, {
5415 'url': 'https://www.youtube.com/feed/subscriptions',
5416 'only_matching': True,
5417 }, {
5418 'url': 'https://www.youtube.com/feed/watch_later',
5419 'only_matching': True,
5420 }, {
5421 'note': 'Recommended - redirects to home page.',
5422 'url': 'https://www.youtube.com/feed/recommended',
5423 'only_matching': True,
5424 }, {
5425 'note': 'inline playlist with not always working continuations',
5426 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5427 'only_matching': True,
5428 }, {
5429 'url': 'https://www.youtube.com/course',
5430 'only_matching': True,
5431 }, {
5432 'url': 'https://www.youtube.com/zsecurity',
5433 'only_matching': True,
5434 }, {
5435 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5436 'only_matching': True,
5437 }, {
5438 'url': 'https://www.youtube.com/TheYoungTurks/live',
5439 'only_matching': True,
5440 }, {
5441 'url': 'https://www.youtube.com/hashtag/cctv9',
5442 'info_dict': {
5443 'id': 'cctv9',
5444 'title': '#cctv9',
5445 'tags': [],
5446 },
5447 'playlist_mincount': 350,
5448 }, {
5449 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
5450 'only_matching': True,
5451 }, {
5452 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
5453 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5454 'only_matching': True
5455 }, {
5456 'note': '/browse/ should redirect to /channel/',
5457 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
5458 'only_matching': True
5459 }, {
5460 'note': 'VLPL, should redirect to playlist?list=PL...',
5461 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5462 'info_dict': {
5463 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5464 'uploader': 'NoCopyrightSounds',
5465 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
5466 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5467 'title': 'NCS : All Releases 💿',
5468 'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5469 'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5470 'modified_date': r're:\d{8}',
5471 'view_count': int,
5472 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5473 'tags': [],
5474 'channel': 'NoCopyrightSounds',
5475 'availability': 'public',
5476 },
5477 'playlist_mincount': 166,
5478 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5479 }, {
5480 'note': 'Topic, should redirect to playlist?list=UU...',
5481 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5482 'info_dict': {
5483 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5484 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5485 'title': 'Uploads from Royalty Free Music - Topic',
5486 'uploader': 'Royalty Free Music - Topic',
5487 'tags': [],
5488 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5489 'channel': 'Royalty Free Music - Topic',
5490 'view_count': int,
5491 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5492 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5493 'modified_date': r're:\d{8}',
5494 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5495 'description': '',
5496 'availability': 'public',
5497 },
5498 'expected_warnings': [
5499 'The URL does not have a videos tab',
5500 r'[Uu]navailable videos (are|will be) hidden',
5501 ],
5502 'playlist_mincount': 101,
5503 }, {
5504 'note': 'Topic without a UU playlist',
5505 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
5506 'info_dict': {
5507 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
5508 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
5509 'tags': [],
5510 },
5511 'expected_warnings': [
5512 'the playlist redirect gave error',
5513 ],
5514 'playlist_mincount': 9,
5515 }, {
5516 'note': 'Youtube music Album',
5517 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
5518 'info_dict': {
5519 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
5520 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
5521 'tags': [],
5522 'view_count': int,
5523 'description': '',
5524 'availability': 'unlisted',
5525 'modified_date': r're:\d{8}',
5526 },
5527 'playlist_count': 50,
5528 }, {
5529 'note': 'unlisted single video playlist',
5530 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5531 'info_dict': {
5532 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5533 'uploader': 'colethedj',
5534 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5535 'title': 'yt-dlp unlisted playlist test',
5536 'availability': 'unlisted',
5537 'tags': [],
5538 'modified_date': '20220418',
5539 'channel': 'colethedj',
5540 'view_count': int,
5541 'description': '',
5542 'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5543 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5544 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5545 },
5546 'playlist_count': 1,
5547 }, {
5548 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
5549 'url': 'https://www.youtube.com/feed/recommended',
5550 'info_dict': {
5551 'id': 'recommended',
5552 'title': 'recommended',
5553 'tags': [],
5554 },
5555 'playlist_mincount': 50,
5556 'params': {
5557 'skip_download': True,
5558 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5559 },
5560 }, {
5561 'note': 'API Fallback: /videos tab, sorted by oldest first',
5562 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
5563 'info_dict': {
5564 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5565 'title': 'Cody\'sLab - Videos',
5566 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
5567 'uploader': 'Cody\'sLab',
5568 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5569 'channel': 'Cody\'sLab',
5570 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5571 'tags': [],
5572 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5573 'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5574 'channel_follower_count': int
5575 },
5576 'playlist_mincount': 650,
5577 'params': {
5578 'skip_download': True,
5579 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5580 },
5581 }, {
5582 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
5583 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5584 'info_dict': {
5585 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5586 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5587 'title': 'Uploads from Royalty Free Music - Topic',
5588 'uploader': 'Royalty Free Music - Topic',
5589 'modified_date': r're:\d{8}',
5590 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5591 'description': '',
5592 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5593 'tags': [],
5594 'channel': 'Royalty Free Music - Topic',
5595 'view_count': int,
5596 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5597 'availability': 'public',
5598 },
5599 'expected_warnings': [
5600 'does not have a videos tab',
5601 r'[Uu]navailable videos (are|will be) hidden',
5602 ],
5603 'playlist_mincount': 101,
5604 'params': {
5605 'skip_download': True,
5606 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5607 },
5608 }, {
5609 'note': 'non-standard redirect to regional channel',
5610 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
5611 'only_matching': True
5612 }, {
5613 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
5614 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5615 'info_dict': {
5616 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5617 'modified_date': '20220407',
5618 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5619 'tags': [],
5620 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5621 'uploader': 'pukkandan',
5622 'availability': 'unlisted',
5623 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5624 'channel': 'pukkandan',
5625 'description': 'Test for collaborative playlist',
5626 'title': 'yt-dlp test - collaborative playlist',
5627 'view_count': int,
5628 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5629 },
5630 'playlist_mincount': 2
5631 }, {
5632 'note': 'translated tab name',
5633 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
5634 'info_dict': {
5635 'id': 'UCiu-3thuViMebBjw_5nWYrA',
5636 'tags': [],
5637 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
5638 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5639 'description': '',
5640 'title': 'cole-dlp-test-acc - 再生リスト',
5641 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5642 'uploader': 'cole-dlp-test-acc',
5643 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
5644 'channel': 'cole-dlp-test-acc',
5645 },
5646 'playlist_mincount': 1,
5647 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
5648 'expected_warnings': ['Preferring "ja"'],
5649 }, {
5650 # XXX: this should really check flat playlist entries, but the test suite doesn't support that
5651 'note': 'preferred lang set with playlist with translated video titles',
5652 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
5653 'info_dict': {
5654 'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
5655 'tags': [],
5656 'view_count': int,
5657 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5658 'uploader': 'cole-dlp-test-acc',
5659 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
5660 'channel': 'cole-dlp-test-acc',
5661 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
5662 'description': 'test',
5663 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5664 'title': 'dlp test playlist',
5665 'availability': 'public',
5666 },
5667 'playlist_mincount': 1,
5668 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
5669 'expected_warnings': ['Preferring "ja"'],
5670 }, {
5671 # shorts audio pivot for 2GtVksBMYFM.
5672 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
5673 'info_dict': {
5674 'id': 'sfv_audio_pivot',
5675 'title': 'sfv_audio_pivot',
5676 'tags': [],
5677 },
5678 'playlist_mincount': 50,
5679
5680 }]
5681
@classmethod
def suitable(cls, url):
    """Accept *url* for this extractor only when YoutubeIE does not claim it."""
    # YoutubeIE gets first refusal; everything else falls back to the inherited check
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
5685
# Splits a tab URL into three named parts:
#   pre  - the portion matched by _VALID_URL
#   tab  - an optional "/<word>" path segment; the conditional group only tries to
#          match it when the `not_channel` group of _VALID_URL did not participate
#   post - whatever remains up to the end of the string
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')
5687
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a tab-style URL (channel, playlist, feed, watch-with-list) to a result.

    The URL host is forced to www.youtube.com, music and bare-channel URLs are
    rewritten, the page data is fetched with `_extract_data`, and the result is
    produced by `_extract_from_tabs`, `_extract_from_playlist`, or a `url_result`
    pointing at YoutubeIE / YoutubeTabIE.

    `smuggled_data` is only read here through `.get('is_music_url')`.

    Raises ExtractorError when the page cannot be recognised, when a music album
    cannot be resolved to a playlist, or when an explicitly requested tab does not
    exist; raises UserNotLive when a `/live` tab did not redirect to a video.
    """
    item_id = self._match_id(url)
    # Normalise the host so every later request and regex sees www.youtube.com
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Match against _URL_RE and replace unmatched (None) groups with ''
        # so callers can use string methods on every group unconditionally
        mobj = self._URL_RE.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj, redirect_warning = get_mobj(url), None
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
            elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                mdata = self._extract_tab_endpoint(
                    f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
                murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                    get_all=False, expected_type=str)
                if not murl:
                    raise ExtractorError('Failed to resolve album to playlist')
                # Hand the canonical URL back to this extractor for a fresh pass
                return self.url_result(murl, ie=YoutubeTabIE.ie_key())
            elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                pre = f'https://www.youtube.com/channel/{item_id}'

    # Remember what the user actually asked for before any implicit /videos redirect
    original_tab_name = tab
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from its (possibly rewritten) parts and re-parse it
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = (qs.get(key, [None])[0] for key in ('v', 'list'))

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                   ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Keep the requested tab and trailing part of the URL on the redirect target
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_url = urljoin(
            url, traverse_obj(selected_tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url')))
        translated_tab_name = selected_tab.get('title', '').lower()

        # Prefer tab name from tab url as it is always in en,
        # but only when preferred lang is set as it may not extract reliably in all cases.
        selected_tab_name = (self._preferred_lang in (None, 'en') and translated_tab_name
                             or selected_tab_url and get_mobj(selected_tab_url)['tab'][1:]  # primary
                             or translated_tab_name)

        # The home tab is addressed as /featured in URLs
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]

        if 'no-youtube-channel-redirect' not in compat_opts:
            if requested_tab_name == 'live':  # Live tab should have redirected to the video
                raise UserNotLive(video_id=mobj['id'])
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if not original_tab_name:
                    # The tab was added implicitly above, so fall back instead of failing
                    if item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        pl_id = f'UU{item_id[2:]}'
                        pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                        try:
                            data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                        except ExtractorError:
                            redirect_warning += ' and the playlist redirect gave error'
                        else:
                            item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                            redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                    if selected_tab_name and selected_tab_name != requested_tab_name:
                        redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'
                else:
                    # The user explicitly asked for a tab that the channel does not have
                    raise ExtractorError(redirect_warning, expected=True)

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)
    # `data` may have been replaced above, so look the tabs up again
    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Neither tabs nor a playlist: fall back to a single video if one can be identified
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
5826
5827
class YoutubePlaylistIE(InfoExtractor):
    """Matches bare playlist IDs and playlist-carrying URLs, then defers to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE handles or that carry a `v` video ID."""
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): the function-scope import is kept exactly as in the original;
        # presumably it keeps this method self-contained - confirm before hoisting it
        from ..utils import parse_qs
        query = parse_qs(url)
        has_video_id = bool(query.get('v', [None])[0])
        return False if has_video_id else super().suitable(url)

    def _real_extract(self, url):
        """Rewrite the input as a canonical /playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)

        # Carry over the original query string; a bare ID has none, so build one
        query = parse_qs(url)
        if not query:
            query = {'list': playlist_id}
        target = update_url_query('https://www.youtube.com/playlist', query)

        if from_music:
            target = smuggle_url(target, {'is_music_url': True})
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
5939
5940
class YoutubeYtBeIE(InfoExtractor):
    """Handles youtu.be short links that also carry a `list=` playlist parameter."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link to a full watch URL (video + list) for YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
5990
5991
class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Maps `/embed/live_stream?channel=...` URLs to the channel's `/live` tab."""

    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel's /live URL."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
6005
6006
class YoutubeYtUserIE(InfoExtractor):
    """Expands the `ytuser:<name>` shorthand to the user's /videos page."""

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /user/<name>/videos URL."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
6021
6022
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Keyword extractor: `:ytfav` (and spelling variants) -> the `LL` playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed `list=LL` playlist URL."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
6040
6041
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Keyword extractor (`:ytnotif`) that lists entries from the notification menu."""

    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield one entry per notification found in *response*.

        `continuation_list[0]` is reset to None and then set to the last
        `continuationItemRenderer` seen, so the caller can request the next page.
        """
        initial_items_path = (
            'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items')
        continuation_items_path = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')
        menu_items = traverse_obj(
            response, initial_items_path, continuation_items_path, expected_type=list)
        if not menu_items:
            menu_items = []

        continuation_list[0] = None
        for menu_item in menu_items:
            parsed = self._extract_notification_renderer(menu_item.get('notificationRenderer'))
            if parsed:
                yield parsed
            next_page = menu_item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Convert one notification renderer into a `url` entry, or return None.

        Notifications with a watch endpoint point at the video; otherwise a
        community-post URL is built from the browse endpoint. None is returned
        when neither a video ID nor a channel ID + post ID pair is available.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)

        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)

        # The date is only derived from the relative "sent" text when explicitly requested
        upload_date = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()):
            upload_date = strftime_or_none(
                self._parse_time_text(self._get_text(notification, 'sentTimeText')), '%Y%m%d')

        return {
            '_type': 'url',
            'url': url,
            'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield notification entries page by page until no continuation remains."""
        continuation_list = [None]
        response = None
        page = 1
        while True:
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data comes from the previous page's response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break
            page += 1

    def _real_extract(self, url):
        """Return a playlist named 'notifications' backed by the lazy entry generator."""
        display_id = 'notifications'
        if self.skip_webpage:
            ytcfg = {}
        else:
            ytcfg = self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)
6131
6132
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """`ytsearch` keyword search; purely declarative, behaviour comes from the bases."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]
6146
6147
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """`ytsearchdate` keyword search; declarative variant of YoutubeSearchIE."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]
6161
6162
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handles youtube.com `/results` and `/search` URLs, honouring the `sp` parameter."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Run the search named in the URL; the query text is both playlist id and title."""
        query_args = parse_qs(url)
        # `search_query` wins over `q` when both are present
        search_terms = (query_args.get('search_query') or query_args.get('q'))[0]
        search_params = query_args.get('sp', (None,))[0]
        entries = self._search_results(search_terms, search_params)
        return self.playlist_result(entries, search_terms, search_terms)
6202
6203
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handles music.youtube.com search URLs; a section may come from `sp` or the fragment."""

    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name (as written in the URL fragment, lower-cased) -> `sp` search parameter
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Search YouTube Music, titling the playlist '<query> - <section>' when a section applies."""
        query_args = parse_qs(url)
        search_terms = (query_args.get('search_query') or query_args.get('q'))[0]
        search_params = query_args.get('sp', (None,))[0]

        if search_params:
            # Explicit `sp`: show the friendly section name if known, else the raw value
            section = search_params
            for name, encoded in self._SECTIONS.items():
                if encoded == search_params:
                    section = name
                    break
        else:
            # No `sp`: interpret the text after the first '#' as a section name
            fragment = (url.split('#') + [''])[1]
            section = urllib.parse.unquote_plus(fragment).lower()
            search_params = self._SECTIONS.get(section)
            if not search_params:
                section = None

        title = join_nonempty(search_terms, section, delim=' - ')
        entries = self._search_results(search_terms, search_params, default_client='web_music')
        return self.playlist_result(entries, title, title)
6255
6256
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed keyword extractors.

    Each subclass is expected to override _FEED_NAME; it is used both for
    IE_NAME (`youtube:<feed>`) and for the `/feed/<feed>` URL handed to YoutubeTabIE.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    def _real_initialize(self):
        # Reuse the base YouTube extractor's login check on this instance
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(cls):
        return 'youtube:' + cls._FEED_NAME

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with this feed's URL."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
6275
6276
class YoutubeWatchLaterIE(InfoExtractor):
    """Keyword extractor: `:ytwatchlater` -> the `WL` playlist."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed `list=WL` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
6289
6290
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """`:ytrec` keyword and the bare youtube.com home URL -> the `recommended` feed."""

    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
6306
6307
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """`:ytsubs` keyword -> the `subscriptions` feed."""

    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
6319
6320
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """`:ythis` / `:ythistory` keyword -> the `history` feed."""

    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
6329
6330
class YoutubeStoriesIE(InfoExtractor):
    """Expands `ytstories:UC...` to the matching `RLTD...` playlist URL."""

    IE_NAME = 'youtube:stories'
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE, smuggling `is_story` alongside the playlist URL."""
        # The captured id excludes the leading "UC"; the playlist id swaps in "RLTD"
        playlist_id = 'RLTD' + self._match_id(url)
        story_url = smuggle_url(
            f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1', {'is_story': True})
        return self.url_result(story_url, ie=YoutubeTabIE, video_id=playlist_id)
6345
6346
class YoutubeShortsAudioPivotIE(InfoExtractor):
    """Maps `/source/<video id>/shorts` URLs to the `sfv_audio_pivot` feed."""

    IE_NAME = 'youtube:shorts:pivot:audio'
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [{
        'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
        'only_matching': True,
    }]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Build the URL-quoted, base64-encoded `bp` value for the sfv_audio_pivot feed.

        The video id is inserted three times into a fixed byte template.
        """
        id_bytes = video_id.encode()
        pb_params = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % (id_bytes, id_bytes, id_bytes)
        encoded = base64.b64encode(pb_params).decode()
        return urllib.parse.quote(encoded)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the generated feed URL."""
        video_id = self._match_id(url)
        browse_params = self._generate_audio_pivot_params(video_id)
        return self.url_result(
            f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}',
            ie=YoutubeTabIE)
6369
6370
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catches watch/attribution URLs that lost their video ID, typically to shell `&` handling.

    The pattern only matches URLs that end right after a single non-video
    parameter (or after `watch?` itself), and extraction always fails with a
    hint about quoting the URL.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL."""
        # The example commands name this project's executable (yt-dlp) so that the
        # suggested invocation can be copied as-is
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
6418
6419
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Extractor for ``youtube.com/clip/<id>`` pages.

    The clip page is downloaded to find the ID of the underlying video and
    the clip's start/end offsets. Extraction of the video itself is delegated
    to YoutubeIE through a ``url_transparent`` result.
    """

    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """Resolve a clip URL to its base video plus the clipped section.

        Returns:
            A ``url_transparent`` info dict pointing at the watch URL of the
            base video, with ``id`` set to the clip ID and ``section_start`` /
            ``section_end`` given in seconds.

        Raises:
            ExtractorError: if the video ID or the clip start/end times cannot
                be found in the page data.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        # First "loopCommand" found under the clip engagement panel; it holds
        # the clip boundaries as "startTimeMs" / "endTimeMs"
        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)
        # traverse_obj yields None when the path is absent; fail with a proper
        # extractor error instead of a TypeError from subscripting None
        if not isinstance(clip_data, dict):
            raise ExtractorError('Unable to find clip data')

        start_ms = int_or_none(clip_data.get('startTimeMs'))
        end_ms = int_or_none(clip_data.get('endTimeMs'))
        if start_ms is None or end_ms is None:
            raise ExtractorError('Unable to find clip start/end time')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            # Page data is in milliseconds; the info dict uses seconds
            'section_start': start_ms / 1000,
            'section_end': end_ms / 1000,
        }
6476
6477
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v=`` value is too short to be a video ID.

    _VALID_URL only accepts IDs of 1 to 10 characters at the very end of the
    URL; the extractor never extracts anything and always raises an error.
    """

    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        # expected=True marks this as a user-input problem, not an extractor bug
        raise ExtractorError(message, expected=True)