jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/youtube.py
[extractor/youtube] Do not warn on duplicate chapters
[yt-dlp.git] / yt_dlp / extractor / youtube.py
1 import base64
2 import calendar
3 import copy
4 import datetime
5 import enum
6 import hashlib
7 import itertools
8 import json
9 import math
10 import os.path
11 import random
12 import re
13 import sys
14 import threading
15 import time
16 import traceback
17 import urllib.error
18 import urllib.parse
19
20 from .common import InfoExtractor, SearchInfoExtractor
21 from .openload import PhantomJSwrapper
22 from ..compat import functools
23 from ..jsinterp import JSInterpreter
24 from ..utils import (
25 NO_DEFAULT,
26 ExtractorError,
27 UserNotLive,
28 bug_reports_message,
29 classproperty,
30 clean_html,
31 datetime_from_str,
32 dict_get,
33 float_or_none,
34 format_field,
35 get_first,
36 int_or_none,
37 is_html,
38 join_nonempty,
39 js_to_json,
40 mimetype2ext,
41 network_exceptions,
42 orderedSet,
43 parse_codecs,
44 parse_count,
45 parse_duration,
46 parse_iso8601,
47 parse_qs,
48 qualities,
49 remove_start,
50 smuggle_url,
51 str_or_none,
52 str_to_int,
53 strftime_or_none,
54 traverse_obj,
55 try_get,
56 unescapeHTML,
57 unified_strdate,
58 unified_timestamp,
59 unsmuggle_url,
60 update_url_query,
61 url_or_none,
62 urljoin,
63 variadic,
64 )
65
# Static Innertube client configurations, keyed by client name.
# Each entry holds the request context ('INNERTUBE_CONTEXT'), the numeric client id
# ('INNERTUBE_CONTEXT_CLIENT_NAME') and optionally an API key, an API host and the
# 'REQUIRE_JS_PLAYER' flag. Keys that are missing here (e.g. the API key of
# 'ios_embedded'/'ios_creator', 'INNERTUBE_HOST', 'REQUIRE_JS_PLAYER') are filled in
# with defaults by build_innertube_clients().
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20220731.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        # The only client here with a non-default API host
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20220727.01.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20220726.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '5.16.51',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '22.30.100',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    # No 'INNERTUBE_API_KEY' here: build_innertube_clients() supplies the default key
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '5.21',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    # No 'INNERTUBE_API_KEY' here: build_innertube_clients() supplies the default key
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '22.33.101',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85
    },
}
238
239
240 def _split_innertube_client(client_name):
241 variant, *base = client_name.rsplit('.', 1)
242 if base:
243 return variant, base[0], variant
244 base, *variant = client_name.split('_', 1)
245 return client_name, base, variant[0] if variant else None
246
247
def build_innertube_clients():
    """
    Complete INNERTUBE_CLIENTS in place.

    Fills in the default API key, host, 'REQUIRE_JS_PLAYER' flag and 'hl' language,
    assigns each client a 'priority' derived from its base client, and registers an
    extra "<client>_embedscreen" copy for every client that has no variant.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
    rank = qualities(base_clients[::-1])
    defaults = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: "_embedscreen" entries are added while looping
    for name, config in list(INNERTUBE_CLIENTS.items()):
        for key, value in defaults:
            config.setdefault(key, value)
        context = config['INNERTUBE_CONTEXT']
        context['client'].setdefault('hl', 'en')

        base_client, variant = _split_innertube_client(name)[1:]
        config['priority'] = 10 * rank(base_client)

        if variant == 'embedded':
            context['thirdParty'] = third_party
            config['priority'] -= 2
        elif variant:
            config['priority'] -= 3
        else:
            embedscreen = copy.deepcopy(config)
            embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            embedscreen['priority'] -= 3
            INNERTUBE_CLIENTS[f'{name}_embedscreen'] = embedscreen
274
275
# Runs once at import time so INNERTUBE_CLIENTS is fully populated before any extractor uses it
build_innertube_clients()
277
278
class BadgeType(enum.Enum):
    """Badge kinds recognised on YouTube renderers (see _extract_badges / _has_badge)."""
    # Values come from enum.auto(), so they are arbitrary: compare members, not values
    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()
    LIVE_NOW = enum.auto()
286
287
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Regex alternation of reserved path names.
    # NOTE(review): presumably first URL path components that must not be mistaken for
    # channel/user names; the consumers are outside this part of the file - confirm
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
        r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
        r'browse|oembed|get_video_info|iframe_api|s/player|source|'
        r'storefront|oops|index|account|t/terms|about|upload|signin|logout')

    # Regex for playlist IDs: a known prefix followed by 10+ ID characters, or one of the fixed IDs
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    # _NETRC_MACHINE = 'youtube'

    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Hostname regexes of alternative YouTube front-ends (Invidious and Piped instances)
    _INVIDIOUS_SITES = (
        # invidious-redirect websites
        r'(?:www\.)?redirect\.invidious\.io',
        r'(?:(?:www|dev)\.)?invidio\.us',
        # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
        r'(?:www\.)?invidious\.pussthecat\.org',
        r'(?:www\.)?invidious\.zee\.li',
        r'(?:www\.)?invidious\.ethibox\.fr',
        r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
        r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
        r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
        # youtube-dl invidious instances list
        r'(?:(?:www|no)\.)?invidiou\.sh',
        r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
        r'(?:www\.)?invidious\.kabi\.tk',
        r'(?:www\.)?invidious\.mastodon\.host',
        r'(?:www\.)?invidious\.zapashcanon\.fr',
        r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
        r'(?:www\.)?invidious\.tinfoil-hat\.net',
        r'(?:www\.)?invidious\.himiko\.cloud',
        r'(?:www\.)?invidious\.reallyancient\.tech',
        r'(?:www\.)?invidious\.tube',
        r'(?:www\.)?invidiou\.site',
        r'(?:www\.)?invidious\.site',
        r'(?:www\.)?invidious\.xyz',
        r'(?:www\.)?invidious\.nixnet\.xyz',
        r'(?:www\.)?invidious\.048596\.xyz',
        r'(?:www\.)?invidious\.drycat\.fr',
        r'(?:www\.)?inv\.skyn3t\.in',
        r'(?:www\.)?tube\.poal\.co',
        r'(?:www\.)?tube\.connect\.cafe',
        r'(?:www\.)?vid\.wxzm\.sx',
        r'(?:www\.)?vid\.mint\.lgbt',
        r'(?:www\.)?vid\.puffyan\.us',
        r'(?:www\.)?yewtu\.be',
        r'(?:www\.)?yt\.elukerio\.org',
        r'(?:www\.)?yt\.lelux\.fi',
        r'(?:www\.)?invidious\.ggc-project\.de',
        r'(?:www\.)?yt\.maisputain\.ovh',
        r'(?:www\.)?ytprivate\.com',
        r'(?:www\.)?invidious\.13ad\.de',
        r'(?:www\.)?invidious\.toot\.koeln',
        r'(?:www\.)?invidious\.fdn\.fr',
        r'(?:www\.)?watch\.nettohikari\.com',
        r'(?:www\.)?invidious\.namazso\.eu',
        r'(?:www\.)?invidious\.silkky\.cloud',
        r'(?:www\.)?invidious\.exonip\.de',
        r'(?:www\.)?invidious\.riverside\.rocks',
        r'(?:www\.)?invidious\.blamefran\.net',
        r'(?:www\.)?invidious\.moomoo\.de',
        r'(?:www\.)?ytb\.trom\.tf',
        r'(?:www\.)?yt\.cyberhost\.uk',
        r'(?:www\.)?kgg2m7yk5aybusll\.onion',
        r'(?:www\.)?qklhadlycap4cnod\.onion',
        r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
        r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
        r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
        r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
        r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
        r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
        r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
        r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
        r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
        r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
        # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
        r'(?:www\.)?piped\.kavin\.rocks',
        r'(?:www\.)?piped\.silkky\.cloud',
        r'(?:www\.)?piped\.tokhmi\.xyz',
        r'(?:www\.)?piped\.moomoo\.me',
        r'(?:www\.)?il\.ax',
        r'(?:www\.)?piped\.syncpundit\.com',
        r'(?:www\.)?piped\.mha\.fi',
        r'(?:www\.)?piped\.mint\.lgbt',
        r'(?:www\.)?piped\.privacy\.com\.de',
    )

    # extracted from account/account_menu ep
    # XXX: These are the supported YouTube UI and API languages,
    # which is slightly different from languages supported for translation in YouTube studio
    # Checked case-sensitively by _preferred_lang
    _SUPPORTED_LANG_CODES = [
        'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
        'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
        'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
        'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
        'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
        'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
    ]
391
@functools.cached_property
def _preferred_lang(self):
    """
    Language code from the "lang" extractor argument, validated against
    _SUPPORTED_LANG_CODES (case-sensitive).

    Returns None when no language was requested. Raises an expected
    ExtractorError for an unsupported code, and emits a warning when a
    language other than "en" is selected.
    """
    requested = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
    if not requested:
        return None
    if requested not in self._SUPPORTED_LANG_CODES:
        raise ExtractorError(
            f'Unsupported language code: {requested}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
            expected=True)
    if requested != 'en':
        self.report_warning(
            f'Preferring "{requested}" translated fields. Note that some metadata extraction may fail or be incorrect.')
    return requested
409
410 def _initialize_consent(self):
411 cookies = self._get_cookies('https://www.youtube.com/')
412 if cookies.get('__Secure-3PSID'):
413 return
414 consent_id = None
415 consent = cookies.get('CONSENT')
416 if consent:
417 if 'YES' in consent.value:
418 return
419 consent_id = self._search_regex(
420 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
421 if not consent_id:
422 consent_id = random.randint(100, 999)
423 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
424
425 def _initialize_pref(self):
426 cookies = self._get_cookies('https://www.youtube.com/')
427 pref_cookie = cookies.get('PREF')
428 pref = {}
429 if pref_cookie:
430 try:
431 pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
432 except ValueError:
433 self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
434 pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'})
435 self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
436
437 def _real_initialize(self):
438 self._initialize_pref()
439 self._initialize_consent()
440 self._check_login_required()
441
442 def _check_login_required(self):
443 if self._LOGIN_REQUIRED and not self._cookies_passed:
444 self.raise_login_required('Login details are needed to download this content', method='cookies')
445
# Regex for the assignment prefix of the ytInitialData JS variable
# (`window["ytInitialData"] =` or `ytInitialData =`); used by extract_yt_initial_data
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
# Regex for the assignment prefix of the ytInitialPlayerResponse JS variable
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
448
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the built-in configuration for `client`, safe for the caller to mutate."""
    builtin_config = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin_config)
451
def _get_innertube_host(self, client='web'):
    """Return the built-in API hostname ('INNERTUBE_HOST') of `client`."""
    builtin_config = INNERTUBE_CLIENTS[client]
    return builtin_config['INNERTUBE_HOST']
454
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get() on `ytcfg`, falling back to the built-in configuration of
    `default_client` whenever the first lookup gives a falsy result.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
459
460 def _extract_client_name(self, ytcfg, default_client='web'):
461 return self._ytcfg_get_safe(
462 ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
463 lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
464
465 def _extract_client_version(self, ytcfg, default_client='web'):
466 return self._ytcfg_get_safe(
467 ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
468 lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
469
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """
    Pick the API hostname: the "innertube_host" extractor argument wins, then
    `req_api_hostname`, then the built-in host of `default_client` (or "web").
    """
    user_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if user_host:
        return user_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
473
474 def _extract_api_key(self, ytcfg=None, default_client='web'):
475 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
476
def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the 'INNERTUBE_CONTEXT' dict of `ytcfg` (or of the default client's
    configuration), with language and timezone forced on its 'client' entry.

    Note that the context taken from `ytcfg` is updated in place.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    # Enforce language and tz for extraction
    client_context['hl'] = self._preferred_lang or 'en'
    client_context['timeZone'] = 'UTC'
    client_context['utcOffsetMinutes'] = 0
    return context
484
# Cache for _generate_sapisidhash_header: None = cookies not inspected yet,
# False = no SAPISID cookie available, otherwise the cookie value
_SAPISID = None
486
487 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
488 time_now = round(time.time())
489 if self._SAPISID is None:
490 yt_cookies = self._get_cookies('https://www.youtube.com')
491 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
492 # See: https://github.com/yt-dlp/yt-dlp/issues/393
493 sapisid_cookie = dict_get(
494 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
495 if sapisid_cookie and sapisid_cookie.value:
496 self._SAPISID = sapisid_cookie.value
497 self.write_debug('Extracted SAPISID cookie')
498 # SAPISID cookie is required if not already present
499 if not yt_cookies.get('SAPISID'):
500 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
501 self._set_cookie(
502 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
503 else:
504 self._SAPISID = False
505 if not self._SAPISID:
506 return None
507 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
508 sapisidhash = hashlib.sha1(
509 f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
510 return f'SAPISIDHASH {time_now}_{sapisidhash}'
511
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST `query` (merged with the Innertube context) as JSON to the
    /youtubei/v1/<ep> endpoint and return the result of _download_json().

    The "innertube_key" extractor argument overrides `api_key`; without either,
    the key of `default_client` is used. `headers` are applied on top of the
    generated API headers.
    """
    if not context:
        context = self._extract_context(default_client=default_client)
    payload = {'context': context}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    user_key = self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = user_key or api_key or self._extract_api_key(default_client=default_client)

    endpoint = f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}'
    return self._download_json(
        endpoint, video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})
529
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Return the JSON object that follows the ytInitialData assignment in `webpage`."""
    return self._search_json(
        self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id,
        fatal=fatal)
532
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first 'SESSION_INDEX' among `data` that converts to an int, else None.
    """
    candidates = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in candidates if index is not None), None)
543
544 # Deprecated?
545 def _extract_identity_token(self, ytcfg=None, webpage=None):
546 if ytcfg:
547 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
548 if token:
549 return token
550 if webpage:
551 return self._search_regex(
552 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
553 'identity token', default=None, fatal=False)
554
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    for data in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated_sid:
            return delegated_sid
        datasync_id = try_get(
            data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                   lambda x: x['DATASYNC_ID']), str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) < 2 or not parts[1]:
            continue
        return parts[0]
573
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    # One traversal path with three alternative branches
    visitor_data_paths = [(
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )]
    return get_first(args, visitor_data_paths, expected_type=str)
583
@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated (cached per instance)."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
587
def extract_ytcfg(self, video_id, webpage):
    """Return the dict passed to `ytcfg.set(...)` in `webpage`, or {} if absent or unparsable."""
    if not webpage:
        return {}
    raw_config = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', default='{}')
    return self._parse_json(raw_config, video_id, fatal=False) or {}
595
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an Innertube API request.

    Explicit arguments win over values found in `ytcfg`. Headers whose value
    is None are left out; the Authorization/X-Origin pair is added only when a
    SAPISIDHASH header can be generated.
    """
    origin = 'https://' + self._select_api_hostname(api_hostname, default_client)
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
        'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # With an account sync id but no known session index, 0 is sent
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers.update({'Authorization': auth, 'X-Origin': origin})
    return {name: value for name, value in headers.items() if value is not None}
621
622 def _download_ytcfg(self, client, video_id):
623 url = {
624 'web': 'https://www.youtube.com',
625 'web_music': 'https://music.youtube.com',
626 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
627 }.get(client)
628 if not url:
629 return {}
630 webpage = self._download_webpage(
631 url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')
632 return self.extract_ytcfg(video_id, webpage) or {}
633
634 @staticmethod
635 def _build_api_continuation_query(continuation, ctp=None):
636 query = {
637 'continuation': continuation
638 }
639 # TODO: Inconsistency with clickTrackingParams.
640 # Currently we have a fixed ctp contained within context (from ytcfg)
641 # and a ctp in root query for continuation.
642 if ctp:
643 query['clickTracking'] = {'clickTrackingParams': ctp}
644 return query
645
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the renderer's nextContinuationData or
    reloadContinuationData. Returns None when neither holds a continuation token.
    """
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))
658
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.
    Returns None if the argument is not a dict or has no continuationCommand token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))
668
@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for `renderer`, or None.

    The renderer's own next/reload continuation data is preferred; otherwise the
    first usable continuationItemRenderer among its 'contents', 'items' and
    'rows' entries is used.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    entries = [
        entry
        for key in ('contents', 'items', 'rows')
        for entry in try_get(renderer, lambda x: x[key], list) or []
        if isinstance(entry, dict)]

    for entry in entries:
        endpoint = try_get(
            entry, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                    lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
            dict)
        continuation = cls._extract_continuation_ep_data(endpoint)
        if continuation:
            return continuation
    return None
689
@classmethod
def _extract_alerts(cls, data):
    """
    Yield an (alert type, message) pair for each alert found in data['alerts'].

    Alert containers or alerts that are not dicts, and alerts lacking a type
    or a text, are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # A malformed (non-dict) renderer would otherwise raise AttributeError on .get()
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
702
703 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
704 errors = []
705 warnings = []
706 for alert_type, alert_message in alerts:
707 if alert_type.lower() == 'error' and fatal:
708 errors.append([alert_type, alert_message])
709 else:
710 warnings.append([alert_type, alert_message])
711
712 for alert_type, alert_message in (warnings + errors[:-1]):
713 self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
714 if errors:
715 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
716
717 def _extract_and_report_alerts(self, data, *args, **kwargs):
718 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
719
def _extract_badges(self, renderer: dict):
    """
    Return a list of {'type': BadgeType} dicts for the metadataBadgeRenderer
    entries under renderer['badges'].

    A badge is identified by its icon type or its style; if neither is known,
    its (English) label text is searched for a known keyword. Badges that match
    nothing are omitted.
    """
    privacy_icon_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM
    }

    badges = []
    for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer'), default=[]):
        badge_type = (
            privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
            or badge_style_map.get(traverse_obj(badge, 'style'))
        )
        if badge_type:
            badges.append({'type': badge_type})
            continue

        # fallback, won't work in some languages
        label = traverse_obj(badge, 'label', expected_type=str, default='').lower()
        for match, label_badge_type in label_map.items():
            if match in label:
                # Record the type found via the label (badge_type is falsy on this path)
                # and stop at the first keyword so one badge yields at most one entry
                badges.append({'type': label_badge_type})
                break

    return badges
759
@staticmethod
def _has_badge(badges, badge_type):
    """Return True if any entry of `badges` has the given 'type'."""
    matching = traverse_obj(badges, lambda _, badge: badge['type'] == badge_type)
    return bool(matching)
763
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found at any of the given paths in `data`
    (or in `data` itself when no path is given), else None.

    A text object is read from its 'simpleText', or by joining the 'text' of
    its 'runs' (a bare list is treated as the runs); at most `max_runs` runs
    are joined when that is set.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # Only a branching path (containing ... or a list/tuple key) is treated as
            # yielding a list of results; any other result is wrapped as a single candidate
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not is_branching:
                candidates = [candidates]
        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text
            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                runs = candidate

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
    return None
785
def _get_count(self, data, *path_list):
    """
    Parse a count from the text found at the given paths.
    Falls back to the leading run of digits/commas when parse_count() fails.
    """
    count_text = self._get_text(data, *path_list) or ''
    count = parse_count(count_text)
    if count is not None:
        return count
    leading_digits = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', count_text), 'count', default=None)
    return str_to_int(leading_digits)
793
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    """
    results = []
    for path in path_list or [()]:
        thumbnails_path = (*variadic(path), 'thumbnails', ...)
        for thumbnail in traverse_obj(data, thumbnails_path, default=[]):
            url = url_or_none(thumbnail.get('url'))
            if not url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in url:
                url = url.partition('?')[0]
            results.append({
                'url': url,
                'height': int_or_none(thumbnail.get('height')),
                'width': int_or_none(thumbnail.get('width')),
            })
    return results
817
@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

    Returns None when the text has no relative time or it cannot be converted.
    """
    mobj = re.search(r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
    if not mobj:
        return None
    keyword = mobj.group('start')
    if keyword:
        return datetime_from_str(keyword)
    try:
        return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
    except ValueError:
        return None
833
834 def _parse_time_text(self, text):
835 if not text:
836 return
837 dt = self.extract_relative_time(text)
838 timestamp = None
839 if isinstance(dt, datetime.datetime):
840 timestamp = calendar.timegm(dt.timetuple())
841
842 if timestamp is None:
843 timestamp = (
844 unified_timestamp(text) or unified_timestamp(
845 self._search_regex(
846 (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
847 text.lower(), 'time text', default=None)))
848
849 if text and timestamp is None and self._preferred_lang in (None, 'en'):
850 self.report_warning(
851 f'Cannot parse localized time text "{text}"', only_once=True)
852 return timestamp
853
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` through _call_api(), retrying via self.RetryManager().

    Context and API key are taken from `ytcfg` (falling back to `default_client`).
    A response is only returned once traversing it with `check_get_keys` gives a
    truthy result; otherwise it counts as incomplete and is retried.
    Errors that are not retried are passed to self._error_or_warning() with the
    given `fatal`, and its result is returned.
    """
    for retry in self.RetryManager():
        try:
            # Always fatal here so that failures surface as ExtractorError and can be
            # classified below; the caller's `fatal` is applied in _error_or_warning()
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            if not isinstance(e.cause, network_exceptions):
                # Not a network problem: do not retry
                return self._error_or_warning(e, fatal=fatal)
            elif not isinstance(e.cause, urllib.error.HTTPError):
                # Network problem without an HTTP response: retry
                retry.error = e
                continue

            # Peek at the error body; a non-HTML body may carry a JSON error message
            first_bytes = e.cause.read(512)
            if not is_html(first_bytes):
                yt_error = try_get(
                    self._parse_json(
                        self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if e.cause.code not in (403, 429):
                retry.error = e
                continue
            # 403 and 429 are not retried
            return self._error_or_warning(e, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            retry.error = ExtractorError('Incomplete data received', expected=True)
            continue

        return response
905
@staticmethod
def is_music_url(url):
    """Return True if `url` starts with an http(s)://music.youtube.com/ prefix, else False."""
    return bool(re.match(r'https?://music\.youtube\.com/', url))
909
def _extract_video(self, renderer):
    """
    Build a 'url'-type result dict (handled by YoutubeIE) from a video renderer.

    The entry URL is the /shorts/ form when the thumbnail overlay style is
    'SHORTS' or the renderer's navigation URL contains '/shorts/'; otherwise
    it is the regular watch URL. 'upload_date' is only filled in when the
    'approximate_date' configuration arg of 'youtubetab' is set.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    length_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(length_text)
    if duration is None:
        # Fall back to the duration embedded in the title's accessibility label
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    time_text = self._get_text(renderer, 'publishedTimeText') or ''
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''

    is_short = overlay_style == 'SHORTS' or '/shorts/' in navigation_url
    url = (f'https://www.youtube.com/shorts/{video_id}' if is_short
           else f'https://www.youtube.com/watch?v={video_id}')

    upload_date = None
    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(self._parse_time_text(time_text), '%Y%m%d')

    # First matching condition wins: upcoming > previously streamed > live now
    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW):
        live_status = 'is_live'
    else:
        live_status = None

    if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC):
        availability = 'public'
    else:
        # A missing badge is passed as None rather than False
        availability = self._availability(
            is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
            needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
            needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
            is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
    }
970
971
972 class YoutubeIE(YoutubeBaseInfoExtractor):
973 IE_DESC = 'YouTube'
974 _VALID_URL = r"""(?x)^
975 (
976 (?:https?://|//) # http(s):// or protocol-independent URL
977 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
978 (?:www\.)?deturl\.com/www\.youtube\.com|
979 (?:www\.)?pwnyoutube\.com|
980 (?:www\.)?hooktube\.com|
981 (?:www\.)?yourepeat\.com|
982 tube\.majestyc\.net|
983 %(invidious)s|
984 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
985 (?:.*?\#/)? # handle anchor (#/) redirect urls
986 (?: # the various things that can precede the ID:
987 (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
988 |(?: # or the v= param in all its forms
989 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
990 (?:\?|\#!?) # the params delimiter ? or # or #!
991 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
992 v=
993 )
994 ))
995 |(?:
996 youtu\.be| # just youtu.be/xxxx
997 vid\.plus| # or vid.plus/xxxx
998 zwearz\.com/watch| # or zwearz.com/watch/xxxx
999 %(invidious)s
1000 )/
1001 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
1002 )
1003 )? # all until now is optional -> you can pass the naked ID
1004 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
1005 (?(1).+)? # if we found the ID, everything can follow
1006 (?:\#|$)""" % {
1007 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
1008 }
1009 _EMBED_REGEX = [
1010 r'''(?x)
1011 (?:
1012 <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
1013 data-video-url=|
1014 <embed[^>]+?src=|
1015 embedSWF\(?:\s*|
1016 <object[^>]+data=|
1017 new\s+SWFObject\(
1018 )
1019 (["\'])
1020 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1021 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1022 \1''',
1023 # https://wordpress.org/plugins/lazy-load-for-videos/
1024 r'''(?xs)
1025 <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
1026 \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
1027 ]
1028
1029 _PLAYER_INFO_RE = (
1030 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
1031 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
1032 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
1033 )
1034 _formats = {
1035 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1036 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1037 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
1038 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
1039 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
1040 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1041 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1042 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1043 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
1044 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1045 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1046 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1047 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1048 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1049 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1050 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1051 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1052 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1053
1054
1055 # 3D videos
1056 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1057 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1058 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1059 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1060 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1061 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1062 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1063
1064 # Apple HTTP Live Streaming
1065 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1066 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1067 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1068 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1069 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1070 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1071 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1072 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
1073
1074 # DASH mp4 video
1075 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1076 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1077 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1078 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1079 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
1080 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
1081 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1082 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1083 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1084 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1085 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1086 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
1087
1088 # Dash mp4 audio
1089 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1090 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1091 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1092 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1093 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1094 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1095 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
1096
1097 # Dash webm
1098 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1099 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1100 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1101 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1102 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1103 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1104 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1105 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1106 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1107 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1108 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1109 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1110 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1111 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1112 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1113 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1114 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1115 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1116 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1117 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1118 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1119 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1120
1121 # Dash webm audio
1122 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1123 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1124
1125 # Dash webm audio with opus inside
1126 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1127 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1128 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1129
1130 # RTMP (unnamed)
1131 '_rtmp': {'protocol': 'rtmp'},
1132
1133 # av01 video only formats sometimes served with "unknown" codecs
1134 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1135 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1136 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1137 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1138 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1139 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1140 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1141 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1142 }
1143 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
1144
1145 _GEO_BYPASS = False
1146
1147 IE_NAME = 'youtube'
1148 _TESTS = [
1149 {
1150 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1151 'info_dict': {
1152 'id': 'BaW_jenozKc',
1153 'ext': 'mp4',
1154 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1155 'uploader': 'Philipp Hagemeister',
1156 'uploader_id': 'phihag',
1157 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1158 'channel': 'Philipp Hagemeister',
1159 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1160 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1161 'upload_date': '20121002',
1162 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1163 'categories': ['Science & Technology'],
1164 'tags': ['youtube-dl'],
1165 'duration': 10,
1166 'view_count': int,
1167 'like_count': int,
1168 'availability': 'public',
1169 'playable_in_embed': True,
1170 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1171 'live_status': 'not_live',
1172 'age_limit': 0,
1173 'start_time': 1,
1174 'end_time': 9,
1175 'comment_count': int,
1176 'channel_follower_count': int
1177 }
1178 },
1179 {
1180 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1181 'note': 'Embed-only video (#1746)',
1182 'info_dict': {
1183 'id': 'yZIXLfi8CZQ',
1184 'ext': 'mp4',
1185 'upload_date': '20120608',
1186 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1187 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1188 'uploader': 'SET India',
1189 'uploader_id': 'setindia',
1190 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1191 'age_limit': 18,
1192 },
1193 'skip': 'Private video',
1194 },
1195 {
1196 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1197 'note': 'Use the first video ID in the URL',
1198 'info_dict': {
1199 'id': 'BaW_jenozKc',
1200 'ext': 'mp4',
1201 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1202 'uploader': 'Philipp Hagemeister',
1203 'uploader_id': 'phihag',
1204 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1205 'channel': 'Philipp Hagemeister',
1206 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1207 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1208 'upload_date': '20121002',
1209 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1210 'categories': ['Science & Technology'],
1211 'tags': ['youtube-dl'],
1212 'duration': 10,
1213 'view_count': int,
1214 'like_count': int,
1215 'availability': 'public',
1216 'playable_in_embed': True,
1217 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1218 'live_status': 'not_live',
1219 'age_limit': 0,
1220 'comment_count': int,
1221 'channel_follower_count': int
1222 },
1223 'params': {
1224 'skip_download': True,
1225 },
1226 },
1227 {
1228 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1229 'note': '256k DASH audio (format 141) via DASH manifest',
1230 'info_dict': {
1231 'id': 'a9LDPn-MO4I',
1232 'ext': 'm4a',
1233 'upload_date': '20121002',
1234 'uploader_id': '8KVIDEO',
1235 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1236 'description': '',
1237 'uploader': '8KVIDEO',
1238 'title': 'UHDTV TEST 8K VIDEO.mp4'
1239 },
1240 'params': {
1241 'youtube_include_dash_manifest': True,
1242 'format': '141',
1243 },
1244 'skip': 'format 141 not served anymore',
1245 },
1246 # DASH manifest with encrypted signature
1247 {
1248 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1249 'info_dict': {
1250 'id': 'IB3lcPjvWLA',
1251 'ext': 'm4a',
1252 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1253 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1254 'duration': 244,
1255 'uploader': 'AfrojackVEVO',
1256 'uploader_id': 'AfrojackVEVO',
1257 'upload_date': '20131011',
1258 'abr': 129.495,
1259 'like_count': int,
1260 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1261 'playable_in_embed': True,
1262 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1263 'view_count': int,
1264 'track': 'The Spark',
1265 'live_status': 'not_live',
1266 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1267 'channel': 'Afrojack',
1268 'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
1269 'tags': 'count:19',
1270 'availability': 'public',
1271 'categories': ['Music'],
1272 'age_limit': 0,
1273 'alt_title': 'The Spark',
1274 'channel_follower_count': int
1275 },
1276 'params': {
1277 'youtube_include_dash_manifest': True,
1278 'format': '141/bestaudio[ext=m4a]',
1279 },
1280 },
1281 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1282 {
1283 'note': 'Embed allowed age-gate video',
1284 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1285 'info_dict': {
1286 'id': 'HtVdAasjOgU',
1287 'ext': 'mp4',
1288 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1289 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1290 'duration': 142,
1291 'uploader': 'The Witcher',
1292 'uploader_id': 'WitcherGame',
1293 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1294 'upload_date': '20140605',
1295 'age_limit': 18,
1296 'categories': ['Gaming'],
1297 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1298 'availability': 'needs_auth',
1299 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1300 'like_count': int,
1301 'channel': 'The Witcher',
1302 'live_status': 'not_live',
1303 'tags': 'count:17',
1304 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1305 'playable_in_embed': True,
1306 'view_count': int,
1307 'channel_follower_count': int
1308 },
1309 },
1310 {
1311 'note': 'Age-gate video with embed allowed in public site',
1312 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1313 'info_dict': {
1314 'id': 'HsUATh_Nc2U',
1315 'ext': 'mp4',
1316 'title': 'Godzilla 2 (Official Video)',
1317 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1318 'upload_date': '20200408',
1319 'uploader_id': 'FlyingKitty900',
1320 'uploader': 'FlyingKitty',
1321 'age_limit': 18,
1322 'availability': 'needs_auth',
1323 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1324 'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
1325 'channel': 'FlyingKitty',
1326 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1327 'view_count': int,
1328 'categories': ['Entertainment'],
1329 'live_status': 'not_live',
1330 'tags': ['Flyingkitty', 'godzilla 2'],
1331 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1332 'like_count': int,
1333 'duration': 177,
1334 'playable_in_embed': True,
1335 'channel_follower_count': int
1336 },
1337 },
1338 {
1339 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1340 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1341 'info_dict': {
1342 'id': 'Tq92D6wQ1mg',
1343 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1344 'ext': 'mp4',
1345 'upload_date': '20191228',
1346 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1347 'uploader': 'Projekt Melody',
1348 'description': 'md5:17eccca93a786d51bc67646756894066',
1349 'age_limit': 18,
1350 'like_count': int,
1351 'availability': 'needs_auth',
1352 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1353 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1354 'view_count': int,
1355 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1356 'channel': 'Projekt Melody',
1357 'live_status': 'not_live',
1358 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1359 'playable_in_embed': True,
1360 'categories': ['Entertainment'],
1361 'duration': 106,
1362 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1363 'comment_count': int,
1364 'channel_follower_count': int
1365 },
1366 },
1367 {
1368 'note': 'Non-Agegated non-embeddable video',
1369 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1370 'info_dict': {
1371 'id': 'MeJVWBSsPAY',
1372 'ext': 'mp4',
1373 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1374 'uploader': 'Herr Lurik',
1375 'uploader_id': 'st3in234',
1376 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1377 'upload_date': '20130730',
1378 'track': 'Such mich find mich',
1379 'age_limit': 0,
1380 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1381 'like_count': int,
1382 'playable_in_embed': False,
1383 'creator': 'OOMPH!',
1384 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1385 'view_count': int,
1386 'alt_title': 'Such mich find mich',
1387 'duration': 210,
1388 'channel': 'Herr Lurik',
1389 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1390 'categories': ['Music'],
1391 'availability': 'public',
1392 'uploader_url': 'http://www.youtube.com/user/st3in234',
1393 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1394 'live_status': 'not_live',
1395 'artist': 'OOMPH!',
1396 'channel_follower_count': int
1397 },
1398 },
1399 {
1400 'note': 'Non-bypassable age-gated video',
1401 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1402 'only_matching': True,
1403 },
1404 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1405 # YouTube Red ad is not captured for creator
1406 {
1407 'url': '__2ABJjxzNo',
1408 'info_dict': {
1409 'id': '__2ABJjxzNo',
1410 'ext': 'mp4',
1411 'duration': 266,
1412 'upload_date': '20100430',
1413 'uploader_id': 'deadmau5',
1414 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1415 'creator': 'deadmau5',
1416 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1417 'uploader': 'deadmau5',
1418 'title': 'Deadmau5 - Some Chords (HD)',
1419 'alt_title': 'Some Chords',
1420 'availability': 'public',
1421 'tags': 'count:14',
1422 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1423 'view_count': int,
1424 'live_status': 'not_live',
1425 'channel': 'deadmau5',
1426 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1427 'like_count': int,
1428 'track': 'Some Chords',
1429 'artist': 'deadmau5',
1430 'playable_in_embed': True,
1431 'age_limit': 0,
1432 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1433 'categories': ['Music'],
1434 'album': 'Some Chords',
1435 'channel_follower_count': int
1436 },
1437 'expected_warnings': [
1438 'DASH manifest missing',
1439 ]
1440 },
1441 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1442 {
1443 'url': 'lqQg6PlCWgI',
1444 'info_dict': {
1445 'id': 'lqQg6PlCWgI',
1446 'ext': 'mp4',
1447 'duration': 6085,
1448 'upload_date': '20150827',
1449 'uploader_id': 'olympic',
1450 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1451 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
1452 'uploader': 'Olympics',
1453 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1454 'like_count': int,
1455 'release_timestamp': 1343767800,
1456 'playable_in_embed': True,
1457 'categories': ['Sports'],
1458 'release_date': '20120731',
1459 'channel': 'Olympics',
1460 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1461 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1462 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1463 'age_limit': 0,
1464 'availability': 'public',
1465 'live_status': 'was_live',
1466 'view_count': int,
1467 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
1468 'channel_follower_count': int
1469 },
1470 'params': {
1471 'skip_download': 'requires avconv',
1472 }
1473 },
1474 # Non-square pixels
1475 {
1476 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1477 'info_dict': {
1478 'id': '_b-2C3KPAM0',
1479 'ext': 'mp4',
1480 'stretched_ratio': 16 / 9.,
1481 'duration': 85,
1482 'upload_date': '20110310',
1483 'uploader_id': 'AllenMeow',
1484 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1485 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1486 'uploader': '孫ᄋᄅ',
1487 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1488 'playable_in_embed': True,
1489 'channel': '孫ᄋᄅ',
1490 'age_limit': 0,
1491 'tags': 'count:11',
1492 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1493 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1494 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1495 'view_count': int,
1496 'categories': ['People & Blogs'],
1497 'like_count': int,
1498 'live_status': 'not_live',
1499 'availability': 'unlisted',
1500 'comment_count': int,
1501 'channel_follower_count': int
1502 },
1503 },
1504 # url_encoded_fmt_stream_map is empty string
1505 {
1506 'url': 'qEJwOuvDf7I',
1507 'info_dict': {
1508 'id': 'qEJwOuvDf7I',
1509 'ext': 'webm',
1510 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1511 'description': '',
1512 'upload_date': '20150404',
1513 'uploader_id': 'spbelect',
1514 'uploader': 'Наблюдатели Петербурга',
1515 },
1516 'params': {
1517 'skip_download': 'requires avconv',
1518 },
1519 'skip': 'This live event has ended.',
1520 },
1521 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1522 {
1523 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1524 'info_dict': {
1525 'id': 'FIl7x6_3R5Y',
1526 'ext': 'webm',
1527 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1528 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1529 'duration': 220,
1530 'upload_date': '20150625',
1531 'uploader_id': 'dorappi2000',
1532 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1533 'uploader': 'dorappi2000',
1534 'formats': 'mincount:31',
1535 },
1536 'skip': 'not actual anymore',
1537 },
1538 # DASH manifest with segment_list
1539 {
1540 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1541 'md5': '8ce563a1d667b599d21064e982ab9e31',
1542 'info_dict': {
1543 'id': 'CsmdDsKjzN8',
1544 'ext': 'mp4',
1545 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1546 'uploader': 'Airtek',
1547 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1548 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1549 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1550 },
1551 'params': {
1552 'youtube_include_dash_manifest': True,
1553 'format': '135', # bestvideo
1554 },
1555 'skip': 'This live event has ended.',
1556 },
1557 {
1558 # Multifeed videos (multiple cameras), URL is for Main Camera
1559 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1560 'info_dict': {
1561 'id': 'jvGDaLqkpTg',
1562 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1563 'description': 'md5:e03b909557865076822aa169218d6a5d',
1564 },
1565 'playlist': [{
1566 'info_dict': {
1567 'id': 'jvGDaLqkpTg',
1568 'ext': 'mp4',
1569 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1570 'description': 'md5:e03b909557865076822aa169218d6a5d',
1571 'duration': 10643,
1572 'upload_date': '20161111',
1573 'uploader': 'Team PGP',
1574 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1575 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1576 },
1577 }, {
1578 'info_dict': {
1579 'id': '3AKt1R1aDnw',
1580 'ext': 'mp4',
1581 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1582 'description': 'md5:e03b909557865076822aa169218d6a5d',
1583 'duration': 10991,
1584 'upload_date': '20161111',
1585 'uploader': 'Team PGP',
1586 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1587 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1588 },
1589 }, {
1590 'info_dict': {
1591 'id': 'RtAMM00gpVc',
1592 'ext': 'mp4',
1593 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1594 'description': 'md5:e03b909557865076822aa169218d6a5d',
1595 'duration': 10995,
1596 'upload_date': '20161111',
1597 'uploader': 'Team PGP',
1598 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1599 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1600 },
1601 }, {
1602 'info_dict': {
1603 'id': '6N2fdlP3C5U',
1604 'ext': 'mp4',
1605 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1606 'description': 'md5:e03b909557865076822aa169218d6a5d',
1607 'duration': 10990,
1608 'upload_date': '20161111',
1609 'uploader': 'Team PGP',
1610 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1611 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1612 },
1613 }],
1614 'params': {
1615 'skip_download': True,
1616 },
1617 'skip': 'Not multifeed anymore',
1618 },
1619 {
1620 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1621 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1622 'info_dict': {
1623 'id': 'gVfLd0zydlo',
1624 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1625 },
1626 'playlist_count': 2,
1627 'skip': 'Not multifeed anymore',
1628 },
1629 {
1630 'url': 'https://vid.plus/FlRa-iH7PGw',
1631 'only_matching': True,
1632 },
1633 {
1634 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1635 'only_matching': True,
1636 },
1637 {
1638 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1639 # Also tests cut-off URL expansion in video description (see
1640 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1641 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1642 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1643 'info_dict': {
1644 'id': 'lsguqyKfVQg',
1645 'ext': 'mp4',
1646 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1647 'alt_title': 'Dark Walk',
1648 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1649 'duration': 133,
1650 'upload_date': '20151119',
1651 'uploader_id': 'IronSoulElf',
1652 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1653 'uploader': 'IronSoulElf',
1654 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1655 'track': 'Dark Walk',
1656 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1657 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1658 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1659 'categories': ['Film & Animation'],
1660 'view_count': int,
1661 'live_status': 'not_live',
1662 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1663 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1664 'tags': 'count:13',
1665 'availability': 'public',
1666 'channel': 'IronSoulElf',
1667 'playable_in_embed': True,
1668 'like_count': int,
1669 'age_limit': 0,
1670 'channel_follower_count': int
1671 },
1672 'params': {
1673 'skip_download': True,
1674 },
1675 },
1676 {
1677 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1678 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1679 'only_matching': True,
1680 },
1681 {
1682 # Video with yt:stretch=17:0
1683 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1684 'info_dict': {
1685 'id': 'Q39EVAstoRM',
1686 'ext': 'mp4',
1687 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1688 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1689 'upload_date': '20151107',
1690 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1691 'uploader': 'CH GAMER DROID',
1692 },
1693 'params': {
1694 'skip_download': True,
1695 },
1696 'skip': 'This video does not exist.',
1697 },
1698 {
1699 # Video with incomplete 'yt:stretch=16:'
1700 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1701 'only_matching': True,
1702 },
1703 {
1704 # Video licensed under Creative Commons
1705 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1706 'info_dict': {
1707 'id': 'M4gD1WSo5mA',
1708 'ext': 'mp4',
1709 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1710 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1711 'duration': 721,
1712 'upload_date': '20150128',
1713 'uploader_id': 'BerkmanCenter',
1714 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1715 'uploader': 'The Berkman Klein Center for Internet & Society',
1716 'license': 'Creative Commons Attribution license (reuse allowed)',
1717 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1718 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1719 'like_count': int,
1720 'age_limit': 0,
1721 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1722 'channel': 'The Berkman Klein Center for Internet & Society',
1723 'availability': 'public',
1724 'view_count': int,
1725 'categories': ['Education'],
1726 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1727 'live_status': 'not_live',
1728 'playable_in_embed': True,
1729 'comment_count': int,
1730 'channel_follower_count': int
1731 },
1732 'params': {
1733 'skip_download': True,
1734 },
1735 },
1736 {
1737 # Channel-like uploader_url
1738 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1739 'info_dict': {
1740 'id': 'eQcmzGIKrzg',
1741 'ext': 'mp4',
1742 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1743 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1744 'duration': 4060,
1745 'upload_date': '20151120',
1746 'uploader': 'Bernie Sanders',
1747 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1748 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1749 'license': 'Creative Commons Attribution license (reuse allowed)',
1750 'playable_in_embed': True,
1751 'tags': 'count:12',
1752 'like_count': int,
1753 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1754 'age_limit': 0,
1755 'availability': 'public',
1756 'categories': ['News & Politics'],
1757 'channel': 'Bernie Sanders',
1758 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1759 'view_count': int,
1760 'live_status': 'not_live',
1761 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1762 'comment_count': int,
1763 'channel_follower_count': int
1764 },
1765 'params': {
1766 'skip_download': True,
1767 },
1768 },
1769 {
1770 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1771 'only_matching': True,
1772 },
1773 {
1774 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1775 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1776 'only_matching': True,
1777 },
1778 {
1779 # Rental video preview
1780 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1781 'info_dict': {
1782 'id': 'uGpuVWrhIzE',
1783 'ext': 'mp4',
1784 'title': 'Piku - Trailer',
1785 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1786 'upload_date': '20150811',
1787 'uploader': 'FlixMatrix',
1788 'uploader_id': 'FlixMatrixKaravan',
1789 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1790 'license': 'Standard YouTube License',
1791 },
1792 'params': {
1793 'skip_download': True,
1794 },
1795 'skip': 'This video is not available.',
1796 },
1797 {
1798 # YouTube Red video with episode data
1799 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1800 'info_dict': {
1801 'id': 'iqKdEhx-dD4',
1802 'ext': 'mp4',
1803 'title': 'Isolation - Mind Field (Ep 1)',
1804 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1805 'duration': 2085,
1806 'upload_date': '20170118',
1807 'uploader': 'Vsauce',
1808 'uploader_id': 'Vsauce',
1809 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1810 'series': 'Mind Field',
1811 'season_number': 1,
1812 'episode_number': 1,
1813 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1814 'tags': 'count:12',
1815 'view_count': int,
1816 'availability': 'public',
1817 'age_limit': 0,
1818 'channel': 'Vsauce',
1819 'episode': 'Episode 1',
1820 'categories': ['Entertainment'],
1821 'season': 'Season 1',
1822 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1823 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1824 'like_count': int,
1825 'playable_in_embed': True,
1826 'live_status': 'not_live',
1827 'channel_follower_count': int
1828 },
1829 'params': {
1830 'skip_download': True,
1831 },
1832 'expected_warnings': [
1833 'Skipping DASH manifest',
1834 ],
1835 },
1836 {
1837 # The following content has been identified by the YouTube community
1838 # as inappropriate or offensive to some audiences.
1839 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1840 'info_dict': {
1841 'id': '6SJNVb0GnPI',
1842 'ext': 'mp4',
1843 'title': 'Race Differences in Intelligence',
1844 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1845 'duration': 965,
1846 'upload_date': '20140124',
1847 'uploader': 'New Century Foundation',
1848 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1849 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1850 },
1851 'params': {
1852 'skip_download': True,
1853 },
1854 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1855 },
1856 {
1857 # itag 212
1858 'url': '1t24XAntNCY',
1859 'only_matching': True,
1860 },
1861 {
1862 # geo restricted to JP
1863 'url': 'sJL6WA-aGkQ',
1864 'only_matching': True,
1865 },
1866 {
1867 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1868 'only_matching': True,
1869 },
1870 {
1871 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1872 'only_matching': True,
1873 },
1874 {
1875 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1876 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1877 'only_matching': True,
1878 },
1879 {
1880 # DRM protected
1881 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1882 'only_matching': True,
1883 },
1884 {
1885 # Video with unsupported adaptive stream type formats
1886 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1887 'info_dict': {
1888 'id': 'Z4Vy8R84T1U',
1889 'ext': 'mp4',
1890 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1891 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1892 'duration': 433,
1893 'upload_date': '20130923',
1894 'uploader': 'Amelia Putri Harwita',
1895 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1896 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1897 'formats': 'maxcount:10',
1898 },
1899 'params': {
1900 'skip_download': True,
1901 'youtube_include_dash_manifest': False,
1902 },
1903 'skip': 'not actual anymore',
1904 },
1905 {
1906 # Youtube Music Auto-generated description
1907 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1908 'info_dict': {
1909 'id': 'MgNrAu2pzNs',
1910 'ext': 'mp4',
1911 'title': 'Voyeur Girl',
1912 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1913 'upload_date': '20190312',
1914 'uploader': 'Stephen - Topic',
1915 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1916 'artist': 'Stephen',
1917 'track': 'Voyeur Girl',
1918 'album': 'it\'s too much love to know my dear',
1919 'release_date': '20190313',
1920 'release_year': 2019,
1921 'alt_title': 'Voyeur Girl',
1922 'view_count': int,
1923 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1924 'playable_in_embed': True,
1925 'like_count': int,
1926 'categories': ['Music'],
1927 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1928 'channel': 'Stephen',
1929 'availability': 'public',
1930 'creator': 'Stephen',
1931 'duration': 169,
1932 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
1933 'age_limit': 0,
1934 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1935 'tags': 'count:11',
1936 'live_status': 'not_live',
1937 'channel_follower_count': int
1938 },
1939 'params': {
1940 'skip_download': True,
1941 },
1942 },
1943 {
1944 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1945 'only_matching': True,
1946 },
1947 {
1948 # invalid -> valid video id redirection
1949 'url': 'DJztXj2GPfl',
1950 'info_dict': {
1951 'id': 'DJztXj2GPfk',
1952 'ext': 'mp4',
1953 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1954 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1955 'upload_date': '20090125',
1956 'uploader': 'Prochorowka',
1957 'uploader_id': 'Prochorowka',
1958 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1959 'artist': 'Panjabi MC',
1960 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1961 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1962 },
1963 'params': {
1964 'skip_download': True,
1965 },
1966 'skip': 'Video unavailable',
1967 },
1968 {
1969 # empty description results in an empty string
1970 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1971 'info_dict': {
1972 'id': 'x41yOUIvK2k',
1973 'ext': 'mp4',
1974 'title': 'IMG 3456',
1975 'description': '',
1976 'upload_date': '20170613',
1977 'uploader_id': 'ElevageOrVert',
1978 'uploader': 'ElevageOrVert',
1979 'view_count': int,
1980 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
1981 'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
1982 'like_count': int,
1983 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
1984 'tags': [],
1985 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
1986 'availability': 'public',
1987 'age_limit': 0,
1988 'categories': ['Pets & Animals'],
1989 'duration': 7,
1990 'playable_in_embed': True,
1991 'live_status': 'not_live',
1992 'channel': 'ElevageOrVert',
1993 'channel_follower_count': int
1994 },
1995 'params': {
1996 'skip_download': True,
1997 },
1998 },
1999 {
2000 # with '};' inside yt initial data (see [1])
2001 # see [2] for an example with '};' inside ytInitialPlayerResponse
2002 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
2003 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
2004 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
2005 'info_dict': {
2006 'id': 'CHqg6qOn4no',
2007 'ext': 'mp4',
2008 'title': 'Part 77 Sort a list of simple types in c#',
2009 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
2010 'upload_date': '20130831',
2011 'uploader_id': 'kudvenkat',
2012 'uploader': 'kudvenkat',
2013 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
2014 'like_count': int,
2015 'uploader_url': 'http://www.youtube.com/user/kudvenkat',
2016 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
2017 'live_status': 'not_live',
2018 'categories': ['Education'],
2019 'availability': 'public',
2020 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
2021 'tags': 'count:12',
2022 'playable_in_embed': True,
2023 'age_limit': 0,
2024 'view_count': int,
2025 'duration': 522,
2026 'channel': 'kudvenkat',
2027 'comment_count': int,
2028 'channel_follower_count': int
2029 },
2030 'params': {
2031 'skip_download': True,
2032 },
2033 },
2034 {
2035 # another example of '};' in ytInitialData
2036 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
2037 'only_matching': True,
2038 },
2039 {
2040 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
2041 'only_matching': True,
2042 },
2043 {
2044 # https://github.com/ytdl-org/youtube-dl/pull/28094
2045 'url': 'OtqTfy26tG0',
2046 'info_dict': {
2047 'id': 'OtqTfy26tG0',
2048 'ext': 'mp4',
2049 'title': 'Burn Out',
2050 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
2051 'upload_date': '20141120',
2052 'uploader': 'The Cinematic Orchestra - Topic',
2053 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2054 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2055 'artist': 'The Cinematic Orchestra',
2056 'track': 'Burn Out',
2057 'album': 'Every Day',
2058 'like_count': int,
2059 'live_status': 'not_live',
2060 'alt_title': 'Burn Out',
2061 'duration': 614,
2062 'age_limit': 0,
2063 'view_count': int,
2064 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2065 'creator': 'The Cinematic Orchestra',
2066 'channel': 'The Cinematic Orchestra',
2067 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
2068 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2069 'availability': 'public',
2070 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
2071 'categories': ['Music'],
2072 'playable_in_embed': True,
2073 'channel_follower_count': int
2074 },
2075 'params': {
2076 'skip_download': True,
2077 },
2078 },
2079 {
2080 # controversial video, only works with bpctr when authenticated with cookies
2081 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
2082 'only_matching': True,
2083 },
2084 {
2085 # controversial video, requires bpctr/contentCheckOk
2086 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
2087 'info_dict': {
2088 'id': 'SZJvDhaSDnc',
2089 'ext': 'mp4',
2090 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2091 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
2092 'uploader': 'CBS Mornings',
2093 'uploader_id': 'CBSThisMorning',
2094 'upload_date': '20140716',
2095 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2096 'duration': 170,
2097 'categories': ['News & Politics'],
2098 'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
2099 'view_count': int,
2100 'channel': 'CBS Mornings',
2101 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2102 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2103 'age_limit': 18,
2104 'availability': 'needs_auth',
2105 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2106 'like_count': int,
2107 'live_status': 'not_live',
2108 'playable_in_embed': True,
2109 'channel_follower_count': int
2110 }
2111 },
2112 {
2113 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2114 'url': 'cBvYw8_A0vQ',
2115 'info_dict': {
2116 'id': 'cBvYw8_A0vQ',
2117 'ext': 'mp4',
2118 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2119 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2120 'upload_date': '20201120',
2121 'uploader': 'Walk around Japan',
2122 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2123 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2124 'duration': 1456,
2125 'categories': ['Travel & Events'],
2126 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2127 'view_count': int,
2128 'channel': 'Walk around Japan',
2129 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2130 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2131 'age_limit': 0,
2132 'availability': 'public',
2133 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2134 'live_status': 'not_live',
2135 'playable_in_embed': True,
2136 'channel_follower_count': int
2137 },
2138 'params': {
2139 'skip_download': True,
2140 },
2141 }, {
2142 # Has multiple audio streams
2143 'url': 'WaOKSUlf4TM',
2144 'only_matching': True
2145 }, {
2146 # Requires Premium: has format 141 when requested using YTM url
2147 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2148 'only_matching': True
2149 }, {
2150 # multiple subtitles with same lang_code
2151 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2152 'only_matching': True,
2153 }, {
2154 # Force use android client fallback
2155 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2156 'info_dict': {
2157 'id': 'YOelRv7fMxY',
2158 'title': 'DIGGING A SECRET TUNNEL Part 1',
2159 'ext': '3gp',
2160 'upload_date': '20210624',
2161 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2162 'uploader': 'colinfurze',
2163 'uploader_id': 'colinfurze',
2164 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
2165 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2166 'duration': 596,
2167 'categories': ['Entertainment'],
2168 'uploader_url': 'http://www.youtube.com/user/colinfurze',
2169 'view_count': int,
2170 'channel': 'colinfurze',
2171 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2172 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2173 'age_limit': 0,
2174 'availability': 'public',
2175 'like_count': int,
2176 'live_status': 'not_live',
2177 'playable_in_embed': True,
2178 'channel_follower_count': int
2179 },
2180 'params': {
2181 'format': '17', # 3gp format available on android
2182 'extractor_args': {'youtube': {'player_client': ['android']}},
2183 },
2184 },
2185 {
2186 # Skip download of additional client configs (remix client config in this case)
2187 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2188 'only_matching': True,
2189 'params': {
2190 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2191 },
2192 }, {
2193 # shorts
2194 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2195 'only_matching': True,
2196 }, {
2197 'note': 'Storyboards',
2198 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2199 'info_dict': {
2200 'id': '5KLPxDtMqe8',
2201 'ext': 'mhtml',
2202 'format_id': 'sb0',
2203 'title': 'Your Brain is Plastic',
2204 'uploader_id': 'scishow',
2205 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2206 'upload_date': '20140324',
2207 'uploader': 'SciShow',
2208 'like_count': int,
2209 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2210 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2211 'view_count': int,
2212 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2213 'playable_in_embed': True,
2214 'tags': 'count:12',
2215 'uploader_url': 'http://www.youtube.com/user/scishow',
2216 'availability': 'public',
2217 'channel': 'SciShow',
2218 'live_status': 'not_live',
2219 'duration': 248,
2220 'categories': ['Education'],
2221 'age_limit': 0,
2222 'channel_follower_count': int
2223 }, 'params': {'format': 'mhtml', 'skip_download': True}
2224 }, {
2225 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2226 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2227 'info_dict': {
2228 'id': '2NUZ8W2llS4',
2229 'ext': 'mp4',
2230 'title': 'The NP that test your phone performance 🙂',
2231 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2232 'uploader': 'Leon Nguyen',
2233 'uploader_id': 'VNSXIII',
2234 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2235 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2236 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2237 'duration': 21,
2238 'view_count': int,
2239 'age_limit': 0,
2240 'categories': ['Gaming'],
2241 'tags': 'count:23',
2242 'playable_in_embed': True,
2243 'live_status': 'not_live',
2244 'upload_date': '20220103',
2245 'like_count': int,
2246 'availability': 'public',
2247 'channel': 'Leon Nguyen',
2248 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2249 'comment_count': int,
2250 'channel_follower_count': int
2251 }
2252 }, {
2253 # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
2254 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2255 'info_dict': {
2256 'id': '2NUZ8W2llS4',
2257 'ext': 'mp4',
2258 'title': 'The NP that test your phone performance 🙂',
2259 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2260 'uploader': 'Leon Nguyen',
2261 'uploader_id': 'VNSXIII',
2262 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2263 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2264 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2265 'duration': 21,
2266 'view_count': int,
2267 'age_limit': 0,
2268 'categories': ['Gaming'],
2269 'tags': 'count:23',
2270 'playable_in_embed': True,
2271 'live_status': 'not_live',
2272 'upload_date': '20220102',
2273 'like_count': int,
2274 'availability': 'public',
2275 'channel': 'Leon Nguyen',
2276 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2277 'comment_count': int,
2278 'channel_follower_count': int
2279 },
2280 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
2281 }, {
2282 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2283 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2284 'info_dict': {
2285 'id': 'mzZzzBU6lrM',
2286 'ext': 'mp4',
2287 'title': 'I Met GeorgeNotFound In Real Life...',
2288 'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
2289 'uploader': 'Quackity',
2290 'uploader_id': 'QuackityHQ',
2291 'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
2292 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2293 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2294 'duration': 955,
2295 'view_count': int,
2296 'age_limit': 0,
2297 'categories': ['Entertainment'],
2298 'tags': 'count:26',
2299 'playable_in_embed': True,
2300 'live_status': 'not_live',
2301 'release_timestamp': 1641172509,
2302 'release_date': '20220103',
2303 'upload_date': '20220103',
2304 'like_count': int,
2305 'availability': 'public',
2306 'channel': 'Quackity',
2307 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2308 'channel_follower_count': int
2309 }
2310 },
2311 { # continuous livestream. Microformat upload date should be preferred.
2312 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2313 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2314 'info_dict': {
2315 'id': 'kgx4WGK0oNU',
2316 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2317 'ext': 'mp4',
2318 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2319 'availability': 'public',
2320 'age_limit': 0,
2321 'release_timestamp': 1637975704,
2322 'upload_date': '20210619',
2323 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2324 'live_status': 'is_live',
2325 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
2326 'uploader': '阿鲍Abao',
2327 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2328 'channel': 'Abao in Tokyo',
2329 'channel_follower_count': int,
2330 'release_date': '20211127',
2331 'tags': 'count:39',
2332 'categories': ['People & Blogs'],
2333 'like_count': int,
2334 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2335 'view_count': int,
2336 'playable_in_embed': True,
2337 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
2338 },
2339 'params': {'skip_download': True}
2340 }, {
2341 # Story. Requires specific player params to work.
2342 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
2343 'info_dict': {
2344 'id': 'vv8qTUWmulI',
2345 'ext': 'mp4',
2346 'availability': 'unlisted',
2347 'view_count': int,
2348 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
2349 'upload_date': '20220526',
2350 'categories': ['Education'],
2351 'title': 'Story',
2352 'channel': 'IT\'S HISTORY',
2353 'description': '',
2354 'uploader_id': 'BlastfromthePast',
2355 'duration': 12,
2356 'uploader': 'IT\'S HISTORY',
2357 'playable_in_embed': True,
2358 'age_limit': 0,
2359 'live_status': 'not_live',
2360 'tags': [],
2361 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
2362 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
2363 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
2364 },
2365 'skip': 'stories get removed after some period of time',
2366 }, {
2367 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2368 'info_dict': {
2369 'id': 'tjjjtzRLHvA',
2370 'ext': 'mp4',
2371 'title': 'ハッシュタグ無し };if window.ytcsi',
2372 'upload_date': '20220323',
2373 'like_count': int,
2374 'availability': 'unlisted',
2375 'channel': 'nao20010128nao',
2376 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
2377 'age_limit': 0,
2378 'uploader': 'nao20010128nao',
2379 'uploader_id': 'nao20010128nao',
2380 'categories': ['Music'],
2381 'view_count': int,
2382 'description': '',
2383 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2384 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2385 'live_status': 'not_live',
2386 'playable_in_embed': True,
2387 'channel_follower_count': int,
2388 'duration': 6,
2389 'tags': [],
2390 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
2391 }
2392 }, {
2393 # Prefer primary title+description language metadata by default
2394 # Do not prefer translated description if primary is empty
2395 'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
2396 'info_dict': {
2397 'id': 'el3E4MbxRqQ',
2398 'ext': 'mp4',
2399 'title': 'dlp test video 2 - primary sv no desc',
2400 'description': '',
2401 'channel': 'cole-dlp-test-acc',
2402 'tags': [],
2403 'view_count': int,
2404 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2405 'like_count': int,
2406 'playable_in_embed': True,
2407 'availability': 'unlisted',
2408 'thumbnail': 'https://i.ytimg.com/vi_webp/el3E4MbxRqQ/maxresdefault.webp',
2409 'age_limit': 0,
2410 'duration': 5,
2411 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
2412 'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2413 'live_status': 'not_live',
2414 'upload_date': '20220908',
2415 'categories': ['People & Blogs'],
2416 'uploader': 'cole-dlp-test-acc',
2417 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2418 },
2419 'params': {'skip_download': True}
2420 }, {
2421 # Extractor argument: prefer translated title+description
2422 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
2423 'info_dict': {
2424 'id': 'gHKT4uU8Zng',
2425 'ext': 'mp4',
2426 'channel': 'cole-dlp-test-acc',
2427 'tags': [],
2428 'duration': 5,
2429 'live_status': 'not_live',
2430 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2431 'upload_date': '20220728',
2432 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
2433 'view_count': int,
2434 'categories': ['People & Blogs'],
2435 'thumbnail': 'https://i.ytimg.com/vi_webp/gHKT4uU8Zng/maxresdefault.webp',
2436 'title': 'dlp test video title translated (fr)',
2437 'availability': 'public',
2438 'uploader': 'cole-dlp-test-acc',
2439 'age_limit': 0,
2440 'description': 'dlp test video description translated (fr)',
2441 'playable_in_embed': True,
2442 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2443 'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2444 },
2445 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
2446 'expected_warnings': [r'Preferring "fr" translated fields'],
2447 }, {
2448 'note': '6 channel audio',
2449 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2450 'only_matching': True,
2451 }
2452 ]
2453
# Test cases for pages that are not YouTube URLs themselves but carry a
# YouTube embed. Type objects such as `int` are used where the concrete value
# is not pinned (presumably matched by type - confirm against the test harness).
_WEBPAGE_TESTS = [{
    # YouTube <object> embed
    'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
    # NOTE(review): this hash is identical to the description hash below,
    # which looks like a copy-paste - confirm the intended value.
    'md5': '873c81d308b979f0e23ee7e620b312a3',
    'info_dict': {
        'id': 'msN87y-iEx0',
        'ext': 'mp4',
        'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
        'upload_date': '20080526',
        'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
        'uploader': 'Christopher Sykes',
        'uploader_id': 'ChristopherJSykes',
        'age_limit': 0,
        'tags': [
            'feynman', 'mirror', 'science', 'physics',
            'imagination', 'fun', 'cool', 'puzzle',
        ],
        'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
        'playable_in_embed': True,
        'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
        'like_count': int,
        'comment_count': int,
        'channel': 'Christopher Sykes',
        'live_status': 'not_live',
        'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
        'availability': 'public',
        'duration': 195,
        'view_count': int,
        'categories': ['Science & Technology'],
        'channel_follower_count': int,
        'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
    },
    'params': {'skip_download': True},
}]
2489
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected here; every other URL is decided by the parent class.
    """
    # NOTE(review): imported locally although the module header already imports
    # parse_qs - presumably deliberate; confirm before hoisting it.
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    return False if list_values[0] else super().suitable(url)
2498
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor, then create the two per-instance caches.

    ``_code_cache`` maps a player id to its downloaded JS (see ``_load_player``);
    ``_player_cache`` holds memoised results keyed by tuple (see ``_cached``).
    """
    super().__init__(*args, **kwargs)
    self._code_cache, self._player_cache = {}, {}
2503
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
    """Turn the "from start" formats into lazily generated DASH fragment streams.

    Every dict in `formats` with a truthy 'is_from_start' is mutated in place:
    it is flagged live, given the 'http_dash_segments_generator' protocol and a
    'fragments' callable bound to `_live_dash_fragments`. That callable receives
    `mpd_feed`, a closure that re-downloads the player responses once the
    currently known manifests are older than the requested delay.
    """
    # Serialises manifest refreshes requested through mpd_feed
    lock = threading.Lock()

    is_live = True
    start_time = time.time()
    formats = [f for f in formats if f.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        # Rebinds the enclosing formats/start_time/is_live; `format_id` is not used here
        nonlocal formats, start_time, is_live
        # Do nothing while the last fetch is at most `delay` seconds old
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(
            prs, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        # NOTE: the refreshed list replaces `formats` wholesale; it is not re-filtered on 'is_from_start'
        _, is_live, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        with lock:
            refetch_manifest(format_id, delay)

        # Look the requested format up in the (possibly just refreshed) list
        f = next((f for f in formats if f['format_id'] == format_id), None)
        if not f:
            if not is_live:
                self.to_screen(f'{video_id}: Video is no longer live')
            else:
                self.report_warning(
                    f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
            return None
        return f['manifest_url'], f['manifest_stream_number'], is_live

    for f in formats:
        f['is_live'] = True
        f['protocol'] = 'http_dash_segments_generator'
        # The remaining argument of _live_dash_fragments (ctx) is supplied by whoever calls 'fragments'
        f['fragments'] = functools.partial(
            self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)
2547
def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
    """Generate fragment descriptors for a live stream downloaded from its start.

    Yields dicts with 'url' (the segment URL built from the manifest's fragment
    base URL and a sequence number) and 'fragment_count' (the newest known
    sequence number plus one). The generator keeps polling while the stream is
    live and stops once `no_fragment_score` exceeds 30.

    @param format_id        id of the format whose manifest is followed
    @param live_start_time  start timestamp of the live stream, or a falsy value if unknown
    @param mpd_feed         callable(format_id, delay) returning
                            (manifest_url, manifest_stream_number, is_live) or None
    @param ctx              dict-like download context; 'start' is read and 'last_error' is popped
    """
    # FETCH_SPAN: seconds between polls; MAX_DURATION: 432000 s = the 120 hours named in the warning below
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # Must be bound before the helper below can run: the helper returns the enclosing
    # `last_seq` on its failure paths, and the very first manifest fetch may fail.
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        """Refresh the manifest; return (should_continue, newest sequence number)."""
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        # A 403 on the previous request (or an explicit request) shortens the manifest refresh delay
        expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        try:
            fmts, _ = self._extract_mpd_formats_and_subtitles(
                mpd_url, None, note=False, errnote=False, fatal=False)
        except ExtractorError:
            fmts = None
        if not fmts:
            no_fragment_score += 2
            return False, last_seq
        # NOTE(review): raises StopIteration when no format has this stream number - confirm that cannot happen
        fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    while is_live:
        fetch_time = time.time()
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # Caught just below; used only to restart the while loop
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                    'fragment_count': last_seq,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2649
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in any of *ytcfgs*, or None.

    'PLAYER_JS_URL' is looked up first, then 'jsUrl' inside
    'WEB_PLAYER_CONTEXT_CONFIGS'. `webpage` is accepted but not used.
    """
    lookup_paths = (
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
    )
    found = traverse_obj(ytcfgs, *lookup_paths, get_all=False, expected_type=str)
    if found:
        return urljoin('https://www.youtube.com', found)
    return None
2657
def _download_player_url(self, video_id, fatal=False):
    """Build the player base.js URL from the version named in the iframe API script.

    Returns None when the script cannot be downloaded or contains no
    version (both only possible when `fatal` is false).
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None

    version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not version:
        return None
    return f'https://www.youtube.com/s/player/{version}/player_ias.vflset/en_US/base.js'
2667
2668 def _signature_cache_id(self, example_sig):
2669 """ Return a string representation of a signature """
2670 return '.'.join(str(len(part)) for part in example_sig.split('.'))
2671
@classmethod
def _extract_player_info(cls, player_url):
    """Return the 'id' group of the first `_PLAYER_INFO_RE` pattern matching *player_url*.

    Raises ExtractorError when none of the patterns match.
    """
    candidates = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    id_match = next(filter(None, candidates), None)
    if id_match is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_match.group('id')
2681
def _load_player(self, video_id, player_url, fatal=True):
    """Return the player JS for *player_url*, downloading it at most once per player id.

    Only non-empty downloads are cached; None is returned when a
    non-fatal download produced nothing.
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache[player_id]

    js_source = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    if js_source:
        self._code_cache[player_id] = js_source
    return self._code_cache.get(player_id)
2692
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that rearranges a signature shaped like *example_sig*.

    The rearrangement is kept as a list of source indices. It is loaded from
    the 'youtube-sigfuncs' cache when present; otherwise it is derived by
    running the player's signature function on a probe string whose
    characters are their own positions, and then stored in that cache.
    """
    player_id = self._extract_player_info(player_url)

    # Key of the filesystem cache entry
    func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
    assert os.path.basename(func_id) == func_id

    self.write_debug(f'Extracting signature function {func_id}')
    index_map = self.cache.load('youtube-sigfuncs', func_id)

    if not index_map:
        player_js = self._load_player(video_id, player_url)
        if player_js:
            scramble = self._parse_sig_js(player_js)
            probe = ''.join(chr(pos) for pos in range(len(example_sig)))
            index_map = list(map(ord, scramble(probe)))
            self.cache.store('youtube-sigfuncs', func_id, index_map)

    def apply_index_map(s):
        return ''.join(s[i] for i in index_map)

    return apply_index_map
2712
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function *func*.

    Does nothing unless the 'youtube_print_sig_code' param is set. *func* is
    run on a probe string whose characters are their own positions, and the
    resulting index sequence is rendered as a sum of `s[...]` index and
    slice expressions.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        """Yield `s[...]` expressions; runs of indices stepping by +1/-1 become slices."""
        def _genslice(start, end, step):
            # Render the inclusive run start..end as a Python slice expression
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return f's[{starts}{ends}{steps}]'

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                # Inside a run: keep extending while the stride is unchanged
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run starts at `prev`
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Flush whatever is pending after the final pair
        # NOTE(review): `i` is unbound here when idxs has fewer than two items - confirm that never happens
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
2754
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and wrap it as a Python callable.

    The function name is found with the first matching pattern below and
    the function itself is extracted with JSInterpreter. The returned
    callable takes the signature string and passes it as the function's
    single argument.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         # This pattern used to be listed twice in a row; the second copy could never match anything new
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    return lambda s: initial_function([s])
2778
2779 def _cached(self, func, *cache_id):
2780 def inner(*args, **kwargs):
2781 if cache_id not in self._player_cache:
2782 try:
2783 self._player_cache[cache_id] = func(*args, **kwargs)
2784 except ExtractorError as e:
2785 self._player_cache[cache_id] = e
2786 except Exception as e:
2787 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
2788
2789 ret = self._player_cache[cache_id]
2790 if isinstance(ret, Exception):
2791 raise ret
2792 return ret
2793 return inner
2794
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature.

    The signature function is memoised per player URL and signature shape
    (see `_signature_cache_id`).
    """
    sig_shape = self._signature_cache_id(s)
    memoised_extract = self._cached(
        self._extract_signature_function, 'sig', player_url, sig_shape)
    descramble = memoised_extract(video_id, player_url, s)
    self._print_sig_code(descramble, s)
    return descramble(s)
2802
def _decrypt_nsig(self, s, video_id, player_url):
    """Decrypt the encrypted n field *s* with the player's nsig function.

    The function is run natively first. If that raises
    JSInterpreter.Exception, the same code is executed through PhantomJS;
    when PhantomJS cannot be set up, the native error is re-raised.
    Raises ExtractorError if `player_url` is None or the function code
    cannot be extracted.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as extract_error:
        raise ExtractorError('Unable to extract nsig function code', cause=extract_error)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        memoised_builder = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        decrypted = memoised_builder(jsi, func_code)(s)
    except JSInterpreter.Exception as native_error:
        try:
            phantom = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            raise native_error
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(native_error)

        arg_names, func_body = func_code
        script = f'console.log(function({", ".join(arg_names)}) {{ {func_body} }}({s!r}));'
        decrypted = phantom.execute(
            script, video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {decrypted}')
    return decrypted
2836
def _extract_n_function_name(self, jscode):
    """Return the name of the player's n function.

    The player references the function either directly or as an element
    of an array (``name[idx]``). In the array case the array literal is
    located, parsed, and the indexed entry is returned.
    """
    name, index = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
        jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    if not index:
        return name

    array_literal = self._search_regex(
        rf'var {re.escape(name)}\s*=\s*(\[.+?\]);', jscode,
        f'Initial JS player n function list ({name}.{index})')
    function_names = json.loads(js_to_json(array_literal))
    return function_names[int(index)]
2847
def _extract_n_function_code(self, video_id, player_url):
    """Return ``(jsi, player_id, func_code)`` for the player's nsig function.

    `func_code` is a pair of (argument names, function body). It comes from
    the 'youtube-nsig' cache when available; otherwise it is extracted from
    the player JS - first with a strict regex, then with JSInterpreter - and
    stored in that cache.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    # On a cache hit the interpreter is built from the cached value rather than the player JS
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # The name is escaped because it may contain "$", which would otherwise act as a
    # regex anchor and make this pattern unmatchable; "." before join is escaped too.
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
            # NB: The end of the regex is intentionally kept strict
            {(?P<code>.+?}\s*return\ [\w$]+\.join\(""\))};''' % re.escape(func_name),
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
2873
2874 def _extract_n_function_from_code(self, jsi, func_code):
2875 func = jsi.extract_function_from_code(*func_code)
2876
2877 def extract_nsig(s):
2878 try:
2879 ret = func([s])
2880 except JSInterpreter.Exception:
2881 raise
2882 except Exception as e:
2883 raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
2884
2885 if ret.startswith('enhanced_except_'):
2886 raise JSInterpreter.Exception('Signature function returned an exception')
2887 return ret
2888
2889 return extract_nsig
2890
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The value is read from ytcfg['STS'] when present, otherwise from the
    player JS. Without a `player_url` this raises ExtractorError if
    `fatal`, else warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    player_js = self._load_player(video_id, player_url, fatal=fatal)
    if not player_js:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', player_js,
        'JS player signature timestamp', group='sts', fatal=fatal))
2914
def _mark_watched(self, video_id, player_responses):
    """Request the playback and watchtime tracking URLs found in the player responses.

    The playback URL is requested first, then the watchtime URL. Processing
    stops with a warning as soon as one of the URLs is missing.
    """
    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')

    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        base_url = get_first(
            player_responses, ('playbackTracking', key, 'baseUrl'), expected_type=url_or_none)
        if not base_url:
            self.report_warning(f'Unable to mark {label}watched')
            return

        url_parts = urllib.parse.urlparse(base_url)
        query = urllib.parse.parse_qs(url_parts.query)

        # cpn generation algorithm is reverse engineered from base.js.
        # In fact it works even with dummy cpn.
        CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

        # more consistent results setting it to right before the end
        length_values = query.get('len') or ['1.5']
        video_length = [str(float(length_values[0]) - 1)]

        query['ver'] = ['2']
        query['cpn'] = [cpn]
        query['cmt'] = video_length
        query['el'] = 'detailpage'  # otherwise defaults to "shorts"

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            query['st'] = video_length
            query['et'] = video_length

        tracking_url = urllib.parse.urlunparse(
            url_parts._replace(query=urllib.parse.urlencode(query, True)))

        self._download_webpage(
            tracking_url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
2955
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield url results for YouTube videos referenced by *webpage*.

    An Invidious-style alternate link wins outright: its result is yielded
    and extraction is stopped. Otherwise the parent class's results come
    first, followed by lazyYT embeds and "YouTube Video Importer" players.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate:
        yield cls.url_result(alternate.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    for lazy in re.finditer(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        raw_id = lazy.group(1)
        yield cls.url_result(unescapeHTML(raw_id), cls, raw_id)

    # Wordpress "YouTube Video Importer" plugin
    for player in re.finditer(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage):
        # Group 3 is the unnamed capture holding the video id
        embedded_id = player.group(3)
        yield cls.url_result(embedded_id, cls, embedded_id)
2979
@classmethod
def extract_id(cls, url):
    """Return the video id for *url*; raise ExtractorError when none can be derived."""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
2986
def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the chaptered player bar inside *data*.

    Start times are converted from 'timeRangeStartMillis' to seconds.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters',
    )

    def start_seconds(chapter):
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def title_text(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    raw_chapters = traverse_obj(data, chapters_path, expected_type=list)
    return self._extract_chapters(
        raw_chapters, chapter_time=start_seconds, chapter_title=title_text, duration=duration)
3001
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the chapters of the first engagement panel that yields any, else []."""
    panels = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def start_seconds(marker):
        return parse_duration(self._get_text(marker, 'timeDescription'))

    def title_text(marker):
        return self._get_text(marker, 'title')

    for panel_contents in panels:
        markers = traverse_obj(panel_contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, start_seconds, title_text, duration)
        if chapters:
            return chapters
    return []
3014
def _extract_chapters_from_description(self, description, duration):
    """Extract chapters from description lines that begin with a timestamp.

    The entries are passed on with ``strict=False``, i.e. they are sorted
    by start time before validation.
    """
    timestamped_lines = re.findall(
        r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or '')

    def start_seconds(line):
        return parse_duration(line[0])

    def title_text(line):
        return line[1]

    return self._extract_chapters(
        timestamped_lines, chapter_time=start_seconds, chapter_title=title_text,
        duration=duration, strict=False)
3020
3021 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
3022 if not duration:
3023 return
3024 chapter_list = [{
3025 'start_time': chapter_time(chapter),
3026 'title': chapter_title(chapter),
3027 } for chapter in chapter_list or []]
3028 if not strict:
3029 chapter_list.sort(key=lambda c: c['start_time'] or 0)
3030
3031 chapters = [{'start_time': 0}]
3032 for idx, chapter in enumerate(chapter_list):
3033 if chapter['start_time'] is None:
3034 self.report_warning(f'Incomplete chapter {idx}')
3035 elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
3036 chapters.append(chapter)
3037 elif chapter not in chapters:
3038 self.report_warning(
3039 f'Invalid start time ({chapter["start_time"]} < {chapters[-1]["start_time"]}) for chapter "{chapter["title"]}"')
3040 return chapters[1:]
3041
3042 def _extract_comment(self, comment_renderer, parent=None):
3043 comment_id = comment_renderer.get('commentId')
3044 if not comment_id:
3045 return
3046
3047 text = self._get_text(comment_renderer, 'contentText')
3048
3049 # Timestamp is an estimate calculated from the current time and time_text
3050 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
3051 timestamp = self._parse_time_text(time_text)
3052
3053 author = self._get_text(comment_renderer, 'authorText')
3054 author_id = try_get(comment_renderer,
3055 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)
3056
3057 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
3058 lambda x: x['likeCount']), str)) or 0
3059 author_thumbnail = try_get(comment_renderer,
3060 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)
3061
3062 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
3063 is_favorited = 'creatorHeart' in (try_get(
3064 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
3065 return {
3066 'id': comment_id,
3067 'text': text,
3068 'timestamp': timestamp,
3069 'time_text': time_text,
3070 'like_count': votes,
3071 'is_favorited': is_favorited,
3072 'author': author,
3073 'author_id': author_id,
3074 'author_thumbnail': author_thumbnail,
3075 'author_is_uploader': author_is_uploader,
3076 'parent': parent or 'root'
3077 }
3078
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generate comment dicts by following comment continuations page by page.

    Called without `parent` for the comment section itself, and recursively
    (from extract_thread below) with `parent` set to a comment id to fetch
    that comment's replies. `tracker` is a dict of running counters shared
    across the recursive calls; it is created here when not supplied.
    """

    # First value of an extractor argument, or '' when it is not set
    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        """Record the expected comment count and return the continuation of the selected sort order."""
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        """Yield each comment in `contents`, followed by its replies."""
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare yield produces None, which the consuming loop below treats as "stop"
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Cap replies by both the per-thread limit and what remains of the overall reply limit
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # Up to four limits are read from the max_comments argument; missing or non-numeric ones become sys.maxsize.
    # `max_comments` itself is not used in this function.
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        # Stays None (ending the page loop) unless a section below supplies the next continuation
        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section of a top-level request is handled as the header only
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    # Surface YouTube's own message when a top-level request produced no comments at all
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
3224
3225 @staticmethod
3226 def _generate_comment_continuation(video_id):
3227 """
3228 Generates initial comment section continuation token from given video id
3229 """
3230 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3231 return base64.b64encode(token.encode()).decode()
3232
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Returns a lazy iterator over the comments of the section identified
    as 'comment-item-section', limited to the first value of the
    max_comments argument when that value is numeric.
    """
    def iter_section_comments(section_contents):
        comment_section = None
        for section in traverse_obj(section_contents, (..., 'itemSectionRenderer'), default={}):
            if section.get('sectionIdentifier') == 'comment-item-section':
                comment_section = section
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(iter_section_comments(contents), 0, limit)
3243
3244 @staticmethod
3245 def _get_checkok_params():
3246 return {'contentCheckOk': True, 'racyCheckOk': True}
3247
@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback-context part of a player query.

    `sts` is included as 'signatureTimestamp' only when it is not None.
    The "check ok" flags are merged in at the top level.
    """
    playback = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback['signatureTimestamp'] = sts

    player_context = {'playbackContext': {'contentPlaybackContext': playback}}
    player_context.update(cls._get_checkok_params())
    return player_context
3261
@staticmethod
def _is_agegated(player_response):
    """Tell whether the player response indicates an age gate.

    True when 'desktopLegacyAgeGateReason' is set, or when the
    playability status or reason contains one of the known markers.
    """
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )

    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    for marker in AGE_GATE_REASONS:
        for reason in reasons:
            if marker in reason:
                return True
    return False
3273
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the `playabilityStatus` status of the response is 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
3277
# Opaque value sent as `params` of the player API query (for stories and the
# android client) and as `pp` of the watch page query (for stories)
_STORY_PLAYER_PARAMS = '8AEB'
3279
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """
    Request the `player` endpoint for `video_id` with the given client.

    The signature timestamp is only looked up (non-fatally) when a
    `player_url` is given. `_STORY_PLAYER_PARAMS` is sent as `params` for
    stories and for the `android` client.

    @returns  the response of `_extract_response`, or None if it is falsy
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    api_headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    player_query = {'videoId': video_id}
    needs_story_params = smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android'
    if needs_story_params:
        player_query['params'] = self._STORY_PLAYER_PARAMS
    player_query.update(self._generate_player_context(sts))

    download_note = 'Downloading %s player API JSON' % client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=player_query,
        ytcfg=player_ytcfg, headers=api_headers, fatal=True,
        default_client=client, note=download_note)
    return response or None
3301
def _get_requested_clients(self, url, smuggled_data):
    """
    Resolve the `player_client` extractor argument into a list of client names.

    Each argument value may be the name of a client in INNERTUBE_CLIENTS that
    does not start with '_', 'default' (expands to android and web) or 'all'
    (expands to every such client, ordered by descending `priority`).
    Unsupported values are skipped with a warning. When nothing valid was
    requested, the defaults are used. For music URLs, the `<client>_music`
    variant of every requested client is added if it exists.

    @returns  the result of `orderedSet` over the collected client names
    """
    requested_clients = []
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions before extending: feeding extend() a generator
        # over the same list would mutate the list while it is being iterated
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
3325
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """
    Collect the player responses of `video_id` for the given clients.

    The clients are processed in the given order. Further clients may get
    queued while processing, depending on the playability of each response
    (see the end of the loop). A failing client does not abort the
    extraction unless no player response could be obtained at all.

    @param clients  list of client names to request
    @param webpage  watch page HTML, or a falsy value if it was not downloaded
    @returns        (list of player responses, player URL or None)
    @raises         the last ExtractorError, if no player response was collected
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed so that `clients.pop()` below yields the clients in their original order
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    clients.append(client_name)
                    all_clients.add(actual_client)
                    return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        # Shallow copy, so that `initial_pr` itself keeps its streamingData
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        # Only the web client can use the ytcfg of the webpage;
        # for the others it is downloaded unless skipped via `player_skip`
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        # Once found, the player URL is reused for the remaining clients
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The fallback download of the player URL is attempted at most once
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            # The initial player response of the webpage stands in for the web client's API response
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            # Only the most recent error is kept; the one it replaces is downgraded to a warning
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            # Queue an embedded variant of the client, if one exists and was not tried yet
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        # The last error is fatal only if there is no player response to work with
        if not len(prs):
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
3407
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, is_live, duration):
    """
    Yield the formats found in `streaming_data`, followed by the subtitles.

    This is a generator: every item except the last one is a format dict,
    and the very last item is the dict of subtitles merged from the HLS and
    DASH manifests. The formats listed directly in the streaming data
    (`formats`/`adaptiveFormats`) are yielded first, then those of the
    manifests.

    @param streaming_data  iterable of `streamingData` dicts
    @param player_url      URL of the JS player used for decrypting signatures/nsig;
                           may be None
    @param is_live         whether the video is currently live
    @param duration        duration of the video in seconds, or None if unknown
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `audioQuality` may be missing or None
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # Without a plain URL, the format needs its signature decrypted using the player
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                        f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
                        video_id=video_id, only_once=True)
                # The format is kept, but marked and de-prioritized
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault') and 10
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        # The check needs a known, non-zero duration to divide by
        is_damaged = duration and try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # `quality` is None when the format declares neither `quality` nor `audioQuality`
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    # Copied into a set, so that the list returned by `_configuration_arg` is never mutated
    skip_manifests = set(self._configuration_arg('skip'))
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.add('hls')
    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Set the format_id and quality of a manifest format; returns False for duplicates"""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            # The itag is already taken by another protocol
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Fall back to the quality of the known format that is closest in height
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        return True

    subtitles = {}
    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(hls_manifest_url, video_id, 'mp4', fatal=False, live=is_live)
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f
    # The subtitles are always the last item yielded
    yield subtitles
3603
def _extract_storyboard(self, player_responses, duration):
    """
    Yield a storyboard format dict for every level of the storyboard spec.

    The spec is a '|'-separated string: the first item is the base URL, and
    each following item describes one level as 8 '#'-separated fields, of
    which the first five are width, height, frame count, columns and rows,
    and the last two are the values for `$N` and `sigh`. Malformed levels
    are skipped with a warning.

    Nothing is yielded when there is no usable base URL, or when `duration`
    is unknown or zero, since fps and fragment durations are derived from it.

    @param duration  duration of the video in seconds, or None if unknown
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The list is reversed, so pop() takes the first item of the spec
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    if not duration:
        # Dividing by a missing or zero duration below would raise
        # in the middle of the extraction
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        # `L - i` maps the reversed position back to the level's index within the spec
        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment holds a grid of `cols * rows` frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may be shorter than the others
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
3641
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """
    Download the watch page (unless skipped) and the player responses.

    The webpage is not downloaded when 'webpage' is among the `player_skip`
    extractor arguments. Its download is non-fatal.

    @returns  (webpage, master_ytcfg, player_responses, player_url)
    """
    webpage = None
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    if not skip_webpage:
        webpage_query = {'bpctr': '9999999999', 'has_verified': '1'}
        if smuggled_data.get('is_story'):
            webpage_query['pp'] = self._STORY_PLAYER_PARAMS
        webpage = self._download_webpage(
            webpage_url, video_id, fatal=False, query=webpage_query)

    # Fall back to the default ytcfg when none can be extracted from the webpage
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url
3658
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """
    Determine the live state of the video and extract its formats.

    `is_live` is taken from `isLive` of the video details, falling back to
    `isLiveNow` of the live broadcast details when that is None.

    @returns  (live_broadcast_details, is_live, streaming_data, formats, subtitles)
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
    live_flag = get_first(video_details, 'isLive')
    is_live = get_first(live_broadcast_details, 'isLiveNow') if live_flag is None else live_flag

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    # The generator yields all the formats, and the subtitles as its last item
    extracted = self._extract_formats_and_subtitles(streaming_data, video_id, player_url, is_live, duration)
    *formats, subtitles = extracted

    return live_broadcast_details, is_live, streaming_data, formats, subtitles
3669
3670 def _real_extract(self, url):
3671 url, smuggled_data = unsmuggle_url(url, {})
3672 video_id = self._match_id(url)
3673
3674 base_url = self.http_scheme() + '//www.youtube.com/'
3675 webpage_url = base_url + 'watch?v=' + video_id
3676
3677 webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
3678
3679 playability_statuses = traverse_obj(
3680 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
3681
3682 trailer_video_id = get_first(
3683 playability_statuses,
3684 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
3685 expected_type=str)
3686 if trailer_video_id:
3687 return self.url_result(
3688 trailer_video_id, self.ie_key(), trailer_video_id)
3689
3690 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
3691 if webpage else (lambda x: None))
3692
3693 video_details = traverse_obj(
3694 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
3695 microformats = traverse_obj(
3696 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
3697 expected_type=dict, default=[])
3698
3699 translated_title = self._get_text(microformats, (..., 'title'))
3700 video_title = (self._preferred_lang and translated_title
3701 or get_first(video_details, 'title') # primary
3702 or translated_title
3703 or search_meta(['og:title', 'twitter:title', 'title']))
3704 translated_description = self._get_text(microformats, (..., 'description'))
3705 original_description = get_first(video_details, 'shortDescription')
3706 video_description = (
3707 self._preferred_lang and translated_description
3708 # If original description is blank, it will be an empty string.
3709 # Do not prefer translated description in this case.
3710 or original_description if original_description is not None else translated_description)
3711
3712 multifeed_metadata_list = get_first(
3713 player_responses,
3714 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
3715 expected_type=str)
3716 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
3717 if self.get_param('noplaylist'):
3718 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
3719 else:
3720 entries = []
3721 feed_ids = []
3722 for feed in multifeed_metadata_list.split(','):
3723 # Unquote should take place before split on comma (,) since textual
3724 # fields may contain comma as well (see
3725 # https://github.com/ytdl-org/youtube-dl/issues/8536)
3726 feed_data = urllib.parse.parse_qs(
3727 urllib.parse.unquote_plus(feed))
3728
3729 def feed_entry(name):
3730 return try_get(
3731 feed_data, lambda x: x[name][0], str)
3732
3733 feed_id = feed_entry('id')
3734 if not feed_id:
3735 continue
3736 feed_title = feed_entry('title')
3737 title = video_title
3738 if feed_title:
3739 title += ' (%s)' % feed_title
3740 entries.append({
3741 '_type': 'url_transparent',
3742 'ie_key': 'Youtube',
3743 'url': smuggle_url(
3744 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
3745 {'force_singlefeed': True}),
3746 'title': title,
3747 })
3748 feed_ids.append(feed_id)
3749 self.to_screen(
3750 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
3751 % (', '.join(feed_ids), video_id))
3752 return self.playlist_result(
3753 entries, video_id, video_title, video_description)
3754
3755 duration = int_or_none(
3756 get_first(video_details, 'lengthSeconds')
3757 or get_first(microformats, 'lengthSeconds')
3758 or parse_duration(search_meta('duration'))) or None
3759
3760 live_broadcast_details, is_live, streaming_data, formats, automatic_captions = \
3761 self._list_formats(video_id, microformats, video_details, player_responses, player_url)
3762
3763 if not formats:
3764 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
3765 self.report_drm(video_id)
3766 pemr = get_first(
3767 playability_statuses,
3768 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
3769 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
3770 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
3771 if subreason:
3772 if subreason == 'The uploader has not made this video available in your country.':
3773 countries = get_first(microformats, 'availableCountries')
3774 if not countries:
3775 regions_allowed = search_meta('regionsAllowed')
3776 countries = regions_allowed.split(',') if regions_allowed else None
3777 self.raise_geo_restricted(subreason, countries, metadata_available=True)
3778 reason += f'. {subreason}'
3779 if reason:
3780 self.raise_no_formats(reason, expected=True)
3781
3782 keywords = get_first(video_details, 'keywords', expected_type=list) or []
3783 if not keywords and webpage:
3784 keywords = [
3785 unescapeHTML(m.group('content'))
3786 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
3787 for keyword in keywords:
3788 if keyword.startswith('yt:stretch='):
3789 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
3790 if mobj:
3791 # NB: float is intentional for forcing float division
3792 w, h = (float(v) for v in mobj.groups())
3793 if w > 0 and h > 0:
3794 ratio = w / h
3795 for f in formats:
3796 if f.get('vcodec') != 'none':
3797 f['stretched_ratio'] = ratio
3798 break
3799 thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
3800 thumbnail_url = search_meta(['og:image', 'twitter:image'])
3801 if thumbnail_url:
3802 thumbnails.append({
3803 'url': thumbnail_url,
3804 })
3805 original_thumbnails = thumbnails.copy()
3806
3807 # The best resolution thumbnails sometimes does not appear in the webpage
3808 # See: https://github.com/yt-dlp/yt-dlp/issues/340
3809 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
3810 thumbnail_names = [
3811 # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
3812 # in resolution, these are not the custom thumbnail. So de-prioritize them
3813 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
3814 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
3815 ]
3816 n_thumbnail_names = len(thumbnail_names)
3817 thumbnails.extend({
3818 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
3819 video_id=video_id, name=name, ext=ext,
3820 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
3821 } for name in thumbnail_names for ext in ('webp', 'jpg'))
3822 for thumb in thumbnails:
3823 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
3824 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
3825 self._remove_duplicate_formats(thumbnails)
3826 self._downloader._sort_thumbnails(original_thumbnails)
3827
3828 category = get_first(microformats, 'category') or search_meta('genre')
3829 channel_id = str_or_none(
3830 get_first(video_details, 'channelId')
3831 or get_first(microformats, 'externalChannelId')
3832 or search_meta('channelId'))
3833 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
3834
3835 live_content = get_first(video_details, 'isLiveContent')
3836 is_upcoming = get_first(video_details, 'isUpcoming')
3837 if is_live is None:
3838 if is_upcoming or live_content is False:
3839 is_live = False
3840 if is_upcoming is None and (live_content or is_live):
3841 is_upcoming = False
3842 live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
3843 live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
3844 if not duration and live_end_time and live_start_time:
3845 duration = live_end_time - live_start_time
3846
3847 if is_live and self.get_param('live_from_start'):
3848 self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)
3849
3850 formats.extend(self._extract_storyboard(player_responses, duration))
3851
3852 # source_preference is lower for throttled/potentially damaged formats
3853 self._sort_formats(formats, (
3854 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))
3855
3856 info = {
3857 'id': video_id,
3858 'title': video_title,
3859 'formats': formats,
3860 'thumbnails': thumbnails,
3861 # The best thumbnail that we are sure exists. Prevents unnecessary
3862 # URL checking if user don't care about getting the best possible thumbnail
3863 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
3864 'description': video_description,
3865 'uploader': get_first(video_details, 'author'),
3866 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
3867 'uploader_url': owner_profile_url,
3868 'channel_id': channel_id,
3869 'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),
3870 'duration': duration,
3871 'view_count': int_or_none(
3872 get_first((video_details, microformats), (..., 'viewCount'))
3873 or search_meta('interactionCount')),
3874 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
3875 'age_limit': 18 if (
3876 get_first(microformats, 'isFamilySafe') is False
3877 or search_meta('isFamilyFriendly') == 'false'
3878 or search_meta('og:restrictions:age') == '18+') else 0,
3879 'webpage_url': webpage_url,
3880 'categories': [category] if category else None,
3881 'tags': keywords,
3882 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
3883 'is_live': is_live,
3884 'was_live': (False if is_live or is_upcoming or live_content is False
3885 else None if is_live is None or is_upcoming is None
3886 else live_content),
3887 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
3888 'release_timestamp': live_start_time,
3889 }
3890
3891 if get_first(video_details, 'isPostLiveDvr'):
3892 self.write_debug('Video is in Post-Live Manifestless mode')
3893 info['live_status'] = 'post_live'
3894 if (duration or 0) > 4 * 3600:
3895 self.report_warning(
3896 'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '
3897 'This is a known issue and patches are welcome')
3898
3899 subtitles = {}
3900 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
3901 if pctr:
3902 def get_lang_code(track):
3903 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
3904 or track.get('languageCode'))
3905
3906 # Converted into dicts to remove duplicates
3907 captions = {
3908 get_lang_code(sub): sub
3909 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
3910 translation_languages = {
3911 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
3912 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
3913
3914 def process_language(container, base_url, lang_code, sub_name, query):
3915 lang_subs = container.setdefault(lang_code, [])
3916 for fmt in self._SUBTITLE_FORMATS:
3917 query.update({
3918 'fmt': fmt,
3919 })
3920 lang_subs.append({
3921 'ext': fmt,
3922 'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
3923 'name': sub_name,
3924 })
3925
3926 # NB: Constructing the full subtitle dictionary is slow
3927 get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
3928 self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
3929 for lang_code, caption_track in captions.items():
3930 base_url = caption_track.get('baseUrl')
3931 orig_lang = parse_qs(base_url).get('lang', [None])[-1]
3932 if not base_url:
3933 continue
3934 lang_name = self._get_text(caption_track, 'name', max_runs=1)
3935 if caption_track.get('kind') != 'asr':
3936 if not lang_code:
3937 continue
3938 process_language(
3939 subtitles, base_url, lang_code, lang_name, {})
3940 if not caption_track.get('isTranslatable'):
3941 continue
3942 for trans_code, trans_name in translation_languages.items():
3943 if not trans_code:
3944 continue
3945 orig_trans_code = trans_code
3946 if caption_track.get('kind') != 'asr':
3947 if not get_translated_subs:
3948 continue
3949 trans_code += f'-{lang_code}'
3950 trans_name += format_field(lang_name, None, ' from %s')
3951 # Add an "-orig" label to the original language so that it can be distinguished.
3952 # The subs are returned without "-orig" as well for compatibility
3953 if lang_code == f'a-{orig_trans_code}':
3954 process_language(
3955 automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
3956 # Setting tlang=lang returns damaged subtitles.
3957 process_language(automatic_captions, base_url, trans_code, trans_name,
3958 {} if orig_lang == orig_trans_code else {'tlang': trans_code})
3959
3960 info['automatic_captions'] = automatic_captions
3961 info['subtitles'] = subtitles
3962
3963 parsed_url = urllib.parse.urlparse(url)
3964 for component in [parsed_url.fragment, parsed_url.query]:
3965 query = urllib.parse.parse_qs(component)
3966 for k, v in query.items():
3967 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
3968 d_k += '_time'
3969 if d_k not in info and k in s_ks:
3970 info[d_k] = parse_duration(query[k][0])
3971
3972 # Youtube Music Auto-generated description
3973 if video_description:
3974 mobj = re.search(
3975 r'''(?xs)
3976 (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
3977 (?P<album>[^\n]+)
3978 (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
3979 (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
3980 (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
3981 .+\nAuto-generated\ by\ YouTube\.\s*$
3982 ''', video_description)
3983 if mobj:
3984 release_year = mobj.group('release_year')
3985 release_date = mobj.group('release_date')
3986 if release_date:
3987 release_date = release_date.replace('-', '')
3988 if not release_year:
3989 release_year = release_date[:4]
3990 info.update({
3991 'album': mobj.group('album'.strip()),
3992 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3993 'track': mobj.group('track').strip(),
3994 'release_date': release_date,
3995 'release_year': int_or_none(release_year),
3996 })
3997
3998 initial_data = None
3999 if webpage:
4000 initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
4001 if not initial_data:
4002 query = {'videoId': video_id}
4003 query.update(self._get_checkok_params())
4004 initial_data = self._extract_response(
4005 item_id=video_id, ep='next', fatal=False,
4006 ytcfg=master_ytcfg, query=query,
4007 headers=self.generate_api_headers(ytcfg=master_ytcfg),
4008 note='Downloading initial data API JSON')
4009
4010 info['comment_count'] = traverse_obj(initial_data, (
4011 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
4012 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'
4013 ), (
4014 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
4015 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'
4016 ), expected_type=int_or_none, get_all=False)
4017
4018 try: # This will error if there is no livechat
4019 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
4020 except (KeyError, IndexError, TypeError):
4021 pass
4022 else:
4023 info.setdefault('subtitles', {})['live_chat'] = [{
4024 # url is needed to set cookies
4025 'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
4026 'video_id': video_id,
4027 'ext': 'json',
4028 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
4029 }]
4030
4031 if initial_data:
4032 info['chapters'] = (
4033 self._extract_chapters_from_json(initial_data, duration)
4034 or self._extract_chapters_from_engagement_panel(initial_data, duration)
4035 or self._extract_chapters_from_description(video_description, duration)
4036 or None)
4037
4038 contents = traverse_obj(
4039 initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
4040 expected_type=list, default=[])
4041
4042 vpir = get_first(contents, 'videoPrimaryInfoRenderer')
4043 if vpir:
4044 stl = vpir.get('superTitleLink')
4045 if stl:
4046 stl = self._get_text(stl)
4047 if try_get(
4048 vpir,
4049 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
4050 info['location'] = stl
4051 else:
4052 mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
4053 if mobj:
4054 info.update({
4055 'series': mobj.group(1),
4056 'season_number': int(mobj.group(2)),
4057 'episode_number': int(mobj.group(3)),
4058 })
4059 for tlb in (try_get(
4060 vpir,
4061 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
4062 list) or []):
4063 tbrs = variadic(
4064 traverse_obj(
4065 tlb, 'toggleButtonRenderer',
4066 ('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer'),
4067 default=[]))
4068 for tbr in tbrs:
4069 for getter, regex in [(
4070 lambda x: x['defaultText']['accessibility']['accessibilityData'],
4071 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
4072 lambda x: x['accessibility'],
4073 lambda x: x['accessibilityData']['accessibilityData'],
4074 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
4075 label = (try_get(tbr, getter, dict) or {}).get('label')
4076 if label:
4077 mobj = re.match(regex, label)
4078 if mobj:
4079 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
4080 break
4081 sbr_tooltip = try_get(
4082 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
4083 if sbr_tooltip:
4084 like_count, dislike_count = sbr_tooltip.split(' / ')
4085 info.update({
4086 'like_count': str_to_int(like_count),
4087 'dislike_count': str_to_int(dislike_count),
4088 })
4089 vsir = get_first(contents, 'videoSecondaryInfoRenderer')
4090 if vsir:
4091 vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
4092 info.update({
4093 'channel': self._get_text(vor, 'title'),
4094 'channel_follower_count': self._get_count(vor, 'subscriberCountText')})
4095
4096 rows = try_get(
4097 vsir,
4098 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
4099 list) or []
4100 multiple_songs = False
4101 for row in rows:
4102 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
4103 multiple_songs = True
4104 break
4105 for row in rows:
4106 mrr = row.get('metadataRowRenderer') or {}
4107 mrr_title = mrr.get('title')
4108 if not mrr_title:
4109 continue
4110 mrr_title = self._get_text(mrr, 'title')
4111 mrr_contents_text = self._get_text(mrr, ('contents', 0))
4112 if mrr_title == 'License':
4113 info['license'] = mrr_contents_text
4114 elif not multiple_songs:
4115 if mrr_title == 'Album':
4116 info['album'] = mrr_contents_text
4117 elif mrr_title == 'Artist':
4118 info['artist'] = mrr_contents_text
4119 elif mrr_title == 'Song':
4120 info['track'] = mrr_contents_text
4121
4122 fallbacks = {
4123 'channel': 'uploader',
4124 'channel_id': 'uploader_id',
4125 'channel_url': 'uploader_url',
4126 }
4127
4128 # The upload date for scheduled, live and past live streams / premieres in microformats
4129 # may be different from the stream date. Although not in UTC, we will prefer it in this case.
4130 # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
4131 upload_date = (
4132 unified_strdate(get_first(microformats, 'uploadDate'))
4133 or unified_strdate(search_meta('uploadDate')))
4134 if not upload_date or (
4135 not info.get('is_live')
4136 and not info.get('was_live')
4137 and info.get('live_status') != 'is_upcoming'
4138 and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
4139 ):
4140 upload_date = strftime_or_none(
4141 self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date
4142 info['upload_date'] = upload_date
4143
4144 for to, frm in fallbacks.items():
4145 if not info.get(to):
4146 info[to] = info.get(frm)
4147
4148 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
4149 v = info.get(s_k)
4150 if v:
4151 info[d_k] = v
4152
4153 badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))
4154
4155 is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
4156 or get_first(video_details, 'isPrivate', expected_type=bool))
4157
4158 info['availability'] = (
4159 'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
4160 else self._availability(
4161 is_private=is_private,
4162 needs_premium=(
4163 self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)
4164 or False if initial_data and is_private is not None else None),
4165 needs_subscription=(
4166 self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)
4167 or False if initial_data and is_private is not None else None),
4168 needs_auth=info['age_limit'] >= 18,
4169 is_unlisted=None if is_private is None else (
4170 self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
4171 or get_first(microformats, 'isUnlisted', expected_type=bool))))
4172
4173 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
4174
4175 self.mark_watched(video_id, player_responses)
4176
4177 return info
4178
4179
4180 class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
4181
@staticmethod
def passthrough_smuggled_data(func):
    """
    Decorator for methods that are called as func(self, url, smuggled_data).

    The returned wrapper accepts (self, url). It unsmuggles the URL, sets
    smuggled_data['is_music_url'] when self.is_music_url(url) is truthy and
    calls func. When there is smuggled data and the result has entries, the
    data is smuggled back into the URL of every entry (lazily).
    """
    def _resmuggle(items, payload):
        for item in items:
            # TODO: Convert URL to music.youtube instead.
            # Do we need to passthrough any other smuggled_data?
            item['url'] = smuggle_url(item['url'], payload)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, payload = unsmuggle_url(url, {})
        if self.is_music_url(url):
            payload['is_music_url'] = True
        result = func(self, url, payload)
        # Entries are only looked at when there is something to pass through
        entries = result.get('entries') if payload else None
        if entries:
            result['entries'] = _resmuggle(entries, payload)
        return result
    return wrapper
4201
4202 def _extract_channel_id(self, webpage):
4203 channel_id = self._html_search_meta(
4204 'channelId', webpage, 'channel id', default=None)
4205 if channel_id:
4206 return channel_id
4207 channel_url = self._html_search_meta(
4208 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
4209 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
4210 'twitter:app:url:googleplay'), webpage, 'channel url')
4211 return self._search_regex(
4212 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
4213 channel_url, 'channel id')
4214
4215 @staticmethod
4216 def _extract_basic_item_renderer(item):
4217 # Modified from _extract_grid_item_renderer
4218 known_basic_renderers = (
4219 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
4220 )
4221 for key, renderer in item.items():
4222 if not isinstance(renderer, dict):
4223 continue
4224 elif key in known_basic_renderers:
4225 return renderer
4226 elif key.startswith('grid') and key.endswith('Renderer'):
4227 return renderer
4228
4229 def _grid_entries(self, grid_renderer):
4230 for item in grid_renderer['items']:
4231 if not isinstance(item, dict):
4232 continue
4233 renderer = self._extract_basic_item_renderer(item)
4234 if not isinstance(renderer, dict):
4235 continue
4236 title = self._get_text(renderer, 'title')
4237
4238 # playlist
4239 playlist_id = renderer.get('playlistId')
4240 if playlist_id:
4241 yield self.url_result(
4242 'https://www.youtube.com/playlist?list=%s' % playlist_id,
4243 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4244 video_title=title)
4245 continue
4246 # video
4247 video_id = renderer.get('videoId')
4248 if video_id:
4249 yield self._extract_video(renderer)
4250 continue
4251 # channel
4252 channel_id = renderer.get('channelId')
4253 if channel_id:
4254 yield self.url_result(
4255 'https://www.youtube.com/channel/%s' % channel_id,
4256 ie=YoutubeTabIE.ie_key(), video_title=title)
4257 continue
4258 # generic endpoint URL support
4259 ep_url = urljoin('https://www.youtube.com/', try_get(
4260 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
4261 str))
4262 if ep_url:
4263 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
4264 if ie.suitable(ep_url):
4265 yield self.url_result(
4266 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
4267 break
4268
def _music_reponsive_list_entry(self, renderer):
    """
    Build a music.youtube.com url_result for a list item renderer.

    Checked in order: a playlist item video id, a watch endpoint carrying a
    playlist id (with or without a video id), a browse endpoint id.
    Returns None when none of them is present.
    """
    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(
            f'https://music.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        if video_id:
            playlist_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(
        f'https://music.youtube.com/browse/{browse_id}',
        ie=YoutubeTabIE.ie_key(), video_id=browse_id)
4286
4287 def _shelf_entries_from_content(self, shelf_renderer):
4288 content = shelf_renderer.get('content')
4289 if not isinstance(content, dict):
4290 return
4291 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
4292 if renderer:
4293 # TODO: add support for nested playlists so each shelf is processed
4294 # as separate playlist
4295 # TODO: this includes only first N items
4296 yield from self._grid_entries(renderer)
4297 renderer = content.get('horizontalListRenderer')
4298 if renderer:
4299 # TODO
4300 pass
4301
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """
    Yield the entries of a shelf.

    If the shelf has an endpoint URL, a url_result for it is yielded first;
    the entries found in the shelf content are yielded afterwards in every
    case. With skip_channels set, a shelf whose URL contains '/channels?'
    produces nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)

    # Skip links to other channels. Note that checking for
    # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
    # will not work
    if shelf_url and skip_channels and '/channels?' in shelf_url:
        return

    if shelf_url:
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))

    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
4317
4318 def _playlist_entries(self, video_list_renderer):
4319 for content in video_list_renderer['contents']:
4320 if not isinstance(content, dict):
4321 continue
4322 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
4323 if not isinstance(renderer, dict):
4324 continue
4325 video_id = renderer.get('videoId')
4326 if not video_id:
4327 continue
4328 yield self._extract_video(renderer)
4329
def _rich_entries(self, rich_grid_renderer):
    """
    Yield the extracted video of a rich item, taken from its 'videoRenderer'
    or 'reelItemRenderer' content; yields nothing if there is no video id.
    """
    renderer_path = ('content', ('videoRenderer', 'reelItemRenderer'))
    video_renderer = traverse_obj(rich_grid_renderer, renderer_path, get_all=False) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
4337
4338 def _video_entry(self, video_renderer):
4339 video_id = video_renderer.get('videoId')
4340 if video_id:
4341 return self._extract_video(video_renderer)
4342
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """
    Return a url_result for the page a hashtag tile links to (titled with the
    tile's 'hashtag' text), or None if the tile has no usable URL.
    """
    tap_path = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    tile_url = urljoin('https://youtube.com', tap_path)
    if not tile_url:
        return None
    return self.url_result(
        tile_url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4349
def _post_thread_entries(self, post_thread_renderer):
    """
    Yield the entries referenced by a post: its attached video, its attached
    playlist and every video URL linked from the post text (except a link to
    the attached video itself).
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # Attached video
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # Attached playlist
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # Video links inside the post text
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not (link and YoutubeIE.suitable(link)):
            continue
        linked_id = YoutubeIE._match_id(link)
        # The attached video has already been yielded above
        if linked_id != video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)
4385
4386 def _post_thread_continuation_entries(self, post_thread_continuation):
4387 contents = post_thread_continuation.get('contents')
4388 if not isinstance(contents, list):
4389 return
4390 for content in contents:
4391 renderer = content.get('backstagePostThreadRenderer')
4392 if isinstance(renderer, dict):
4393 yield from self._post_thread_entries(renderer)
4394 continue
4395 renderer = content.get('videoRenderer')
4396 if isinstance(renderer, dict):
4397 yield self._video_entry(renderer)
4398
4399 r''' # unused
4400 def _rich_grid_entries(self, contents):
4401 for content in contents:
4402 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4403 if video_renderer:
4404 entry = self._video_entry(video_renderer)
4405 if entry:
4406 yield entry
4407 '''
4408
def _report_history_entries(self, renderer):
    """Yield a YoutubeIE url_result for every URL linked from the rows of a report history table."""
    url_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for relative_url in traverse_obj(renderer, url_path):
        yield self.url_result(urljoin('https://www.youtube.com', relative_url), YoutubeIE)
4415
def _extract_entries(self, parent_renderer, continuation_list):
    """
    Yield the entries found in parent_renderer['contents'].

    @param parent_renderer: renderer dict whose 'contents' list is walked
    @param continuation_list: one-element list used as an out-parameter. It is
        reset to [None] when iteration starts; slot 0 then receives the result
        of self._extract_continuation() for the innermost renderer that was
        processed, falling back to the section renderer and finally to
        parent_renderer while the slot is still empty.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Dispatch table: renderer key -> callable returning an iterable of entries.
    # It does not depend on the item being processed, so it is built only once.
    known_renderers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
    for content in contents:
        if not isinstance(content, dict):
            continue
        is_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            # Not a section-like renderer: only these two special cases are handled
            if content.get('richItemRenderer'):
                yield from self._rich_entries(content['richItemRenderer'])
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
        for isr_content in isr_contents:
            if not isinstance(isr_content, dict):
                continue

            for key, renderer in isr_content.items():
                if key not in known_renderers:
                    continue
                # Some handlers may produce None (e.g. a video without id)
                for entry in known_renderers[key](renderer):
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                # Only the first known renderer of an item is processed
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
4468
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """
    Yield the entries of a tab, following continuations page by page.

    The entries found in tab['content'] are yielded first. Afterwards one
    API response is requested per continuation until there is no
    continuation left, a request returns nothing, or the response does not
    contain any renderer listed in known_renderers.
    """
    # One-element list used as an out-parameter: self._extract_entries()
    # stores the continuation it finds in slot 0
    continuation_list = [None]
    # The lambda looks `continuation_list` up when it is called, so it also
    # picks up the fresh list that is bound to the name further below
    extract_entries = lambda x: self._extract_entries(x, continuation_list)
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        # key of the first continuation item -> (handler, key under which the
        # handler expects the item list; None means the response object
        # itself is handed to the handler)
        known_renderers = {
            'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'gridChannelRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
            'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
            'playlistVideoListContinuation': (self._playlist_entries, None),
            'gridContinuation': (self._grid_entries, None),
            'itemSectionContinuation': (self._post_thread_continuation_entries, None),
            'sectionListContinuation': (extract_entries, None),  # for feeds
        }

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems'
        ), 'continuationContents', get_all=False)
        # Either the first element (list-shaped result) or the object itself
        # (dict-shaped result); its keys decide which handler is used
        continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for key in continuation_item.keys():
            if key not in known_renderers:
                continue
            func, parent_key = known_renderers[key]
            # Wrap the items so that they look like the renderer `func` expects
            video_items_renderer = {parent_key: continuation_items} if parent_key else continuation_items
            # Fresh out-parameter for this page; see the note on the lambda above
            continuation_list = [None]
            yield from func(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        # No known renderer in this response: nothing more can be extracted
        if not video_items_renderer:
            break
4531
@staticmethod
def _extract_selected_tab(tabs, fatal=True):
    """
    Return the renderer ('tabRenderer' or 'expandableTabRenderer') of the tab
    whose 'selected' value is True.

    If there is no such tab, ExtractorError is raised when fatal is set and
    None is returned otherwise.
    """
    for candidate in tabs:
        tab_renderer = dict_get(candidate, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    if not fatal:
        return None
    raise ExtractorError('Unable to find selected tab')
4541
def _extract_uploader(self, data):
    """
    Return a dict with the 'uploader', 'uploader_id' and 'uploader_url' values
    found in the secondary playlist sidebar; values that are None are left
    out, and the dict is empty when the sidebar names no owner.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_text = owner.get('text')
    found = {
        # A text of the form "by X and N others" is reduced to X
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], str)),
    }
    return {field: value for field, value in found.items() if value is not None}
4557
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Build the playlist result for the selected tab of a page.

    The metadata is taken from the channel metadata renderer if present and
    from the playlist metadata renderer otherwise; the entries come from
    self._entries() for the selected tab.

    @param item_id: used as playlist id when the metadata provides none
    @param data:    response the metadata is read from
    @param tabs:    tabs to pick the selected one from
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    tags = []

    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    # `renderer` is now whichever of the two metadata renderers was found
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        # Stays None for playlist metadata; item_id is substituted further below
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        return url_or_none((url or '').split('=')[0] + '=s0')

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    if avatar_thumbnails:
        uncropped_avatar = _get_uncropped(avatar_thumbnails[0]['url'])
        if uncropped_avatar:
            avatar_thumbnails.append({
                'url': uncropped_avatar,
                'id': 'avatar_uncropped',
                'preference': 1
            })

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10

    if channel_banners:
        uncropped_banner = _get_uncropped(channel_banners[0]['url'])
        if uncropped_banner:
            channel_banners.append({
                'url': uncropped_banner,
                'id': 'banner_uncropped',
                'preference': -5
            })

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    # The sidebar stats are read by position: 0 is used as playlist count,
    # 1 as view count and 2 as the last-updated text
    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix = self._parse_time_text(self._get_text(playlist_stats, 2))
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    # Append the selected tab's title/expandedText with a ' - ' separator
    # (presumably nothing is appended when the field is missing - depends on format_field)
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    # Without channel metadata, fall back to the owner shown in the sidebar
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the (possibly just updated) uploader_* fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})
    return self.playlist_result(
        self._entries(
            selected_tab, playlist_id, ytcfg,
            self._extract_account_syncid(ytcfg, data),
            self._extract_visitor_data(data, ytcfg)),
        **metadata)
4649
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the videos of an inline playlist, page by page.

    Every page is requested from the 'next' endpoint, starting at the last
    video of the previous page. Videos up to and including the last one
    already yielded are skipped. Iteration stops when a page has no videos,
    has no new videos, or when the response contains no playlist.

    @param playlist:    playlist renderer of the first page
    @param playlist_id: id sent with every page request
    @param data:        response used for account/visitor information
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded (index 0 on the first page)
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The last item is not necessarily a playlistPanelVideoRenderer with a
        # watch endpoint; fall back to the defaults below in that case
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        # A response without playlist has nothing left to iterate over
        if not playlist:
            return
4680
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Return the result for a playlist renderer.

    If the playlist links to a playlist page other than url, the extraction
    is delegated to that page; otherwise (and for playlists known to have a
    broken page) the inline playlist is extracted directly.
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    playlist_url = urljoin(url, endpoint_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    if not playlist_url or playlist_url == url or is_known_unviewable:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
4703
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public

    Three sources are consulted: the badges of the primary sidebar renderer,
    the 'privacy' value of the playlist header and the selected entry of the
    privacy dropdown.

    @param data: response
    @returns:    'public' if any of the three sources says so, otherwise
                 the result of self._availability()
    """
    renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}

    player_header_privacy = traverse_obj(
        data, ('header', 'playlistHeaderRenderer', 'privacy'), expected_type=str)

    badges = self._extract_badges(renderer)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_setting_icon = traverse_obj(
        renderer, (
            'privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
            lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        get_all=False, expected_type=str)

    # Reading aid for is_private/is_unlisted below: `or` binds tighter than a
    # conditional expression, so each of them is evaluated as
    #   (badge or header == X) if header is not None
    #   else ((icon == Y) if icon is not None else None)
    # NOTE(review): as a consequence the badge is ignored whenever the header
    # privacy is missing - confirm that this is intended
    return (
        'public' if (
            self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or player_header_privacy == 'PUBLIC'
            or privacy_setting_icon == 'PRIVACY_PUBLIC')
        else self._availability(
            is_private=(
                self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
                or player_header_privacy == 'PRIVATE' if player_header_privacy is not None
                else privacy_setting_icon == 'PRIVACY_PRIVATE' if privacy_setting_icon is not None else None),
            is_unlisted=(
                self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
                or player_header_privacy == 'UNLISTED' if player_header_privacy is not None
                else privacy_setting_icon == 'PRIVACY_UNLISTED' if privacy_setting_icon is not None else None),
            # A missing badge is passed on as None ("unknown"), not as False
            needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
            needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
            needs_auth=False))
4741
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy value stored under the key info_renderer in the
    items of the playlist sidebar (restricted to expected_type), or None.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next((found for found in candidates if found), None)
4750
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again, this time including unavailable videos.

    The browse id and params of the 'show unavailable videos' menu item are
    used if that item exists; otherwise default values derived from item_id
    are sent. Returns None if data has no primary sidebar renderer.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return None

    browse_endpoint = {}
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        label = try_get(
            nav_item, lambda x: x['text']['simpleText'], str)
        if label and label.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
4786
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the 'skip' extractor argument of YoutubeTabIE"""
    skipped_steps = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped_steps
4790
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its initial data, retrying via RetryManager.

    @returns    (webpage, data); either may be None if nothing was obtained
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as err:
            cause = err.cause
            # Network errors are retried, except HTTP 403 and 429 responses
            should_retry = isinstance(cause, network_exceptions) and not (
                isinstance(cause, urllib.error.HTTPError) and cause.code in (403, 429))
            if should_retry:
                retry.error = err
                continue
            self._error_or_warning(err, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as err:
            self._error_or_warning(err, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        has_content = traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not has_content:
            retry.error = ExtractorError('Incomplete yt initial data received')

    return webpage, data
4818
def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """
    Use if failed to extract ytcfg (and data) from initial webpage

    Does nothing when ytcfg is available or the session is not authenticated.
    Otherwise raises ExtractorError if fatal and 'authcheck' is not in the
    'skip' extractor argument; in all remaining cases only a warning is reported.
    """
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    authcheck_skipped = 'authcheck' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if not authcheck_skipped and fatal:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)
4830
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Obtain the tab data, from the webpage if allowed and from the API otherwise.

    The webpage is tried first unless skip_webpage is set; if that yields no
    data, the tab endpoint is queried instead.

    @returns    (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)

        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
4850
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve the URL through the API and fetch the response of the resolved endpoint.

    If neither a browse nor a watch endpoint is resolved, raises ExtractorError
    when fatal; otherwise a warning is reported and None is returned.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolved endpoint, API endpoint to query), in order of preference
    endpoint_map = (('browseEndpoint', 'browse'), ('watchEndpoint', 'next'))
    for endpoint_key, api_ep in endpoint_map:
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)
4868
# Used as the 'params' of the search request when _search_results is not given any
_SEARCH_PARAMS = None

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of a search, requesting further pages while a continuation is available.

    @param query            Search query string
    @param params           Value of 'params' in the request; when left at
                            NO_DEFAULT, self._SEARCH_PARAMS is used instead
    @param default_client   Client passed on to the ytcfg, header and API helpers
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    request_body = {'query': query}
    if params:
        request_body['params'] = params

    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # Distinct top-level keys of the paths above
    check_get_keys = tuple({path[0] for path in content_keys})

    if self.skip_webpage:
        ytcfg = {}
    else:
        ytcfg = self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # One-element list that is handed to _extract_entries; its first item is
    # merged into the next request and decides whether another page is fetched
    continuation_holder = [None]
    response = None
    for page_num in itertools.count(1):
        request_body.update(continuation_holder[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response), default_client=default_client)
        response = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=request_body,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        page_contents = traverse_obj(response, *content_keys)
        yield from self._extract_entries(
            {'contents': list(variadic(page_contents))}, continuation_holder)
        if not continuation_holder[0]:
            break
4903
4904
4905 class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
4906 IE_DESC = 'YouTube Tabs'
4907 _VALID_URL = r'''(?x:
4908 https?://
4909 (?:\w+\.)?
4910 (?:
4911 youtube(?:kids)?\.com|
4912 %(invidious)s
4913 )/
4914 (?:
4915 (?P<channel_type>channel|c|user|browse)/|
4916 (?P<not_channel>
4917 feed/|hashtag/|
4918 (?:playlist|watch)\?.*?\blist=
4919 )|
4920 (?!(?:%(reserved_names)s)\b) # Direct URLs
4921 )
4922 (?P<id>[^/?\#&]+)
4923 )''' % {
4924 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
4925 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
4926 }
4927 IE_NAME = 'youtube:tab'
4928
4929 _TESTS = [{
4930 'note': 'playlists, multipage',
4931 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
4932 'playlist_mincount': 94,
4933 'info_dict': {
4934 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
4935 'title': 'Igor Kleiner - Playlists',
4936 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
4937 'uploader': 'Igor Kleiner',
4938 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4939 'channel': 'Igor Kleiner',
4940 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4941 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4942 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4943 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4944 'channel_follower_count': int
4945 },
4946 }, {
4947 'note': 'playlists, multipage, different order',
4948 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
4949 'playlist_mincount': 94,
4950 'info_dict': {
4951 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
4952 'title': 'Igor Kleiner - Playlists',
4953 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
4954 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4955 'uploader': 'Igor Kleiner',
4956 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4957 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4958 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4959 'channel': 'Igor Kleiner',
4960 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4961 'channel_follower_count': int
4962 },
4963 }, {
4964 'note': 'playlists, series',
4965 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
4966 'playlist_mincount': 5,
4967 'info_dict': {
4968 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4969 'title': '3Blue1Brown - Playlists',
4970 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4971 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4972 'uploader': '3Blue1Brown',
4973 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4974 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4975 'channel': '3Blue1Brown',
4976 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
4977 'tags': ['Mathematics'],
4978 'channel_follower_count': int
4979 },
4980 }, {
4981 'note': 'playlists, singlepage',
4982 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
4983 'playlist_mincount': 4,
4984 'info_dict': {
4985 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4986 'title': 'ThirstForScience - Playlists',
4987 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
4988 'uploader': 'ThirstForScience',
4989 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4990 'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4991 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4992 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4993 'tags': 'count:13',
4994 'channel': 'ThirstForScience',
4995 'channel_follower_count': int
4996 }
4997 }, {
4998 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
4999 'only_matching': True,
5000 }, {
5001 'note': 'basic, single video playlist',
5002 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5003 'info_dict': {
5004 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5005 'uploader': 'Sergey M.',
5006 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5007 'title': 'youtube-dl public playlist',
5008 'description': '',
5009 'tags': [],
5010 'view_count': int,
5011 'modified_date': '20201130',
5012 'channel': 'Sergey M.',
5013 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5014 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5015 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5016 'availability': 'public',
5017 },
5018 'playlist_count': 1,
5019 }, {
5020 'note': 'empty playlist',
5021 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5022 'info_dict': {
5023 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5024 'uploader': 'Sergey M.',
5025 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5026 'title': 'youtube-dl empty playlist',
5027 'tags': [],
5028 'channel': 'Sergey M.',
5029 'description': '',
5030 'modified_date': '20160902',
5031 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5032 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5033 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5034 'availability': 'public',
5035 },
5036 'playlist_count': 0,
5037 }, {
5038 'note': 'Home tab',
5039 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
5040 'info_dict': {
5041 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5042 'title': 'lex will - Home',
5043 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5044 'uploader': 'lex will',
5045 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5046 'channel': 'lex will',
5047 'tags': ['bible', 'history', 'prophesy'],
5048 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5049 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5050 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5051 'channel_follower_count': int
5052 },
5053 'playlist_mincount': 2,
5054 }, {
5055 'note': 'Videos tab',
5056 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
5057 'info_dict': {
5058 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5059 'title': 'lex will - Videos',
5060 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5061 'uploader': 'lex will',
5062 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5063 'tags': ['bible', 'history', 'prophesy'],
5064 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5065 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5066 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5067 'channel': 'lex will',
5068 'channel_follower_count': int
5069 },
5070 'playlist_mincount': 975,
5071 }, {
5072 'note': 'Videos tab, sorted by popular',
5073 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
5074 'info_dict': {
5075 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5076 'title': 'lex will - Videos',
5077 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5078 'uploader': 'lex will',
5079 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5080 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5081 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5082 'channel': 'lex will',
5083 'tags': ['bible', 'history', 'prophesy'],
5084 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5085 'channel_follower_count': int
5086 },
5087 'playlist_mincount': 199,
5088 }, {
5089 'note': 'Playlists tab',
5090 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
5091 'info_dict': {
5092 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5093 'title': 'lex will - Playlists',
5094 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5095 'uploader': 'lex will',
5096 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5097 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5098 'channel': 'lex will',
5099 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5100 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5101 'tags': ['bible', 'history', 'prophesy'],
5102 'channel_follower_count': int
5103 },
5104 'playlist_mincount': 17,
5105 }, {
5106 'note': 'Community tab',
5107 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
5108 'info_dict': {
5109 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5110 'title': 'lex will - Community',
5111 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5112 'uploader': 'lex will',
5113 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5114 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5115 'channel': 'lex will',
5116 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5117 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5118 'tags': ['bible', 'history', 'prophesy'],
5119 'channel_follower_count': int
5120 },
5121 'playlist_mincount': 18,
5122 }, {
5123 'note': 'Channels tab',
5124 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
5125 'info_dict': {
5126 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5127 'title': 'lex will - Channels',
5128 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5129 'uploader': 'lex will',
5130 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5131 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5132 'channel': 'lex will',
5133 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5134 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5135 'tags': ['bible', 'history', 'prophesy'],
5136 'channel_follower_count': int
5137 },
5138 'playlist_mincount': 12,
5139 }, {
5140 'note': 'Search tab',
5141 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
5142 'playlist_mincount': 40,
5143 'info_dict': {
5144 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5145 'title': '3Blue1Brown - Search - linear algebra',
5146 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
5147 'uploader': '3Blue1Brown',
5148 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
5149 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5150 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5151 'tags': ['Mathematics'],
5152 'channel': '3Blue1Brown',
5153 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5154 'channel_follower_count': int
5155 },
5156 }, {
5157 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5158 'only_matching': True,
5159 }, {
5160 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5161 'only_matching': True,
5162 }, {
5163 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5164 'only_matching': True,
5165 }, {
5166 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
5167 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5168 'info_dict': {
5169 'title': '29C3: Not my department',
5170 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5171 'uploader': 'Christiaan008',
5172 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5173 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
5174 'tags': [],
5175 'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
5176 'view_count': int,
5177 'modified_date': '20150605',
5178 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5179 'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
5180 'channel': 'Christiaan008',
5181 'availability': 'public',
5182 },
5183 'playlist_count': 96,
5184 }, {
5185 'note': 'Large playlist',
5186 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
5187 'info_dict': {
5188 'title': 'Uploads from Cauchemar',
5189 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
5190 'uploader': 'Cauchemar',
5191 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
5192 'channel_url': 'https://www.youtube.com/c/Cauchemar89',
5193 'tags': [],
5194 'modified_date': r're:\d{8}',
5195 'channel': 'Cauchemar',
5196 'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
5197 'view_count': int,
5198 'description': '',
5199 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
5200 'availability': 'public',
5201 },
5202 'playlist_mincount': 1123,
5203 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5204 }, {
5205 'note': 'even larger playlist, 8832 videos',
5206 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
5207 'only_matching': True,
5208 }, {
5209 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
5210 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
5211 'info_dict': {
5212 'title': 'Uploads from Interstellar Movie',
5213 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
5214 'uploader': 'Interstellar Movie',
5215 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
5216 'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
5217 'tags': [],
5218 'view_count': int,
5219 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
5220 'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
5221 'channel': 'Interstellar Movie',
5222 'description': '',
5223 'modified_date': r're:\d{8}',
5224 'availability': 'public',
5225 },
5226 'playlist_mincount': 21,
5227 }, {
5228 'note': 'Playlist with "show unavailable videos" button',
5229 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
5230 'info_dict': {
5231 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
5232 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
5233 'uploader': 'Phim Siêu Nhân Nhật Bản',
5234 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5235 'view_count': int,
5236 'channel': 'Phim Siêu Nhân Nhật Bản',
5237 'tags': [],
5238 'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5239 'description': '',
5240 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5241 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5242 'modified_date': r're:\d{8}',
5243 'availability': 'public',
5244 },
5245 'playlist_mincount': 200,
5246 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5247 }, {
5248 'note': 'Playlist with unavailable videos in page 7',
5249 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5250 'info_dict': {
5251 'title': 'Uploads from BlankTV',
5252 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
5253 'uploader': 'BlankTV',
5254 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5255 'channel': 'BlankTV',
5256 'channel_url': 'https://www.youtube.com/c/blanktv',
5257 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5258 'view_count': int,
5259 'tags': [],
5260 'uploader_url': 'https://www.youtube.com/c/blanktv',
5261 'modified_date': r're:\d{8}',
5262 'description': '',
5263 'availability': 'public',
5264 },
5265 'playlist_mincount': 1000,
5266 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5267 }, {
5268 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5269 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5270 'info_dict': {
5271 'title': 'Data Analysis with Dr Mike Pound',
5272 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5273 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5274 'uploader': 'Computerphile',
5275 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
5276 'uploader_url': 'https://www.youtube.com/user/Computerphile',
5277 'tags': [],
5278 'view_count': int,
5279 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5280 'channel_url': 'https://www.youtube.com/user/Computerphile',
5281 'channel': 'Computerphile',
5282 'availability': 'public',
5283 },
5284 'playlist_mincount': 11,
5285 }, {
5286 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5287 'only_matching': True,
5288 }, {
5289 'note': 'Playlist URL that does not actually serve a playlist',
5290 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5291 'info_dict': {
5292 'id': 'FqZTN594JQw',
5293 'ext': 'webm',
5294 'title': "Smiley's People 01 detective, Adventure Series, Action",
5295 'uploader': 'STREEM',
5296 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
5297 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
5298 'upload_date': '20150526',
5299 'license': 'Standard YouTube License',
5300 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5301 'categories': ['People & Blogs'],
5302 'tags': list,
5303 'view_count': int,
5304 'like_count': int,
5305 },
5306 'params': {
5307 'skip_download': True,
5308 },
5309 'skip': 'This video is not available.',
5310 'add_ie': [YoutubeIE.ie_key()],
5311 }, {
5312 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5313 'only_matching': True,
5314 }, {
5315 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5316 'only_matching': True,
5317 }, {
5318 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5319 'info_dict': {
5320 'id': 'Wq15eF5vCbI', # This will keep changing
5321 'ext': 'mp4',
5322 'title': str,
5323 'uploader': 'Sky News',
5324 'uploader_id': 'skynews',
5325 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
5326 'upload_date': r're:\d{8}',
5327 'description': str,
5328 'categories': ['News & Politics'],
5329 'tags': list,
5330 'like_count': int,
5331 'release_timestamp': 1642502819,
5332 'channel': 'Sky News',
5333 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5334 'age_limit': 0,
5335 'view_count': int,
5336 'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',
5337 'playable_in_embed': True,
5338 'release_date': '20220118',
5339 'availability': 'public',
5340 'live_status': 'is_live',
5341 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
5342 'channel_follower_count': int
5343 },
5344 'params': {
5345 'skip_download': True,
5346 },
5347 'expected_warnings': ['Ignoring subtitle tracks found in '],
5348 }, {
5349 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5350 'info_dict': {
5351 'id': 'a48o2S1cPoo',
5352 'ext': 'mp4',
5353 'title': 'The Young Turks - Live Main Show',
5354 'uploader': 'The Young Turks',
5355 'uploader_id': 'TheYoungTurks',
5356 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
5357 'upload_date': '20150715',
5358 'license': 'Standard YouTube License',
5359 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5360 'categories': ['News & Politics'],
5361 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5362 'like_count': int,
5363 },
5364 'params': {
5365 'skip_download': True,
5366 },
5367 'only_matching': True,
5368 }, {
5369 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5370 'only_matching': True,
5371 }, {
5372 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5373 'only_matching': True,
5374 }, {
5375 'note': 'A channel that is not live. Should raise error',
5376 'url': 'https://www.youtube.com/user/numberphile/live',
5377 'only_matching': True,
5378 }, {
5379 'url': 'https://www.youtube.com/feed/trending',
5380 'only_matching': True,
5381 }, {
5382 'url': 'https://www.youtube.com/feed/library',
5383 'only_matching': True,
5384 }, {
5385 'url': 'https://www.youtube.com/feed/history',
5386 'only_matching': True,
5387 }, {
5388 'url': 'https://www.youtube.com/feed/subscriptions',
5389 'only_matching': True,
5390 }, {
5391 'url': 'https://www.youtube.com/feed/watch_later',
5392 'only_matching': True,
5393 }, {
5394 'note': 'Recommended - redirects to home page.',
5395 'url': 'https://www.youtube.com/feed/recommended',
5396 'only_matching': True,
5397 }, {
5398 'note': 'inline playlist with not always working continuations',
5399 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5400 'only_matching': True,
5401 }, {
5402 'url': 'https://www.youtube.com/course',
5403 'only_matching': True,
5404 }, {
5405 'url': 'https://www.youtube.com/zsecurity',
5406 'only_matching': True,
5407 }, {
5408 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5409 'only_matching': True,
5410 }, {
5411 'url': 'https://www.youtube.com/TheYoungTurks/live',
5412 'only_matching': True,
5413 }, {
5414 'url': 'https://www.youtube.com/hashtag/cctv9',
5415 'info_dict': {
5416 'id': 'cctv9',
5417 'title': '#cctv9',
5418 'tags': [],
5419 },
5420 'playlist_mincount': 350,
5421 }, {
5422 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
5423 'only_matching': True,
5424 }, {
5425 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
5426 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5427 'only_matching': True
5428 }, {
5429 'note': '/browse/ should redirect to /channel/',
5430 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
5431 'only_matching': True
5432 }, {
5433 'note': 'VLPL, should redirect to playlist?list=PL...',
5434 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5435 'info_dict': {
5436 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5437 'uploader': 'NoCopyrightSounds',
5438 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
5439 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5440 'title': 'NCS : All Releases 💿',
5441 'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5442 'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5443 'modified_date': r're:\d{8}',
5444 'view_count': int,
5445 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5446 'tags': [],
5447 'channel': 'NoCopyrightSounds',
5448 'availability': 'public',
5449 },
5450 'playlist_mincount': 166,
5451 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5452 }, {
5453 'note': 'Topic, should redirect to playlist?list=UU...',
5454 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5455 'info_dict': {
5456 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5457 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5458 'title': 'Uploads from Royalty Free Music - Topic',
5459 'uploader': 'Royalty Free Music - Topic',
5460 'tags': [],
5461 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5462 'channel': 'Royalty Free Music - Topic',
5463 'view_count': int,
5464 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5465 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5466 'modified_date': r're:\d{8}',
5467 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5468 'description': '',
5469 'availability': 'public',
5470 },
5471 'expected_warnings': [
5472 'The URL does not have a videos tab',
5473 r'[Uu]navailable videos (are|will be) hidden',
5474 ],
5475 'playlist_mincount': 101,
5476 }, {
5477 'note': 'Topic without a UU playlist',
5478 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
5479 'info_dict': {
5480 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
5481 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
5482 'tags': [],
5483 },
5484 'expected_warnings': [
5485 'the playlist redirect gave error',
5486 ],
5487 'playlist_mincount': 9,
5488 }, {
5489 'note': 'Youtube music Album',
5490 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
5491 'info_dict': {
5492 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
5493 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
5494 'tags': [],
5495 'view_count': int,
5496 'description': '',
5497 'availability': 'unlisted',
5498 'modified_date': r're:\d{8}',
5499 },
5500 'playlist_count': 50,
5501 }, {
5502 'note': 'unlisted single video playlist',
5503 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5504 'info_dict': {
5505 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5506 'uploader': 'colethedj',
5507 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5508 'title': 'yt-dlp unlisted playlist test',
5509 'availability': 'unlisted',
5510 'tags': [],
5511 'modified_date': '20220418',
5512 'channel': 'colethedj',
5513 'view_count': int,
5514 'description': '',
5515 'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5516 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5517 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5518 },
5519 'playlist_count': 1,
5520 }, {
5521 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
5522 'url': 'https://www.youtube.com/feed/recommended',
5523 'info_dict': {
5524 'id': 'recommended',
5525 'title': 'recommended',
5526 'tags': [],
5527 },
5528 'playlist_mincount': 50,
5529 'params': {
5530 'skip_download': True,
5531 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5532 },
5533 }, {
5534 'note': 'API Fallback: /videos tab, sorted by oldest first',
5535 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
5536 'info_dict': {
5537 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5538 'title': 'Cody\'sLab - Videos',
5539 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
5540 'uploader': 'Cody\'sLab',
5541 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5542 'channel': 'Cody\'sLab',
5543 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5544 'tags': [],
5545 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5546 'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5547 'channel_follower_count': int
5548 },
5549 'playlist_mincount': 650,
5550 'params': {
5551 'skip_download': True,
5552 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5553 },
5554 }, {
5555 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
5556 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5557 'info_dict': {
5558 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5559 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5560 'title': 'Uploads from Royalty Free Music - Topic',
5561 'uploader': 'Royalty Free Music - Topic',
5562 'modified_date': r're:\d{8}',
5563 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5564 'description': '',
5565 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5566 'tags': [],
5567 'channel': 'Royalty Free Music - Topic',
5568 'view_count': int,
5569 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5570 'availability': 'public',
5571 },
5572 'expected_warnings': [
5573 'does not have a videos tab',
5574 r'[Uu]navailable videos (are|will be) hidden',
5575 ],
5576 'playlist_mincount': 101,
5577 'params': {
5578 'skip_download': True,
5579 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5580 },
5581 }, {
5582 'note': 'non-standard redirect to regional channel',
5583 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
5584 'only_matching': True
5585 }, {
5586 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
5587 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5588 'info_dict': {
5589 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5590 'modified_date': '20220407',
5591 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5592 'tags': [],
5593 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5594 'uploader': 'pukkandan',
5595 'availability': 'unlisted',
5596 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5597 'channel': 'pukkandan',
5598 'description': 'Test for collaborative playlist',
5599 'title': 'yt-dlp test - collaborative playlist',
5600 'view_count': int,
5601 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5602 },
5603 'playlist_mincount': 2
5604 }, {
5605 'note': 'translated tab name',
5606 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
5607 'info_dict': {
5608 'id': 'UCiu-3thuViMebBjw_5nWYrA',
5609 'tags': [],
5610 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
5611 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5612 'description': '',
5613 'title': 'cole-dlp-test-acc - 再生リスト',
5614 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5615 'uploader': 'cole-dlp-test-acc',
5616 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
5617 'channel': 'cole-dlp-test-acc',
5618 },
5619 'playlist_mincount': 1,
5620 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
5621 'expected_warnings': ['Preferring "ja"'],
5622 }, {
5623 # XXX: this should really check flat playlist entries, but the test suite doesn't support that
5624 'note': 'preferred lang set with playlist with translated video titles',
5625 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
5626 'info_dict': {
5627 'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
5628 'tags': [],
5629 'view_count': int,
5630 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5631 'uploader': 'cole-dlp-test-acc',
5632 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
5633 'channel': 'cole-dlp-test-acc',
5634 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
5635 'description': 'test',
5636 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5637 'title': 'dlp test playlist',
5638 'availability': 'public',
5639 },
5640 'playlist_mincount': 1,
5641 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
5642 'expected_warnings': ['Preferring "ja"'],
5643 }, {
5644 # shorts audio pivot for 2GtVksBMYFM.
5645 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
5646 'info_dict': {
5647 'id': 'sfv_audio_pivot',
5648 'title': 'sfv_audio_pivot',
5649 'tags': [],
5650 },
5651 'playlist_mincount': 50,
5652
5653 }]
5654
@classmethod
def suitable(cls, url):
    """Return False for URLs that YoutubeIE accepts; otherwise use the inherited check."""
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
5658
# Splits a tab URL into three named parts:
#   pre  - the portion matched by _VALID_URL
#   tab  - an optional "/<word>" path component following it (e.g. "/videos")
#   post - whatever remains up to the end of the string
# The conditional group `(?(not_channel)|...)` only tries to match `tab` when the
# `not_channel` group did not participate in the match
# (`not_channel` is presumably defined inside _VALID_URL, which is not visible here).
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')
5660
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a tab-style YouTube URL and dispatch to the matching handler.

    The URL host is rewritten to www.youtube.com and split with `_URL_RE`.
    Channel home URLs are pointed at `/videos` unless the
    'no-youtube-channel-redirect' compat option is set. After the page data is
    fetched, the result is produced from (in order of preference) the browse
    tabs, a watch-page playlist, or a single video ID.

    Raises:
        ExtractorError: when a music album cannot be resolved to a playlist,
            when a watch URL has neither video nor playlist ID, when an
            explicitly requested tab does not exist, or when the page cannot
            be recognised at all.
        UserNotLive: when the `/live` tab was requested but a tab page was
            returned instead of a redirect to a video.
    """
    item_id = self._match_id(url)
    # Force the canonical host regardless of which host the URL was given with
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Match against _URL_RE and normalise unmatched (None) groups to ''
        mobj = self._URL_RE.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj, redirect_warning = get_mobj(url), None
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
            elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                mdata = self._extract_tab_endpoint(
                    f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
                murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                    get_all=False, expected_type=str)
                if not murl:
                    raise ExtractorError('Failed to resolve album to playlist')
                return self.url_result(murl, ie=YoutubeTabIE.ie_key())
            elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                pre = f'https://www.youtube.com/channel/{item_id}'

    # Remember what the user asked for before any automatic /videos redirect
    original_tab_name = tab
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from the (possibly rewritten) parts and re-parse it
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = (qs.get(key, [None])[0] for key in ('v', 'list'))

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = get_mobj(url)

    if video_id and playlist_id:
        # With --no-playlist only the single video is handed over to YoutubeIE
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                   ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Keep the requested tab and trailing part when following the redirect
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_url = urljoin(
            url, traverse_obj(selected_tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url')))
        translated_tab_name = selected_tab.get('title', '').lower()

        # Prefer tab name from tab url as it is always in en,
        # but only when preferred lang is set as it may not extract reliably in all cases.
        selected_tab_name = (self._preferred_lang in (None, 'en') and translated_tab_name
                             or selected_tab_url and get_mobj(selected_tab_url)['tab'][1:]  # primary
                             or translated_tab_name)

        # The "home" tab is addressed as /featured in URLs
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]

        if 'no-youtube-channel-redirect' not in compat_opts:
            if requested_tab_name == 'live':  # Live tab should have redirected to the video
                raise UserNotLive(video_id=mobj['id'])
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if not original_tab_name:
                    # The tab was only requested by the automatic /videos redirect above,
                    # so fall back gracefully instead of failing
                    if item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        pl_id = f'UU{item_id[2:]}'
                        pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                        try:
                            data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                        except ExtractorError:
                            redirect_warning += ' and the playlist redirect gave error'
                        else:
                            item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                            redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                    if selected_tab_name and selected_tab_name != requested_tab_name:
                        redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'
                else:
                    # The user explicitly asked for a tab that does not exist
                    raise ExtractorError(redirect_warning, expected=True)

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)
    # `data` may have been replaced above, so the tabs are looked up again
    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Last resort: hand a single video over to YoutubeIE
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
5799
5800
# Accepts bare playlist IDs and playlist-carrying URLs that YoutubeTabIE does not
# claim, and forwards them to YoutubeTabIE as a canonical /playlist URL.
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs owned by YoutubeTabIE and URLs that carry a `v` video ID."""
        if YoutubeTabIE.suitable(url):
            return False
        requested_video = parse_qs(url).get('v', [None])[0]
        return False if requested_video else super().suitable(url)

    def _real_extract(self, url):
        """Rebuild the input as a www.youtube.com/playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        from_music_site = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query string if there is one; a bare ID has none
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music_site:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
5912
5913
# Handles youtu.be short links that also carry a `list=` playlist parameter by
# rewriting them to a full watch URL and delegating to YoutubeTabIE.
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Turn the short link into a watch URL with `v`, `list` and `feature` set."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
5963
5964
# Maps /embed/live_stream?channel=<id> URLs onto the channel's /live tab.
class YoutubeLivestreamEmbedIE(InfoExtractor):
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel's /live URL."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
5978
5979
# Expands the "ytuser:<name>" shorthand into the user's /videos page.
class YoutubeYtUserIE(InfoExtractor):
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the user's /videos URL."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
5994
5995
# Resolves the ":ytfav" family of keywords to the "LL" playlist.
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [
        {
            'url': ':ytfav',
            'only_matching': True,
        },
        {
            'url': ':ytfavorites',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the `list=LL` playlist URL."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
6013
6014
# Builds a playlist out of the notification menu returned by the
# `notification/get_notification_menu` endpoint (":ytnotif" keyword).
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield one entry per usable notification in `response`.

        `continuation_list[0]` is reset to None and then set to the last
        continuation renderer encountered among the items, if any.
        """
        first_page_path = (
            'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer',
            'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items')
        next_page_path = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')
        items = traverse_obj(response, first_page_path, next_page_path, expected_type=list)
        if not items:
            items = []
        continuation_list[0] = None
        for menu_item in items:
            result = self._extract_notification_renderer(menu_item.get('notificationRenderer'))
            if result:
                yield result
            next_renderer = menu_item.get('continuationItemRenderer')
            if next_renderer:
                continuation_list[0] = next_renderer

    def _extract_notification_renderer(self, notification):
        """Convert one notification renderer into a `url` result dict.

        Video notifications point at the watch page; otherwise a community
        post URL is built from the browse endpoint. Returns None when neither
        a video ID nor a channel ID + post ID pair can be found.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_endpoint = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_endpoint, 'browseId', expected_type=str)
            canonical_url = traverse_obj(browse_endpoint, 'canonicalBaseUrl', expected_type=str)
            post_id = self._search_regex(r'/post/(.+)', canonical_url, 'post id', default=None)
            if not (channel_id and post_id):
                return None
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification,
            ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)

        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)

        # The date is only derived from the relative "sent" text when explicitly requested
        upload_date = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()):
            sent_time = self._parse_time_text(self._get_text(notification, 'sentTimeText'))
            upload_date = strftime_or_none(sent_time, '%Y%m%d')

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield notification entries page by page until no continuation is left."""
        # One-slot holder that _extract_notification_menu refreshes for every page
        continuation_holder = [None]
        last_response = None
        page_num = 0
        while True:
            page_num += 1
            token = traverse_obj(
                continuation_holder,
                (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'),
                expected_type=str)
            request_headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(last_response))
            last_response = self._extract_response(
                item_id=f'page {page_num}', query={'ctoken': token} if token else {}, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=request_headers)
            yield from self._extract_notification_menu(last_response, continuation_holder)
            if not continuation_holder[0]:
                break

    def _real_extract(self, url):
        """Return the notifications as a playlist with id and title 'notifications'."""
        display_id = 'notifications'
        if self.skip_webpage:
            ytcfg = {}
        else:
            ytcfg = self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)
6104
6105
# "ytsearchN:<query>" keyword search; the behaviour comes from the two base classes,
# this class only supplies the search key and the filter parameter.
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [
        {
            'url': 'ytsearch5:youtube-dl test video',
            'playlist_count': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
    ]
6119
6120
# "ytsearchdateN:<query>" keyword search; same as YoutubeSearchIE but with a
# filter parameter that also sorts by date.
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [
        {
            'url': 'ytsearchdate5:youtube-dl test video',
            'playlist_count': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
    ]
6134
6135
# Handles youtube.com/results and /search URLs, passing the `sp` query
# parameter (if present) through to the search request.
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the search results as a playlist whose id and title are the query."""
        query_args = parse_qs(url)
        # `search_query` takes precedence over `q`
        search_terms = (query_args.get('search_query') or query_args.get('q'))[0]
        search_params = query_args.get('sp', (None,))[0]
        entries = self._search_results(search_terms, search_params)
        return self.playlist_result(entries, search_terms, search_terms)
6175
6176
# Handles music.youtube.com/search URLs. A result section can be selected either
# with the `sp` query parameter or by name through the URL fragment (e.g. #songs).
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name -> `sp` search parameter value
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return the search results as a playlist titled "<query>[ - <section>]"."""
        query_args = parse_qs(url)
        search_terms = (query_args.get('search_query') or query_args.get('q'))[0]
        sp = query_args.get('sp', (None,))[0]

        if sp:
            # Show the known section name for this `sp`; otherwise the raw value
            section_name = sp
            for known_name, known_sp in self._SECTIONS.items():
                if known_sp == sp:
                    section_name = known_name
                    break
        else:
            # No `sp`: interpret the text after the first '#' as a section name
            url_pieces = url.split('#')
            fragment = url_pieces[1] if len(url_pieces) > 1 else ''
            section_name = urllib.parse.unquote_plus(fragment).lower()
            sp = self._SECTIONS.get(section_name)
            if not sp:
                section_name = None

        playlist_title = join_nonempty(search_terms, section_name, delim=' - ')
        entries = self._search_results(search_terms, sp, default_client='web_music')
        return self.playlist_result(entries, playlist_title, playlist_title)
6228
6229
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed keyword extractors.
    Each subclass has to override _FEED_NAME; it determines both the
    extractor name and the /feed/<name> URL that is delegated to YoutubeTabIE.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    def _real_initialize(self):
        # This class does not derive from YoutubeBaseInfoExtractor, so its
        # login check is invoked explicitly on this instance
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(self):
        feed_name = self._FEED_NAME
        return f'youtube:{feed_name}'

    def _real_extract(self, url):
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
6248
6249
# Resolves the ":ytwatchlater" keyword to the "WL" playlist.
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {
            'url': ':ytwatchlater',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the `list=WL` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
6262
6263
# Recommended feed: matches the bare youtube.com home URL and the ":ytrec"
# keywords. Unlike the base class default, login is not required.
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {
            'url': ':ytrec',
            'only_matching': True,
        },
        {
            'url': ':ytrecommended',
            'only_matching': True,
        },
        {
            'url': 'https://youtube.com',
            'only_matching': True,
        },
    ]
6279
6280
# Subscriptions feed, reached through the ":ytsubs" family of keywords.
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {
            'url': ':ytsubs',
            'only_matching': True,
        },
        {
            'url': ':ytsubscriptions',
            'only_matching': True,
        },
    ]
6292
6293
# Watch history feed, reached through the ":ythis" / ":ythistory" keywords.
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {
            'url': ':ythistory',
            'only_matching': True,
        },
    ]
6302
6303
# Expands "ytstories:UC<id>" into the "RLTD<id>" playlist URL and delegates
# to YoutubeTabIE with an `is_story` marker smuggled along.
class YoutubeStoriesIE(InfoExtractor):
    IE_NAME = 'youtube:stories'
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the stories playlist URL from the channel ID suffix."""
        # The matched id excludes the leading "UC"
        channel_suffix = self._match_id(url)
        playlist_id = f'RLTD{channel_suffix}'
        playlist_url = f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1'
        story_url = smuggle_url(playlist_url, {'is_story': True})
        return self.url_result(story_url, ie=YoutubeTabIE, video_id=playlist_id)
6318
6319
# Maps /source/<video id>/shorts URLs onto the sfv_audio_pivot feed, which is
# then handled by YoutubeTabIE.
class YoutubeShortsAudioPivotIE(InfoExtractor):
    IE_NAME = 'youtube:shorts:pivot:audio'
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [{
        'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
        'only_matching': True,
    }]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Build the `bp` browse parameter of the sfv_audio_pivot feed for a video id

        The id is embedded three times into a fixed binary template, which is
        then base64-encoded and percent-quoted for use in a URL.
        """
        encoded_id = video_id.encode()
        template = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b'
        payload = template % (encoded_id, encoded_id, encoded_id)
        b64_text = base64.b64encode(payload).decode()
        return urllib.parse.quote(b64_text)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the generated sfv_audio_pivot feed URL."""
        video_id = self._match_id(url)
        browse_params = self._generate_audio_pivot_params(video_id)
        return self.url_result(
            f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}',
            ie=YoutubeTabIE)
6342
6343
# Catches watch/attribution URLs that carry no video ID - typically because an
# unquoted "&" made the shell cut the URL short - and reports a helpful error.
class YoutubeTruncatedURLIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail with an expected error explaining how to quote the URL.

        Raises:
            ExtractorError: unconditionally (marked as expected, so it is
                reported as a user error rather than a bug).
        """
        # The example commands name this program's own executable (yt-dlp)
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)
6391
6392
# Resolves a /clip/<id> URL to its base video and attaches the clip's start and
# end offsets, leaving the actual extraction to YoutubeIE.
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """Return a `url_transparent` result for the clip's base video.

        The result carries the clip ID as `id` and the clip boundaries as
        `section_start` / `section_end` in seconds.

        Raises:
            ExtractorError: when the page data contains no video ID or no
                clip loop command.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False, expected_type=dict)
        # Without this check a missing loop command would surface as a bare
        # TypeError from subscripting None below
        if not clip_data:
            raise ExtractorError('Unable to find clip data')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            # The page reports the boundaries in milliseconds
            'section_start': int(clip_data['startTimeMs']) / 1000,
            'section_end': int(clip_data['endTimeMs']) / 1000,
        }
6449
6450
class YoutubeTruncatedIDIE(InfoExtractor):
    """Match watch URLs whose ``v`` parameter is only 1 to 10 characters long.

    Such URLs are reported as carrying an incomplete video ID; extraction
    always fails with an expected ExtractorError.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Raise an expected error naming the partial ID found in ``url``."""
        partial_id = self._match_id(url)
        message = f'Incomplete YouTube ID {partial_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)