]> jfr.im git - yt-dlp.git/blame_incremental - yt_dlp/extractor/youtube.py
Make early reject of `--match-filter` stricter
[yt-dlp.git] / yt_dlp / extractor / youtube.py
... / ...
CommitLineData
1import base64
2import calendar
3import copy
4import datetime
5import enum
6import hashlib
7import itertools
8import json
9import math
10import os.path
11import random
12import re
13import sys
14import threading
15import time
16import traceback
17import urllib.error
18import urllib.parse
19
20from .common import InfoExtractor, SearchInfoExtractor
21from .openload import PhantomJSwrapper
22from ..compat import functools
23from ..jsinterp import JSInterpreter
24from ..utils import (
25 NO_DEFAULT,
26 ExtractorError,
27 LazyList,
28 UserNotLive,
29 bug_reports_message,
30 classproperty,
31 clean_html,
32 datetime_from_str,
33 dict_get,
34 filter_dict,
35 float_or_none,
36 format_field,
37 get_first,
38 int_or_none,
39 is_html,
40 join_nonempty,
41 js_to_json,
42 mimetype2ext,
43 network_exceptions,
44 orderedSet,
45 parse_codecs,
46 parse_count,
47 parse_duration,
48 parse_iso8601,
49 parse_qs,
50 qualities,
51 remove_start,
52 smuggle_url,
53 str_or_none,
54 str_to_int,
55 strftime_or_none,
56 traverse_obj,
57 try_get,
58 unescapeHTML,
59 unified_strdate,
60 unified_timestamp,
61 unsmuggle_url,
62 update_url_query,
63 url_or_none,
64 urljoin,
65 variadic,
66)
67
# Per-client InnerTube configuration, keyed by the client identifier.
# Each entry carries the request context ('INNERTUBE_CONTEXT'), the numeric
# client id ('INNERTUBE_CONTEXT_CLIENT_NAME') and, optionally, an API key,
# an API host and a 'REQUIRE_JS_PLAYER' flag. Keys that are omitted here
# (API key, host, REQUIRE_JS_PLAYER, context language) receive defaults in
# build_innertube_clients(), which also adds a 'priority' value per client.
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20220731.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    # Served from a dedicated host (music.youtube.com)
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20220727.01.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20220726.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    # The android and ios client families below set REQUIRE_JS_PLAYER to False
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '5.16.51',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '22.30.100',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    # No INNERTUBE_API_KEY here: the default key is filled in by build_innertube_clients()
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '5.21',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    # No INNERTUBE_API_KEY here: the default key is filled in by build_innertube_clients()
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '22.33.101',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85
    },
}
240
241
242def _split_innertube_client(client_name):
243 variant, *base = client_name.rsplit('.', 1)
244 if base:
245 return variant, base[0], variant
246 base, *variant = client_name.split('_', 1)
247 return client_name, base, variant[0] if variant else None
248
249
def build_innertube_clients():
    """Fill in defaults and priorities for every entry of INNERTUBE_CLIENTS.

    For each configured client this:

    * sets default API key, API host, ``REQUIRE_JS_PLAYER`` and context
      language (``hl``) where they are missing;
    * assigns ``priority`` from the position of the client's base name in
      the base-client ordering (multiplied by 10), then lowers it by 2 for
      ``embedded`` variants and by 3 for any other variant;
    * for clients without a variant, registers an additional
      ``'<client>_embedscreen'`` entry: a deep copy with the EMBED client
      screen, the third-party embed URL and a priority lowered by 3.

    The dictionary is modified in place; nothing is returned.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
    rank = qualities(base_clients[::-1])
    fallbacks = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: '_embedscreen' entries are inserted during the loop
    for name, cfg in list(INNERTUBE_CLIENTS.items()):
        for key, value in fallbacks:
            cfg.setdefault(key, value)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        _, base_client, variant = _split_innertube_client(name)
        cfg['priority'] = 10 * rank(base_client)

        if variant == 'embedded':
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            cfg['priority'] -= 2
        elif variant:
            cfg['priority'] -= 3
        else:
            screen_cfg = copy.deepcopy(cfg)
            screen_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            screen_cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            screen_cfg['priority'] -= 3
            INNERTUBE_CLIENTS[f'{name}_embedscreen'] = screen_cfg


# Runs once at import time so the table is complete before any extractor uses it
build_innertube_clients()
279
280
class BadgeType(enum.Enum):
    """Kinds of badge recognised on a renderer.

    The AVAILABILITY_* members describe the availability of an item;
    LIVE_NOW marks an item that is live. Values are assigned by
    ``enum.auto()`` and carry no meaning of their own.
    """
    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()
    LIVE_NOW = enum.auto()
288
289
290class YoutubeBaseInfoExtractor(InfoExtractor):
291 """Provide base functions for Youtube extractors"""
292
# Regex alternation of path names listed as reserved
# NOTE(review): presumably used to tell site paths apart from channel names — confirm at the use sites
_RESERVED_NAMES = (
    r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
    r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
    r'browse|oembed|get_video_info|iframe_api|s/player|source|'
    r'storefront|oops|index|account|t/terms|about|upload|signin|logout')

# Regex for a playlist id: a known prefix followed by at least 10 id characters,
# or one of the fixed ids RDMM / WL / LL / LM
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

# _NETRC_MACHINE = 'youtube'

# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
305
# Hostname regexes of alternative front-ends (Invidious and Piped instances
# plus redirect sites). Each entry is a regex fragment matching one host,
# usually with an optional 'www.' prefix.
_INVIDIOUS_SITES = (
    # invidious-redirect websites
    r'(?:www\.)?redirect\.invidious\.io',
    r'(?:(?:www|dev)\.)?invidio\.us',
    # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
    r'(?:www\.)?invidious\.pussthecat\.org',
    r'(?:www\.)?invidious\.zee\.li',
    r'(?:www\.)?invidious\.ethibox\.fr',
    r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
    r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
    r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
    # youtube-dl invidious instances list
    r'(?:(?:www|no)\.)?invidiou\.sh',
    r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
    r'(?:www\.)?invidious\.kabi\.tk',
    r'(?:www\.)?invidious\.mastodon\.host',
    r'(?:www\.)?invidious\.zapashcanon\.fr',
    r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
    r'(?:www\.)?invidious\.tinfoil-hat\.net',
    r'(?:www\.)?invidious\.himiko\.cloud',
    r'(?:www\.)?invidious\.reallyancient\.tech',
    r'(?:www\.)?invidious\.tube',
    r'(?:www\.)?invidiou\.site',
    r'(?:www\.)?invidious\.site',
    r'(?:www\.)?invidious\.xyz',
    r'(?:www\.)?invidious\.nixnet\.xyz',
    r'(?:www\.)?invidious\.048596\.xyz',
    r'(?:www\.)?invidious\.drycat\.fr',
    r'(?:www\.)?inv\.skyn3t\.in',
    r'(?:www\.)?tube\.poal\.co',
    r'(?:www\.)?tube\.connect\.cafe',
    r'(?:www\.)?vid\.wxzm\.sx',
    r'(?:www\.)?vid\.mint\.lgbt',
    r'(?:www\.)?vid\.puffyan\.us',
    r'(?:www\.)?yewtu\.be',
    r'(?:www\.)?yt\.elukerio\.org',
    r'(?:www\.)?yt\.lelux\.fi',
    r'(?:www\.)?invidious\.ggc-project\.de',
    r'(?:www\.)?yt\.maisputain\.ovh',
    r'(?:www\.)?ytprivate\.com',
    r'(?:www\.)?invidious\.13ad\.de',
    r'(?:www\.)?invidious\.toot\.koeln',
    r'(?:www\.)?invidious\.fdn\.fr',
    r'(?:www\.)?watch\.nettohikari\.com',
    r'(?:www\.)?invidious\.namazso\.eu',
    r'(?:www\.)?invidious\.silkky\.cloud',
    r'(?:www\.)?invidious\.exonip\.de',
    r'(?:www\.)?invidious\.riverside\.rocks',
    r'(?:www\.)?invidious\.blamefran\.net',
    r'(?:www\.)?invidious\.moomoo\.de',
    r'(?:www\.)?ytb\.trom\.tf',
    r'(?:www\.)?yt\.cyberhost\.uk',
    r'(?:www\.)?kgg2m7yk5aybusll\.onion',
    r'(?:www\.)?qklhadlycap4cnod\.onion',
    r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
    r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
    r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
    r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
    r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
    r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
    r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
    r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
    r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
    r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
    # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
    # (piped.kavin.rocks is also matched by the kavin.rocks entry further up)
    r'(?:www\.)?piped\.kavin\.rocks',
    r'(?:www\.)?piped\.tokhmi\.xyz',
    r'(?:www\.)?piped\.syncpundit\.io',
    r'(?:www\.)?piped\.mha\.fi',
    r'(?:www\.)?watch\.whatever\.social',
    r'(?:www\.)?piped\.garudalinux\.org',
    r'(?:www\.)?piped\.rivo\.lol',
    r'(?:www\.)?piped-libre\.kavin\.rocks',
    r'(?:www\.)?yt\.jae\.fi',
    r'(?:www\.)?piped\.mint\.lgbt',
    r'(?:www\.)?il\.ax',
    r'(?:www\.)?piped\.esmailelbob\.xyz',
    r'(?:www\.)?piped\.projectsegfau\.lt',
    r'(?:www\.)?piped\.privacydev\.net',
    r'(?:www\.)?piped\.palveluntarjoaja\.eu',
    r'(?:www\.)?piped\.smnz\.de',
    r'(?:www\.)?piped\.adminforge\.de',
    r'(?:www\.)?watch\.whatevertinfoil\.de',
    r'(?:www\.)?piped\.qdi\.fi',
)
391
# extracted from account/account_menu ep
# XXX: These are the supported YouTube UI and API languages,
# which is slightly different from languages supported for translation in YouTube studio
# The 'lang' extractor argument is validated against this list (case-sensitively)
# by _preferred_lang.
_SUPPORTED_LANG_CODES = [
    'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
    'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
    'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
    'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
    'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
    'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
]

# Alert messages that _report_alerts drops instead of reporting as warnings
_IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
405
@functools.cached_property
def _preferred_lang(self):
    """
    Language code requested through the 'lang' extractor argument.

    Returns None when no language was requested. Raises an expected
    ExtractorError when the requested code is not one of
    _SUPPORTED_LANG_CODES, and emits a warning for any supported
    language other than 'en'.
    """
    lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
    if not lang:
        return None

    if lang in self._SUPPORTED_LANG_CODES:
        if lang != 'en':
            self.report_warning(
                f'Preferring "{lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
        return lang

    raise ExtractorError(
        f'Unsupported language code: {lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
        expected=True)
423
424 def _initialize_consent(self):
425 cookies = self._get_cookies('https://www.youtube.com/')
426 if cookies.get('__Secure-3PSID'):
427 return
428 consent_id = None
429 consent = cookies.get('CONSENT')
430 if consent:
431 if 'YES' in consent.value:
432 return
433 consent_id = self._search_regex(
434 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
435 if not consent_id:
436 consent_id = random.randint(100, 999)
437 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
438
439 def _initialize_pref(self):
440 cookies = self._get_cookies('https://www.youtube.com/')
441 pref_cookie = cookies.get('PREF')
442 pref = {}
443 if pref_cookie:
444 try:
445 pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
446 except ValueError:
447 self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
448 pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'})
449 self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
450
451 def _real_initialize(self):
452 self._initialize_pref()
453 self._initialize_consent()
454 self._check_login_required()
455
456 def _check_login_required(self):
457 if self._LOGIN_REQUIRED and not self._cookies_passed:
458 self.raise_login_required('Login details are needed to download this content', method='cookies')
459
# Regex prefixes matching the assignment of the initial data / initial player
# response objects in a page, i.e. everything up to and including the '='.
# The first one accepts both `ytInitialData =` and `window["ytInitialData"] =`.
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
462
def _get_default_ytcfg(self, client='web'):
    """Return an independent deep copy of the built-in configuration for `client`."""
    builtin_config = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin_config)
465
def _get_innertube_host(self, client='web'):
    """Return the API hostname configured for `client` in INNERTUBE_CLIENTS."""
    builtin_config = INNERTUBE_CLIENTS[client]
    return builtin_config['INNERTUBE_HOST']
468
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """try_get on `ytcfg`, falling back to the built-in config of `default_client`.

    The fallback is used whenever the lookup on `ytcfg` gives a falsy result.
    """
    found = try_get(ytcfg, getter, expected_type)
    if found:
        return found
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
473
474 def _extract_client_name(self, ytcfg, default_client='web'):
475 return self._ytcfg_get_safe(
476 ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
477 lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
478
479 def _extract_client_version(self, ytcfg, default_client='web'):
480 return self._ytcfg_get_safe(
481 ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
482 lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
483
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """Pick the API hostname to use.

    Precedence: the 'innertube_host' extractor argument, then
    `req_api_hostname`, then the host configured for `default_client`
    ('web' when no client is given).
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
487
488 def _extract_api_key(self, ytcfg=None, default_client='web'):
489 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
490
def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the 'INNERTUBE_CONTEXT' of `ytcfg`, or of the default client config.

    Language and timezone are then forced on the context's 'client'
    mapping ('hl' from the preferred language or 'en', UTC timezone).
    NOTE(review): the update appears to be applied to the mapping found
    in `ytcfg` itself, i.e. the caller's object may be modified — confirm.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    client_context.update(
        hl=self._preferred_lang or 'en',
        timeZone='UTC',
        utcOffsetMinutes=0)
    return context
498
# Cache for the SAPISID cookie value used by _generate_sapisidhash_header:
# None = not looked up yet, False = looked up but absent, otherwise the value.
_SAPISID = None
500
501 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
502 time_now = round(time.time())
503 if self._SAPISID is None:
504 yt_cookies = self._get_cookies('https://www.youtube.com')
505 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
506 # See: https://github.com/yt-dlp/yt-dlp/issues/393
507 sapisid_cookie = dict_get(
508 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
509 if sapisid_cookie and sapisid_cookie.value:
510 self._SAPISID = sapisid_cookie.value
511 self.write_debug('Extracted SAPISID cookie')
512 # SAPISID cookie is required if not already present
513 if not yt_cookies.get('SAPISID'):
514 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
515 self._set_cookie(
516 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
517 else:
518 self._SAPISID = False
519 if not self._SAPISID:
520 return None
521 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
522 sapisidhash = hashlib.sha1(
523 f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
524 return f'SAPISIDHASH {time_now}_{sapisidhash}'
525
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST `query` to the InnerTube endpoint `ep` and return the decoded JSON.

    The request body is `query` plus a 'context' entry (`context`, or the
    context of `default_client` when none is given). `headers` are merged
    over the generated API headers. The API key is taken from the
    'innertube_key' extractor argument, then `api_key`, then the key of
    `default_client`.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = configured_key or api_key or self._extract_api_key(default_client=default_client)

    hostname = self._select_api_hostname(api_hostname, default_client)
    return self._download_json(
        f'https://{hostname}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})
543
544 def extract_yt_initial_data(self, item_id, webpage, fatal=True):
545 return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)
546
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first 'SESSION_INDEX' among the given ytcfgs that converts
    to an int, or None when there is none.
    """
    candidates = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in candidates if index is not None), None)
557
558 # Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the identity token from `ytcfg` ('ID_TOKEN'), else from `webpage`, else None."""
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
568
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg

    The sources are checked in order; the first one giving a result wins.
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated:
            return delegated

        datasync_id = try_get(source, datasync_getters, str) or ''
        channel_sid, *remainder = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if remainder and remainder[0]:
            return channel_sid
587
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = [(
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )]
    return get_first(args, visitor_data_paths, expected_type=str)
597
598 @functools.cached_property
599 def is_authenticated(self):
600 return bool(self._generate_sapisidhash_header())
601
602 def extract_ytcfg(self, video_id, webpage):
603 if not webpage:
604 return {}
605 return self._parse_json(
606 self._search_regex(
607 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
608 default='{}'), video_id, fatal=False) or {}
609
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Assemble the HTTP headers for an InnerTube API request.

    Explicit arguments take precedence; anything not given is read from
    `ytcfg`, with client name/version and user agent falling back to
    `default_client`. 'X-Goog-AuthUser' is added when an account sync id
    or a session index is known, and 'Authorization'/'X-Origin' when a
    SAPISIDHASH can be generated. The result is passed through
    filter_dict before being returned.
    """
    origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))

    headers = {}
    headers['X-YouTube-Client-Name'] = str(self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client))
    headers['X-YouTube-Client-Version'] = self._extract_client_version(ytcfg, default_client)
    headers['Origin'] = origin
    headers['X-Youtube-Identity-Token'] = identity_token or self._extract_identity_token(ytcfg)
    headers['X-Goog-PageId'] = account_syncid or self._extract_account_syncid(ytcfg)
    headers['X-Goog-Visitor-Id'] = visitor_data or self._extract_visitor_data(ytcfg)
    headers['User-Agent'] = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin
    return filter_dict(headers)
635
636 def _download_ytcfg(self, client, video_id):
637 url = {
638 'web': 'https://www.youtube.com',
639 'web_music': 'https://music.youtube.com',
640 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
641 }.get(client)
642 if not url:
643 return {}
644 webpage = self._download_webpage(
645 url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')
646 return self.extract_ytcfg(video_id, webpage) or {}
647
648 @staticmethod
649 def _build_api_continuation_query(continuation, ctp=None):
650 query = {
651 'continuation': continuation
652 }
653 # TODO: Inconsistency with clickTrackingParams.
654 # Currently we have a fixed ctp contained within context (from ytcfg)
655 # and a ctp in root query for continuation.
656 if ctp:
657 query['clickTracking'] = {'clickTrackingParams': ctp}
658 return query
659
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Continuation query from a renderer's next/reload continuation data, or None."""
    data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = data.get('continuation') if data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, data.get('clickTrackingParams'))
672
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Continuation query from a continuation endpoint dict, or None.

    None is returned for non-dict input and when the endpoint carries no
    'continuationCommand' token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))
682
@classmethod
def _extract_continuation(cls, renderer):
    """Continuation query for `renderer`.

    Next/reload continuation data is preferred; otherwise the first
    usable continuationItemRenderer endpoint (or button command) found
    under 'contents', 'items' or 'rows' is used.
    """
    return cls._extract_next_continuation_data(renderer) or traverse_obj(renderer, (
        ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
        ('continuationEndpoint', ('button', 'buttonRenderer', 'command'))
    ), get_all=False, expected_type=cls._extract_continuation_ep_data)
693
@classmethod
def _extract_alerts(cls, data):
    """Yield ``(alert_type, message)`` for each alert in ``data['alerts']``.

    Alerts without a type or without message text are skipped, as are
    entries of the alerts list that are not dicts.
    """
    alert_groups = try_get(data, lambda x: x['alerts'], list) or []
    for group in alert_groups:
        if not isinstance(group, dict):
            continue
        for alert in group.values():
            alert_type = alert.get('type')
            message = cls._get_text(alert, 'text') if alert_type else None
            if message:
                yield alert_type, message
706
707 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
708 errors, warnings = [], []
709 for alert_type, alert_message in alerts:
710 if alert_type.lower() == 'error' and fatal:
711 errors.append([alert_type, alert_message])
712 elif alert_message not in self._IGNORED_WARNINGS:
713 warnings.append([alert_type, alert_message])
714
715 for alert_type, alert_message in (warnings + errors[:-1]):
716 self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
717 if errors:
718 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
719
720 def _extract_and_report_alerts(self, data, *args, **kwargs):
721 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
722
def _extract_badges(self, renderer: dict):
    """Collect the badges of `renderer` as a list of ``{'type': BadgeType}`` dicts.

    Every ``badges[*].metadataBadgeRenderer`` entry is classified, in
    order of preference, by its privacy icon type, by its badge style
    and finally by its human-readable label. Badges that match none of
    these are ignored, so each badge contributes at most one entry.
    """
    privacy_icon_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM
    }

    badges = []
    for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer'), default=[]):
        badge_type = (
            privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
            or badge_style_map.get(traverse_obj(badge, 'style'))
        )
        if badge_type:
            badges.append({'type': badge_type})
            continue

        # fallback, won't work in some languages
        label = traverse_obj(badge, 'label', expected_type=str, default='').lower()
        for match, label_badge_type in label_map.items():
            if match in label:
                # Record the type matched by the label. `badge_type` is always
                # falsy on this path, so appending it would store a useless entry.
                badges.append({'type': label_badge_type})
                # One entry per badge: stop at the first matching label
                break

    return badges
762
@staticmethod
def _has_badge(badges, badge_type):
    """Return True when any entry of `badges` has 'type' equal to `badge_type`."""
    matching = traverse_obj(badges, lambda _, v: v['type'] == badge_type)
    return bool(matching)
766
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found at any of the given paths in `data`.

    With no path, `data` itself is inspected. For each candidate object
    'simpleText' is preferred; otherwise the 'text' of its 'runs' (or of
    the object itself when it is a list) are joined, limited to the
    first `max_runs` runs when that is given. Returns None when nothing
    yields text.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # A path without `...` or alternative keys is wrapped so the
            # single result is inspected as one candidate
            branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                candidates = [candidates]

        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text

            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
788
def _get_count(self, data, *path_list):
    """Parse a count from the text found at `path_list` in `data`.

    parse_count is tried first; when it gives None, the leading run of
    digits and commas (after removing all whitespace) is converted with
    str_to_int instead.
    """
    count_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(count_text)
    if parsed is not None:
        return parsed

    compact_text = re.sub(r'\s', '', count_text)
    leading_digits = self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None)
    return str_to_int(leading_digits)
796
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key

    Returns a list of dicts with 'url', 'height' and 'width'; entries
    without a valid URL are skipped.
    """
    results = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in entries:
            url = url_or_none(entry.get('url'))
            if not url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in url:
                url = url.partition('?')[0]
            results.append({
                'url': url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return results
820
@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

    Returns None when the text holds no recognised relative time, or when
    the '<n> <unit> ago' form cannot be converted.
    """
    mobj = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if not mobj:
        return None

    start = mobj.group('start')
    if start:
        return datetime_from_str(start)

    offset = 'now-%s%s' % mobj.group('time', 'unit')
    try:
        return datetime_from_str(offset)
    except ValueError:
        return None
836
def _parse_time_text(self, text):
    """Convert a time text into a timestamp, or None when it cannot be parsed.

    Relative forms ('3 days ago', 'today') are tried first, then the
    whole text as a date, then a date fragment cut out of the lowercased
    text. A warning is issued for unparsable text only when the preferred
    language is unset or 'en'.
    """
    if not text:
        return None

    timestamp = None
    relative = self.extract_relative_time(text)
    if isinstance(relative, datetime.datetime):
        timestamp = calendar.timegm(relative.timetuple())

    if timestamp is None:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_fragment = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_fragment)

    if timestamp is None and self._preferred_lang in (None, 'en'):
        self.report_warning(
            f'Cannot parse localized time text "{text}"', only_once=True)
    return timestamp
856
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` via _call_api inside a RetryManager loop and
    return the response.

    The attempt is retried (by setting retry.error and continuing) on:
      - network errors that are not HTTP errors
      - HTTP errors with a status code other than 403 and 429
      - alerts whose message contains 'unknown error'
      - responses in which `check_get_keys` cannot be found

    Any other ExtractorError is handed to _error_or_warning together with the
    caller's `fatal` flag, and the result of that call is returned.
    NOTE(review): what happens once the retries are exhausted is decided by
    RetryManager, which is not visible here.

    @param item_id          passed to _call_api as video_id; also used when parsing an error body
    @param query            passed through to _call_api
    @param ytcfg            used to derive the context and API key for `default_client`
    @param check_get_keys   key(s) looked up in the response with traverse_obj;
                            a falsy result is treated as incomplete data
    @param fatal            only forwarded to _error_or_warning
    """
    for retry in self.RetryManager():
        try:
            # fatal=True regardless of the caller's `fatal`, so that failures reach
            # the except clause below (presumably raised as ExtractorError)
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            # Not a network problem: do not retry
            if not isinstance(e.cause, network_exceptions):
                return self._error_or_warning(e, fatal=fatal)
            # Network problem without an HTTP response: retry
            elif not isinstance(e.cause, urllib.error.HTTPError):
                retry.error = e
                continue

            # HTTP error: peek at the start of the body to decide whether it is HTML
            first_bytes = e.cause.read(512)
            if not is_html(first_bytes):
                # Non-HTML error body: try to read error.message from it as JSON
                # and report it as a non-fatal alert
                yt_error = try_get(
                    self._parse_json(
                        self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if e.cause.code not in (403, 429):
                retry.error = e
                continue
            # 403 and 429 are not retried
            return self._error_or_warning(e, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            retry.error = ExtractorError('Incomplete data received', expected=True)
            continue

        return response
908
@staticmethod
def is_music_url(url):
    """Return True if url starts with http(s)://music.youtube.com/, else False"""
    music_url_re = r'https?://music\.youtube\.com/'
    return bool(re.match(music_url_re, url))
912
def _extract_video(self, renderer):
    """
    Build a 'url'-type result dict (ie_key of YoutubeIE) from a video renderer.

    Values are read from `renderer`; title, channel, channel id and time text
    fall back to the reel player header nested under its navigationEndpoint.
    The view count is stored under 'concurrent_view_count' for live/upcoming
    entries and under 'view_count' otherwise.
    'timestamp' is only filled in when the 'approximate_date' extractor
    argument of YoutubeTabIE is set.
    """
    video_id = renderer.get('videoId')

    reel_header = traverse_obj(renderer, (
        'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
        'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))

    title = self._get_text(renderer, 'title', 'headline')
    if not title:
        title = self._get_text(reel_header, 'reelTitleText')
    description = self._get_text(renderer, 'descriptionSnippet')

    # Duration: explicit seconds first, then the length text, then the accessibility label
    duration = int_or_none(renderer.get('lengthSeconds'))
    if duration is None:
        length_text = self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
        duration = parse_duration(length_text)
    if duration is None:
        # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab)
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False) or traverse_obj(
        reel_header, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId'))

    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)

    # Shorts get a /shorts/ URL, everything else a regular watch URL
    relative_nav_url = traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)
    navigation_url = urljoin('https://www.youtube.com/', relative_nav_url) or ''
    is_short = overlay_style == 'SHORTS' or '/shorts/' in navigation_url
    url = (
        f'https://www.youtube.com/shorts/{video_id}' if is_short
        else f'https://www.youtube.com/watch?v={video_id}')

    time_text = self._get_text(renderer, 'publishedTimeText', 'videoInfo')
    if not time_text:
        time_text = self._get_text(reel_header, 'timestampText') or ''
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW):
        live_status = 'is_live'
    else:
        live_status = None

    # videoInfo is a string like '50K views • 10 years ago'.
    view_count_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
    if 'no views' in view_count_text.lower():
        view_count = 0
    else:
        view_count = self._get_count({'simpleText': view_count_text})
    if live_status in ('is_live', 'is_upcoming'):
        view_count_field = 'concurrent_view_count'
    else:
        view_count_field = 'view_count'

    ie_key = YoutubeIE.ie_key()

    channel = self._get_text(renderer, 'ownerText', 'shortBylineText')
    if not channel:
        channel = self._get_text(reel_header, 'channelTitleText')
    channel_url = f'https://www.youtube.com/channel/{channel_id}' if channel_id else None

    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    timestamp = None
    if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
        timestamp = self._parse_time_text(time_text)

    if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC):
        availability = 'public'
    else:
        # A missing badge is passed as None rather than False
        availability = self._availability(
            is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
            needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
            needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
            is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)

    return {
        '_type': 'url',
        'ie_key': ie_key,
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'channel_id': channel_id,
        'channel': channel,
        'channel_url': channel_url,
        'thumbnails': thumbnails,
        'timestamp': timestamp,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
        view_count_field: view_count,
        'live_status': live_status
    }
995
996
997class YoutubeIE(YoutubeBaseInfoExtractor):
998 IE_DESC = 'YouTube'
999 _VALID_URL = r"""(?x)^
1000 (
1001 (?:https?://|//) # http(s):// or protocol-independent URL
1002 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
1003 (?:www\.)?deturl\.com/www\.youtube\.com|
1004 (?:www\.)?pwnyoutube\.com|
1005 (?:www\.)?hooktube\.com|
1006 (?:www\.)?yourepeat\.com|
1007 tube\.majestyc\.net|
1008 %(invidious)s|
1009 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
1010 (?:.*?\#/)? # handle anchor (#/) redirect urls
1011 (?: # the various things that can precede the ID:
1012 (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
1013 |(?: # or the v= param in all its forms
1014 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
1015 (?:\?|\#!?) # the params delimiter ? or # or #!
1016 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
1017 v=
1018 )
1019 ))
1020 |(?:
1021 youtu\.be| # just youtu.be/xxxx
1022 vid\.plus| # or vid.plus/xxxx
1023 zwearz\.com/watch| # or zwearz.com/watch/xxxx
1024 %(invidious)s
1025 )/
1026 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
1027 )
1028 )? # all until now is optional -> you can pass the naked ID
1029 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
1030 (?(1).+)? # if we found the ID, everything can follow
1031 (?:\#|$)""" % {
1032 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
1033 }
1034 _EMBED_REGEX = [
1035 r'''(?x)
1036 (?:
1037 <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
1038 data-video-url=|
1039 <embed[^>]+?src=|
1040 embedSWF\(?:\s*|
1041 <object[^>]+data=|
1042 new\s+SWFObject\(
1043 )
1044 (["\'])
1045 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1046 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1047 \1''',
1048 # https://wordpress.org/plugins/lazy-load-for-videos/
1049 r'''(?xs)
1050 <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
1051 \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
1052 ]
1053 _RETURN_TYPE = 'video' # While there are "multifeed" test cases, they don't seem to actually exist anymore
1054
1055 _PLAYER_INFO_RE = (
1056 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
1057 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
1058 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
1059 )
1060 _formats = {
1061 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1062 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1063 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
1064 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
1065 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
1066 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1067 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1068 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1069 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
1070 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1071 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1072 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1073 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1074 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1075 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1076 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1077 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1078 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1079
1080
1081 # 3D videos
1082 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1083 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1084 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1085 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1086 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1087 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1088 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1089
1090 # Apple HTTP Live Streaming
1091 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1092 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1093 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1094 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1095 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1096 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1097 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1098 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
1099
1100 # DASH mp4 video
1101 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1102 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1103 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1104 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1105 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
1106 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
1107 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1108 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1109 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1110 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1111 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1112 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
1113
1114 # Dash mp4 audio
1115 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1116 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1117 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1118 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1119 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1120 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1121 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
1122
1123 # Dash webm
1124 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1125 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1126 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1127 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1128 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1129 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1130 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1131 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1132 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1133 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1134 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1135 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1136 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1137 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1138 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1139 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1140 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1141 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1142 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1143 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1144 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1145 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1146
1147 # Dash webm audio
1148 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1149 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1150
1151 # Dash webm audio with opus inside
1152 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1153 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1154 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1155
1156 # RTMP (unnamed)
1157 '_rtmp': {'protocol': 'rtmp'},
1158
1159 # av01 video only formats sometimes served with "unknown" codecs
1160 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1161 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1162 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1163 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1164 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1165 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1166 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1167 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1168 }
1169 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
1170
1171 _GEO_BYPASS = False
1172
1173 IE_NAME = 'youtube'
1174 _TESTS = [
1175 {
1176 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1177 'info_dict': {
1178 'id': 'BaW_jenozKc',
1179 'ext': 'mp4',
1180 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1181 'uploader': 'Philipp Hagemeister',
1182 'uploader_id': 'phihag',
1183 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1184 'channel': 'Philipp Hagemeister',
1185 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1186 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1187 'upload_date': '20121002',
1188 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1189 'categories': ['Science & Technology'],
1190 'tags': ['youtube-dl'],
1191 'duration': 10,
1192 'view_count': int,
1193 'like_count': int,
1194 'availability': 'public',
1195 'playable_in_embed': True,
1196 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1197 'live_status': 'not_live',
1198 'age_limit': 0,
1199 'start_time': 1,
1200 'end_time': 9,
1201 'comment_count': int,
1202 'channel_follower_count': int
1203 }
1204 },
1205 {
1206 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1207 'note': 'Embed-only video (#1746)',
1208 'info_dict': {
1209 'id': 'yZIXLfi8CZQ',
1210 'ext': 'mp4',
1211 'upload_date': '20120608',
1212 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1213 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1214 'uploader': 'SET India',
1215 'uploader_id': 'setindia',
1216 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1217 'age_limit': 18,
1218 },
1219 'skip': 'Private video',
1220 },
1221 {
1222 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1223 'note': 'Use the first video ID in the URL',
1224 'info_dict': {
1225 'id': 'BaW_jenozKc',
1226 'ext': 'mp4',
1227 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1228 'uploader': 'Philipp Hagemeister',
1229 'uploader_id': 'phihag',
1230 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1231 'channel': 'Philipp Hagemeister',
1232 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1233 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1234 'upload_date': '20121002',
1235 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1236 'categories': ['Science & Technology'],
1237 'tags': ['youtube-dl'],
1238 'duration': 10,
1239 'view_count': int,
1240 'like_count': int,
1241 'availability': 'public',
1242 'playable_in_embed': True,
1243 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1244 'live_status': 'not_live',
1245 'age_limit': 0,
1246 'comment_count': int,
1247 'channel_follower_count': int
1248 },
1249 'params': {
1250 'skip_download': True,
1251 },
1252 },
1253 {
1254 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1255 'note': '256k DASH audio (format 141) via DASH manifest',
1256 'info_dict': {
1257 'id': 'a9LDPn-MO4I',
1258 'ext': 'm4a',
1259 'upload_date': '20121002',
1260 'uploader_id': '8KVIDEO',
1261 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1262 'description': '',
1263 'uploader': '8KVIDEO',
1264 'title': 'UHDTV TEST 8K VIDEO.mp4'
1265 },
1266 'params': {
1267 'youtube_include_dash_manifest': True,
1268 'format': '141',
1269 },
1270 'skip': 'format 141 not served anymore',
1271 },
1272 # DASH manifest with encrypted signature
1273 {
1274 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1275 'info_dict': {
1276 'id': 'IB3lcPjvWLA',
1277 'ext': 'm4a',
1278 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1279 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1280 'duration': 244,
1281 'uploader': 'AfrojackVEVO',
1282 'uploader_id': 'AfrojackVEVO',
1283 'upload_date': '20131011',
1284 'abr': 129.495,
1285 'like_count': int,
1286 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1287 'playable_in_embed': True,
1288 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1289 'view_count': int,
1290 'track': 'The Spark',
1291 'live_status': 'not_live',
1292 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1293 'channel': 'Afrojack',
1294 'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
1295 'tags': 'count:19',
1296 'availability': 'public',
1297 'categories': ['Music'],
1298 'age_limit': 0,
1299 'alt_title': 'The Spark',
1300 'channel_follower_count': int
1301 },
1302 'params': {
1303 'youtube_include_dash_manifest': True,
1304 'format': '141/bestaudio[ext=m4a]',
1305 },
1306 },
1307 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1308 {
1309 'note': 'Embed allowed age-gate video',
1310 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1311 'info_dict': {
1312 'id': 'HtVdAasjOgU',
1313 'ext': 'mp4',
1314 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1315 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1316 'duration': 142,
1317 'uploader': 'The Witcher',
1318 'uploader_id': 'WitcherGame',
1319 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1320 'upload_date': '20140605',
1321 'age_limit': 18,
1322 'categories': ['Gaming'],
1323 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1324 'availability': 'needs_auth',
1325 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1326 'like_count': int,
1327 'channel': 'The Witcher',
1328 'live_status': 'not_live',
1329 'tags': 'count:17',
1330 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1331 'playable_in_embed': True,
1332 'view_count': int,
1333 'channel_follower_count': int
1334 },
1335 },
1336 {
1337 'note': 'Age-gate video with embed allowed in public site',
1338 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1339 'info_dict': {
1340 'id': 'HsUATh_Nc2U',
1341 'ext': 'mp4',
1342 'title': 'Godzilla 2 (Official Video)',
1343 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1344 'upload_date': '20200408',
1345 'uploader_id': 'FlyingKitty900',
1346 'uploader': 'FlyingKitty',
1347 'age_limit': 18,
1348 'availability': 'needs_auth',
1349 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1350 'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
1351 'channel': 'FlyingKitty',
1352 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1353 'view_count': int,
1354 'categories': ['Entertainment'],
1355 'live_status': 'not_live',
1356 'tags': ['Flyingkitty', 'godzilla 2'],
1357 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1358 'like_count': int,
1359 'duration': 177,
1360 'playable_in_embed': True,
1361 'channel_follower_count': int
1362 },
1363 },
1364 {
1365 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1366 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1367 'info_dict': {
1368 'id': 'Tq92D6wQ1mg',
1369 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1370 'ext': 'mp4',
1371 'upload_date': '20191228',
1372 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1373 'uploader': 'Projekt Melody',
1374 'description': 'md5:17eccca93a786d51bc67646756894066',
1375 'age_limit': 18,
1376 'like_count': int,
1377 'availability': 'needs_auth',
1378 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1379 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1380 'view_count': int,
1381 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1382 'channel': 'Projekt Melody',
1383 'live_status': 'not_live',
1384 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1385 'playable_in_embed': True,
1386 'categories': ['Entertainment'],
1387 'duration': 106,
1388 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1389 'comment_count': int,
1390 'channel_follower_count': int
1391 },
1392 },
1393 {
1394 'note': 'Non-Agegated non-embeddable video',
1395 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1396 'info_dict': {
1397 'id': 'MeJVWBSsPAY',
1398 'ext': 'mp4',
1399 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1400 'uploader': 'Herr Lurik',
1401 'uploader_id': 'st3in234',
1402 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1403 'upload_date': '20130730',
1404 'track': 'Such mich find mich',
1405 'age_limit': 0,
1406 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1407 'like_count': int,
1408 'playable_in_embed': False,
1409 'creator': 'OOMPH!',
1410 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1411 'view_count': int,
1412 'alt_title': 'Such mich find mich',
1413 'duration': 210,
1414 'channel': 'Herr Lurik',
1415 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1416 'categories': ['Music'],
1417 'availability': 'public',
1418 'uploader_url': 'http://www.youtube.com/user/st3in234',
1419 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1420 'live_status': 'not_live',
1421 'artist': 'OOMPH!',
1422 'channel_follower_count': int
1423 },
1424 },
1425 {
1426 'note': 'Non-bypassable age-gated video',
1427 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1428 'only_matching': True,
1429 },
1430 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1431 # YouTube Red ad is not captured for creator
1432 {
1433 'url': '__2ABJjxzNo',
1434 'info_dict': {
1435 'id': '__2ABJjxzNo',
1436 'ext': 'mp4',
1437 'duration': 266,
1438 'upload_date': '20100430',
1439 'uploader_id': 'deadmau5',
1440 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1441 'creator': 'deadmau5',
1442 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1443 'uploader': 'deadmau5',
1444 'title': 'Deadmau5 - Some Chords (HD)',
1445 'alt_title': 'Some Chords',
1446 'availability': 'public',
1447 'tags': 'count:14',
1448 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1449 'view_count': int,
1450 'live_status': 'not_live',
1451 'channel': 'deadmau5',
1452 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1453 'like_count': int,
1454 'track': 'Some Chords',
1455 'artist': 'deadmau5',
1456 'playable_in_embed': True,
1457 'age_limit': 0,
1458 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1459 'categories': ['Music'],
1460 'album': 'Some Chords',
1461 'channel_follower_count': int
1462 },
1463 'expected_warnings': [
1464 'DASH manifest missing',
1465 ]
1466 },
1467 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1468 {
1469 'url': 'lqQg6PlCWgI',
1470 'info_dict': {
1471 'id': 'lqQg6PlCWgI',
1472 'ext': 'mp4',
1473 'duration': 6085,
1474 'upload_date': '20150827',
1475 'uploader_id': 'olympic',
1476 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1477 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
1478 'uploader': 'Olympics',
1479 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1480 'like_count': int,
1481 'release_timestamp': 1343767800,
1482 'playable_in_embed': True,
1483 'categories': ['Sports'],
1484 'release_date': '20120731',
1485 'channel': 'Olympics',
1486 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1487 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1488 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1489 'age_limit': 0,
1490 'availability': 'public',
1491 'live_status': 'was_live',
1492 'view_count': int,
1493 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
1494 'channel_follower_count': int
1495 },
1496 'params': {
1497 'skip_download': 'requires avconv',
1498 }
1499 },
1500 # Non-square pixels
1501 {
1502 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1503 'info_dict': {
1504 'id': '_b-2C3KPAM0',
1505 'ext': 'mp4',
1506 'stretched_ratio': 16 / 9.,
1507 'duration': 85,
1508 'upload_date': '20110310',
1509 'uploader_id': 'AllenMeow',
1510 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1511 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1512 'uploader': '孫ᄋᄅ',
1513 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1514 'playable_in_embed': True,
1515 'channel': '孫ᄋᄅ',
1516 'age_limit': 0,
1517 'tags': 'count:11',
1518 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1519 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1520 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1521 'view_count': int,
1522 'categories': ['People & Blogs'],
1523 'like_count': int,
1524 'live_status': 'not_live',
1525 'availability': 'unlisted',
1526 'comment_count': int,
1527 'channel_follower_count': int
1528 },
1529 },
1530 # url_encoded_fmt_stream_map is empty string
1531 {
1532 'url': 'qEJwOuvDf7I',
1533 'info_dict': {
1534 'id': 'qEJwOuvDf7I',
1535 'ext': 'webm',
1536 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1537 'description': '',
1538 'upload_date': '20150404',
1539 'uploader_id': 'spbelect',
1540 'uploader': 'Наблюдатели Петербурга',
1541 },
1542 'params': {
1543 'skip_download': 'requires avconv',
1544 },
1545 'skip': 'This live event has ended.',
1546 },
1547 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1548 {
1549 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1550 'info_dict': {
1551 'id': 'FIl7x6_3R5Y',
1552 'ext': 'webm',
1553 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1554 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1555 'duration': 220,
1556 'upload_date': '20150625',
1557 'uploader_id': 'dorappi2000',
1558 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1559 'uploader': 'dorappi2000',
1560 'formats': 'mincount:31',
1561 },
1562 'skip': 'not actual anymore',
1563 },
1564 # DASH manifest with segment_list
1565 {
1566 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1567 'md5': '8ce563a1d667b599d21064e982ab9e31',
1568 'info_dict': {
1569 'id': 'CsmdDsKjzN8',
1570 'ext': 'mp4',
1571 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1572 'uploader': 'Airtek',
1573 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1574 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1575 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1576 },
1577 'params': {
1578 'youtube_include_dash_manifest': True,
1579 'format': '135', # bestvideo
1580 },
1581 'skip': 'This live event has ended.',
1582 },
1583 {
1584 # Multifeed videos (multiple cameras), URL is for Main Camera
1585 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1586 'info_dict': {
1587 'id': 'jvGDaLqkpTg',
1588 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1589 'description': 'md5:e03b909557865076822aa169218d6a5d',
1590 },
1591 'playlist': [{
1592 'info_dict': {
1593 'id': 'jvGDaLqkpTg',
1594 'ext': 'mp4',
1595 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1596 'description': 'md5:e03b909557865076822aa169218d6a5d',
1597 'duration': 10643,
1598 'upload_date': '20161111',
1599 'uploader': 'Team PGP',
1600 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1601 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1602 },
1603 }, {
1604 'info_dict': {
1605 'id': '3AKt1R1aDnw',
1606 'ext': 'mp4',
1607 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1608 'description': 'md5:e03b909557865076822aa169218d6a5d',
1609 'duration': 10991,
1610 'upload_date': '20161111',
1611 'uploader': 'Team PGP',
1612 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1613 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1614 },
1615 }, {
1616 'info_dict': {
1617 'id': 'RtAMM00gpVc',
1618 'ext': 'mp4',
1619 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1620 'description': 'md5:e03b909557865076822aa169218d6a5d',
1621 'duration': 10995,
1622 'upload_date': '20161111',
1623 'uploader': 'Team PGP',
1624 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1625 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1626 },
1627 }, {
1628 'info_dict': {
1629 'id': '6N2fdlP3C5U',
1630 'ext': 'mp4',
1631 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1632 'description': 'md5:e03b909557865076822aa169218d6a5d',
1633 'duration': 10990,
1634 'upload_date': '20161111',
1635 'uploader': 'Team PGP',
1636 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1637 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1638 },
1639 }],
1640 'params': {
1641 'skip_download': True,
1642 },
1643 'skip': 'Not multifeed anymore',
1644 },
1645 {
1646 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1647 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1648 'info_dict': {
1649 'id': 'gVfLd0zydlo',
1650 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1651 },
1652 'playlist_count': 2,
1653 'skip': 'Not multifeed anymore',
1654 },
1655 {
1656 'url': 'https://vid.plus/FlRa-iH7PGw',
1657 'only_matching': True,
1658 },
1659 {
1660 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1661 'only_matching': True,
1662 },
1663 {
1664 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1665 # Also tests cut-off URL expansion in video description (see
1666 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1667 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1668 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1669 'info_dict': {
1670 'id': 'lsguqyKfVQg',
1671 'ext': 'mp4',
1672 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1673 'alt_title': 'Dark Walk',
1674 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1675 'duration': 133,
1676 'upload_date': '20151119',
1677 'uploader_id': 'IronSoulElf',
1678 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1679 'uploader': 'IronSoulElf',
1680 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1681 'track': 'Dark Walk',
1682 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1683 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1684 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1685 'categories': ['Film & Animation'],
1686 'view_count': int,
1687 'live_status': 'not_live',
1688 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1689 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1690 'tags': 'count:13',
1691 'availability': 'public',
1692 'channel': 'IronSoulElf',
1693 'playable_in_embed': True,
1694 'like_count': int,
1695 'age_limit': 0,
1696 'channel_follower_count': int
1697 },
1698 'params': {
1699 'skip_download': True,
1700 },
1701 },
1702 {
1703 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1704 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1705 'only_matching': True,
1706 },
1707 {
1708 # Video with yt:stretch=17:0
1709 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1710 'info_dict': {
1711 'id': 'Q39EVAstoRM',
1712 'ext': 'mp4',
1713 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1714 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1715 'upload_date': '20151107',
1716 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1717 'uploader': 'CH GAMER DROID',
1718 },
1719 'params': {
1720 'skip_download': True,
1721 },
1722 'skip': 'This video does not exist.',
1723 },
1724 {
1725 # Video with incomplete 'yt:stretch=16:'
1726 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1727 'only_matching': True,
1728 },
1729 {
1730 # Video licensed under Creative Commons
1731 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1732 'info_dict': {
1733 'id': 'M4gD1WSo5mA',
1734 'ext': 'mp4',
1735 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1736 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1737 'duration': 721,
1738 'upload_date': '20150128',
1739 'uploader_id': 'BerkmanCenter',
1740 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1741 'uploader': 'The Berkman Klein Center for Internet & Society',
1742 'license': 'Creative Commons Attribution license (reuse allowed)',
1743 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1744 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1745 'like_count': int,
1746 'age_limit': 0,
1747 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1748 'channel': 'The Berkman Klein Center for Internet & Society',
1749 'availability': 'public',
1750 'view_count': int,
1751 'categories': ['Education'],
1752 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1753 'live_status': 'not_live',
1754 'playable_in_embed': True,
1755 'comment_count': int,
1756 'channel_follower_count': int,
1757 'chapters': list,
1758 },
1759 'params': {
1760 'skip_download': True,
1761 },
1762 },
1763 {
1764 # Channel-like uploader_url
1765 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1766 'info_dict': {
1767 'id': 'eQcmzGIKrzg',
1768 'ext': 'mp4',
1769 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1770 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1771 'duration': 4060,
1772 'upload_date': '20151120',
1773 'uploader': 'Bernie Sanders',
1774 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1775 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1776 'license': 'Creative Commons Attribution license (reuse allowed)',
1777 'playable_in_embed': True,
1778 'tags': 'count:12',
1779 'like_count': int,
1780 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1781 'age_limit': 0,
1782 'availability': 'public',
1783 'categories': ['News & Politics'],
1784 'channel': 'Bernie Sanders',
1785 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1786 'view_count': int,
1787 'live_status': 'not_live',
1788 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1789 'comment_count': int,
1790 'channel_follower_count': int,
1791 'chapters': list,
1792 },
1793 'params': {
1794 'skip_download': True,
1795 },
1796 },
1797 {
1798 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1799 'only_matching': True,
1800 },
1801 {
1802 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1803 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1804 'only_matching': True,
1805 },
1806 {
1807 # Rental video preview
1808 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1809 'info_dict': {
1810 'id': 'uGpuVWrhIzE',
1811 'ext': 'mp4',
1812 'title': 'Piku - Trailer',
1813 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1814 'upload_date': '20150811',
1815 'uploader': 'FlixMatrix',
1816 'uploader_id': 'FlixMatrixKaravan',
1817 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1818 'license': 'Standard YouTube License',
1819 },
1820 'params': {
1821 'skip_download': True,
1822 },
1823 'skip': 'This video is not available.',
1824 },
1825 {
1826 # YouTube Red video with episode data
1827 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1828 'info_dict': {
1829 'id': 'iqKdEhx-dD4',
1830 'ext': 'mp4',
1831 'title': 'Isolation - Mind Field (Ep 1)',
1832 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1833 'duration': 2085,
1834 'upload_date': '20170118',
1835 'uploader': 'Vsauce',
1836 'uploader_id': 'Vsauce',
1837 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1838 'series': 'Mind Field',
1839 'season_number': 1,
1840 'episode_number': 1,
1841 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1842 'tags': 'count:12',
1843 'view_count': int,
1844 'availability': 'public',
1845 'age_limit': 0,
1846 'channel': 'Vsauce',
1847 'episode': 'Episode 1',
1848 'categories': ['Entertainment'],
1849 'season': 'Season 1',
1850 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1851 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1852 'like_count': int,
1853 'playable_in_embed': True,
1854 'live_status': 'not_live',
1855 'channel_follower_count': int
1856 },
1857 'params': {
1858 'skip_download': True,
1859 },
1860 'expected_warnings': [
1861 'Skipping DASH manifest',
1862 ],
1863 },
1864 {
1865 # The following content has been identified by the YouTube community
1866 # as inappropriate or offensive to some audiences.
1867 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1868 'info_dict': {
1869 'id': '6SJNVb0GnPI',
1870 'ext': 'mp4',
1871 'title': 'Race Differences in Intelligence',
1872 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1873 'duration': 965,
1874 'upload_date': '20140124',
1875 'uploader': 'New Century Foundation',
1876 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1877 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1878 },
1879 'params': {
1880 'skip_download': True,
1881 },
1882 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1883 },
1884 {
1885 # itag 212
1886 'url': '1t24XAntNCY',
1887 'only_matching': True,
1888 },
1889 {
1890 # geo restricted to JP
1891 'url': 'sJL6WA-aGkQ',
1892 'only_matching': True,
1893 },
1894 {
1895 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1896 'only_matching': True,
1897 },
1898 {
1899 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1900 'only_matching': True,
1901 },
1902 {
1903 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1904 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1905 'only_matching': True,
1906 },
1907 {
1908 # DRM protected
1909 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1910 'only_matching': True,
1911 },
1912 {
1913 # Video with unsupported adaptive stream type formats
1914 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1915 'info_dict': {
1916 'id': 'Z4Vy8R84T1U',
1917 'ext': 'mp4',
1918 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1919 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1920 'duration': 433,
1921 'upload_date': '20130923',
1922 'uploader': 'Amelia Putri Harwita',
1923 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1924 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1925 'formats': 'maxcount:10',
1926 },
1927 'params': {
1928 'skip_download': True,
1929 'youtube_include_dash_manifest': False,
1930 },
1931 'skip': 'not actual anymore',
1932 },
1933 {
1934 # YouTube Music auto-generated description
1935 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1936 'info_dict': {
1937 'id': 'MgNrAu2pzNs',
1938 'ext': 'mp4',
1939 'title': 'Voyeur Girl',
1940 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1941 'upload_date': '20190312',
1942 'uploader': 'Stephen - Topic',
1943 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1944 'artist': 'Stephen',
1945 'track': 'Voyeur Girl',
1946 'album': 'it\'s too much love to know my dear',
1947 'release_date': '20190313',
1948 'release_year': 2019,
1949 'alt_title': 'Voyeur Girl',
1950 'view_count': int,
1951 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1952 'playable_in_embed': True,
1953 'like_count': int,
1954 'categories': ['Music'],
1955 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1956 'channel': 'Stephen',
1957 'availability': 'public',
1958 'creator': 'Stephen',
1959 'duration': 169,
1960 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
1961 'age_limit': 0,
1962 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1963 'tags': 'count:11',
1964 'live_status': 'not_live',
1965 'channel_follower_count': int
1966 },
1967 'params': {
1968 'skip_download': True,
1969 },
1970 },
1971 {
1972 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1973 'only_matching': True,
1974 },
1975 {
1976 # invalid -> valid video id redirection
1977 'url': 'DJztXj2GPfl',
1978 'info_dict': {
1979 'id': 'DJztXj2GPfk',
1980 'ext': 'mp4',
1981 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1982 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1983 'upload_date': '20090125',
1984 'uploader': 'Prochorowka',
1985 'uploader_id': 'Prochorowka',
1986 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1987 'artist': 'Panjabi MC',
1988 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1989 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1990 },
1991 'params': {
1992 'skip_download': True,
1993 },
1994 'skip': 'Video unavailable',
1995 },
1996 {
1997 # empty description results in an empty string
1998 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1999 'info_dict': {
2000 'id': 'x41yOUIvK2k',
2001 'ext': 'mp4',
2002 'title': 'IMG 3456',
2003 'description': '',
2004 'upload_date': '20170613',
2005 'uploader_id': 'ElevageOrVert',
2006 'uploader': 'ElevageOrVert',
2007 'view_count': int,
2008 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
2009 'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
2010 'like_count': int,
2011 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
2012 'tags': [],
2013 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
2014 'availability': 'public',
2015 'age_limit': 0,
2016 'categories': ['Pets & Animals'],
2017 'duration': 7,
2018 'playable_in_embed': True,
2019 'live_status': 'not_live',
2020 'channel': 'ElevageOrVert',
2021 'channel_follower_count': int
2022 },
2023 'params': {
2024 'skip_download': True,
2025 },
2026 },
2027 {
2028 # with '};' inside yt initial data (see [1])
2029 # see [2] for an example with '};' inside ytInitialPlayerResponse
2030 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
2031 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
2032 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
2033 'info_dict': {
2034 'id': 'CHqg6qOn4no',
2035 'ext': 'mp4',
2036 'title': 'Part 77 Sort a list of simple types in c#',
2037 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
2038 'upload_date': '20130831',
2039 'uploader_id': 'kudvenkat',
2040 'uploader': 'kudvenkat',
2041 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
2042 'like_count': int,
2043 'uploader_url': 'http://www.youtube.com/user/kudvenkat',
2044 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
2045 'live_status': 'not_live',
2046 'categories': ['Education'],
2047 'availability': 'public',
2048 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
2049 'tags': 'count:12',
2050 'playable_in_embed': True,
2051 'age_limit': 0,
2052 'view_count': int,
2053 'duration': 522,
2054 'channel': 'kudvenkat',
2055 'comment_count': int,
2056 'channel_follower_count': int,
2057 'chapters': list,
2058 },
2059 'params': {
2060 'skip_download': True,
2061 },
2062 },
2063 {
2064 # another example of '};' in ytInitialData
2065 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
2066 'only_matching': True,
2067 },
2068 {
2069 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
2070 'only_matching': True,
2071 },
2072 {
2073 # https://github.com/ytdl-org/youtube-dl/pull/28094
2074 'url': 'OtqTfy26tG0',
2075 'info_dict': {
2076 'id': 'OtqTfy26tG0',
2077 'ext': 'mp4',
2078 'title': 'Burn Out',
2079 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
2080 'upload_date': '20141120',
2081 'uploader': 'The Cinematic Orchestra - Topic',
2082 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2083 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2084 'artist': 'The Cinematic Orchestra',
2085 'track': 'Burn Out',
2086 'album': 'Every Day',
2087 'like_count': int,
2088 'live_status': 'not_live',
2089 'alt_title': 'Burn Out',
2090 'duration': 614,
2091 'age_limit': 0,
2092 'view_count': int,
2093 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2094 'creator': 'The Cinematic Orchestra',
2095 'channel': 'The Cinematic Orchestra',
2096 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
2097 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2098 'availability': 'public',
2099 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
2100 'categories': ['Music'],
2101 'playable_in_embed': True,
2102 'channel_follower_count': int
2103 },
2104 'params': {
2105 'skip_download': True,
2106 },
2107 },
2108 {
2109 # controversial video, only works with bpctr when authenticated with cookies
2110 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
2111 'only_matching': True,
2112 },
2113 {
2114 # controversial video, requires bpctr/contentCheckOk
2115 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
2116 'info_dict': {
2117 'id': 'SZJvDhaSDnc',
2118 'ext': 'mp4',
2119 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2120 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
2121 'uploader': 'CBS Mornings',
2122 'uploader_id': 'CBSThisMorning',
2123 'upload_date': '20140716',
2124 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2125 'duration': 170,
2126 'categories': ['News & Politics'],
2127 'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
2128 'view_count': int,
2129 'channel': 'CBS Mornings',
2130 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2131 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2132 'age_limit': 18,
2133 'availability': 'needs_auth',
2134 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2135 'like_count': int,
2136 'live_status': 'not_live',
2137 'playable_in_embed': True,
2138 'channel_follower_count': int
2139 }
2140 },
2141 {
2142 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2143 'url': 'cBvYw8_A0vQ',
2144 'info_dict': {
2145 'id': 'cBvYw8_A0vQ',
2146 'ext': 'mp4',
2147 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2148 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2149 'upload_date': '20201120',
2150 'uploader': 'Walk around Japan',
2151 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2152 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2153 'duration': 1456,
2154 'categories': ['Travel & Events'],
2155 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2156 'view_count': int,
2157 'channel': 'Walk around Japan',
2158 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2159 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2160 'age_limit': 0,
2161 'availability': 'public',
2162 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2163 'live_status': 'not_live',
2164 'playable_in_embed': True,
2165 'channel_follower_count': int
2166 },
2167 'params': {
2168 'skip_download': True,
2169 },
2170 }, {
2171 # Has multiple audio streams
2172 'url': 'WaOKSUlf4TM',
2173 'only_matching': True
2174 }, {
2175 # Requires Premium: has format 141 when requested using YTM url
2176 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2177 'only_matching': True
2178 }, {
2179 # multiple subtitles with same lang_code
2180 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2181 'only_matching': True,
2182 }, {
2183 # Force use of the android client fallback
2184 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2185 'info_dict': {
2186 'id': 'YOelRv7fMxY',
2187 'title': 'DIGGING A SECRET TUNNEL Part 1',
2188 'ext': '3gp',
2189 'upload_date': '20210624',
2190 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2191 'uploader': 'colinfurze',
2192 'uploader_id': 'colinfurze',
2193 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
2194 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2195 'duration': 596,
2196 'categories': ['Entertainment'],
2197 'uploader_url': 'http://www.youtube.com/user/colinfurze',
2198 'view_count': int,
2199 'channel': 'colinfurze',
2200 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2201 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2202 'age_limit': 0,
2203 'availability': 'public',
2204 'like_count': int,
2205 'live_status': 'not_live',
2206 'playable_in_embed': True,
2207 'channel_follower_count': int,
2208 'chapters': list,
2209 },
2210 'params': {
2211 'format': '17', # 3gp format available on android
2212 'extractor_args': {'youtube': {'player_client': ['android']}},
2213 },
2214 },
2215 {
2216 # Skip download of additional client configs (remix client config in this case)
2217 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2218 'only_matching': True,
2219 'params': {
2220 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2221 },
2222 }, {
2223 # shorts
2224 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2225 'only_matching': True,
2226 }, {
2227 'note': 'Storyboards',
2228 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2229 'info_dict': {
2230 'id': '5KLPxDtMqe8',
2231 'ext': 'mhtml',
2232 'format_id': 'sb0',
2233 'title': 'Your Brain is Plastic',
2234 'uploader_id': 'scishow',
2235 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2236 'upload_date': '20140324',
2237 'uploader': 'SciShow',
2238 'like_count': int,
2239 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2240 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2241 'view_count': int,
2242 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2243 'playable_in_embed': True,
2244 'tags': 'count:12',
2245 'uploader_url': 'http://www.youtube.com/user/scishow',
2246 'availability': 'public',
2247 'channel': 'SciShow',
2248 'live_status': 'not_live',
2249 'duration': 248,
2250 'categories': ['Education'],
2251 'age_limit': 0,
2252 'channel_follower_count': int,
2253 'chapters': list,
2254 }, 'params': {'format': 'mhtml', 'skip_download': True}
2255 }, {
2256 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2257 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2258 'info_dict': {
2259 'id': '2NUZ8W2llS4',
2260 'ext': 'mp4',
2261 'title': 'The NP that test your phone performance 🙂',
2262 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2263 'uploader': 'Leon Nguyen',
2264 'uploader_id': 'VNSXIII',
2265 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2266 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2267 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2268 'duration': 21,
2269 'view_count': int,
2270 'age_limit': 0,
2271 'categories': ['Gaming'],
2272 'tags': 'count:23',
2273 'playable_in_embed': True,
2274 'live_status': 'not_live',
2275 'upload_date': '20220103',
2276 'like_count': int,
2277 'availability': 'public',
2278 'channel': 'Leon Nguyen',
2279 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2280 'comment_count': int,
2281 'channel_follower_count': int
2282 }
2283 }, {
2284 # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
2285 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2286 'info_dict': {
2287 'id': '2NUZ8W2llS4',
2288 'ext': 'mp4',
2289 'title': 'The NP that test your phone performance 🙂',
2290 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2291 'uploader': 'Leon Nguyen',
2292 'uploader_id': 'VNSXIII',
2293 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2294 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2295 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2296 'duration': 21,
2297 'view_count': int,
2298 'age_limit': 0,
2299 'categories': ['Gaming'],
2300 'tags': 'count:23',
2301 'playable_in_embed': True,
2302 'live_status': 'not_live',
2303 'upload_date': '20220102',
2304 'like_count': int,
2305 'availability': 'public',
2306 'channel': 'Leon Nguyen',
2307 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2308 'comment_count': int,
2309 'channel_follower_count': int
2310 },
2311 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
2312 }, {
2313 # Date text is for a premiered video; ensure upload date is in UTC (published 1641172509)
2314 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2315 'info_dict': {
2316 'id': 'mzZzzBU6lrM',
2317 'ext': 'mp4',
2318 'title': 'I Met GeorgeNotFound In Real Life...',
2319 'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
2320 'uploader': 'Quackity',
2321 'uploader_id': 'QuackityHQ',
2322 'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
2323 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2324 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2325 'duration': 955,
2326 'view_count': int,
2327 'age_limit': 0,
2328 'categories': ['Entertainment'],
2329 'tags': 'count:26',
2330 'playable_in_embed': True,
2331 'live_status': 'not_live',
2332 'release_timestamp': 1641172509,
2333 'release_date': '20220103',
2334 'upload_date': '20220103',
2335 'like_count': int,
2336 'availability': 'public',
2337 'channel': 'Quackity',
2338 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2339 'channel_follower_count': int
2340 }
2341 },
2342 { # continuous livestream. Microformat upload date should be preferred.
2343 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2344 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2345 'info_dict': {
2346 'id': 'kgx4WGK0oNU',
2347 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2348 'ext': 'mp4',
2349 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2350 'availability': 'public',
2351 'age_limit': 0,
2352 'release_timestamp': 1637975704,
2353 'upload_date': '20210619',
2354 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2355 'live_status': 'is_live',
2356 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
2357 'uploader': '阿鲍Abao',
2358 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2359 'channel': 'Abao in Tokyo',
2360 'channel_follower_count': int,
2361 'release_date': '20211127',
2362 'tags': 'count:39',
2363 'categories': ['People & Blogs'],
2364 'like_count': int,
2365 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2366 'view_count': int,
2367 'playable_in_embed': True,
2368 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
2369 'concurrent_view_count': int,
2370 },
2371 'params': {'skip_download': True}
2372 }, {
2373 # Story. Requires specific player params to work.
2374 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
2375 'info_dict': {
2376 'id': 'vv8qTUWmulI',
2377 'ext': 'mp4',
2378 'availability': 'unlisted',
2379 'view_count': int,
2380 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
2381 'upload_date': '20220526',
2382 'categories': ['Education'],
2383 'title': 'Story',
2384 'channel': 'IT\'S HISTORY',
2385 'description': '',
2386 'uploader_id': 'BlastfromthePast',
2387 'duration': 12,
2388 'uploader': 'IT\'S HISTORY',
2389 'playable_in_embed': True,
2390 'age_limit': 0,
2391 'live_status': 'not_live',
2392 'tags': [],
2393 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
2394 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
2395 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
2396 },
2397 'skip': 'stories get removed after some period of time',
2398 }, {
2399 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2400 'info_dict': {
2401 'id': 'tjjjtzRLHvA',
2402 'ext': 'mp4',
2403 'title': 'ハッシュタグ無し };if window.ytcsi',
2404 'upload_date': '20220323',
2405 'like_count': int,
2406 'availability': 'unlisted',
2407 'channel': 'nao20010128nao',
2408 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
2409 'age_limit': 0,
2410 'uploader': 'nao20010128nao',
2411 'uploader_id': 'nao20010128nao',
2412 'categories': ['Music'],
2413 'view_count': int,
2414 'description': '',
2415 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2416 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2417 'live_status': 'not_live',
2418 'playable_in_embed': True,
2419 'channel_follower_count': int,
2420 'duration': 6,
2421 'tags': [],
2422 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
2423 }
2424 }, {
2425 # Prefer primary title+description language metadata by default
2426 # Do not prefer translated description if primary is empty
2427 'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
2428 'info_dict': {
2429 'id': 'el3E4MbxRqQ',
2430 'ext': 'mp4',
2431 'title': 'dlp test video 2 - primary sv no desc',
2432 'description': '',
2433 'channel': 'cole-dlp-test-acc',
2434 'tags': [],
2435 'view_count': int,
2436 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2437 'like_count': int,
2438 'playable_in_embed': True,
2439 'availability': 'unlisted',
2440 'thumbnail': 'https://i.ytimg.com/vi_webp/el3E4MbxRqQ/maxresdefault.webp',
2441 'age_limit': 0,
2442 'duration': 5,
2443 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
2444 'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2445 'live_status': 'not_live',
2446 'upload_date': '20220908',
2447 'categories': ['People & Blogs'],
2448 'uploader': 'cole-dlp-test-acc',
2449 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2450 },
2451 'params': {'skip_download': True}
2452 }, {
2453 # Extractor argument: prefer translated title+description
2454 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
2455 'info_dict': {
2456 'id': 'gHKT4uU8Zng',
2457 'ext': 'mp4',
2458 'channel': 'cole-dlp-test-acc',
2459 'tags': [],
2460 'duration': 5,
2461 'live_status': 'not_live',
2462 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2463 'upload_date': '20220728',
2464 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
2465 'view_count': int,
2466 'categories': ['People & Blogs'],
2467 'thumbnail': 'https://i.ytimg.com/vi_webp/gHKT4uU8Zng/maxresdefault.webp',
2468 'title': 'dlp test video title translated (fr)',
2469 'availability': 'public',
2470 'uploader': 'cole-dlp-test-acc',
2471 'age_limit': 0,
2472 'description': 'dlp test video description translated (fr)',
2473 'playable_in_embed': True,
2474 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2475 'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2476 },
2477 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
2478 'expected_warnings': [r'Preferring "fr" translated fields'],
2479 }, {
2480 'note': '6 channel audio',
2481 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2482 'only_matching': True,
2483 }
2484 ]
2485
2486 _WEBPAGE_TESTS = [
2487 # YouTube <object> embed
2488 {
2489 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
2490 'md5': '873c81d308b979f0e23ee7e620b312a3',
2491 'info_dict': {
2492 'id': 'msN87y-iEx0',
2493 'ext': 'mp4',
2494 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
2495 'upload_date': '20080526',
2496 'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
2497 'uploader': 'Christopher Sykes',
2498 'uploader_id': 'ChristopherJSykes',
2499 'age_limit': 0,
2500 'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
2501 'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
2502 'playable_in_embed': True,
2503 'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
2504 'like_count': int,
2505 'comment_count': int,
2506 'channel': 'Christopher Sykes',
2507 'live_status': 'not_live',
2508 'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
2509 'availability': 'public',
2510 'duration': 195,
2511 'view_count': int,
2512 'categories': ['Science & Technology'],
2513 'channel_follower_count': int,
2514 'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
2515 },
2516 'params': {
2517 'skip_download': True,
2518 }
2519 },
2520 ]
2521
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected here; every other URL is judged by the parent implementation.
    """
    from ..utils import parse_qs

    list_param = parse_qs(url).get('list', [None])[0]
    return False if list_param else super().suitable(url)
2530
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create the per-instance caches."""
    super().__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player JS (filled by _load_player)
    # _player_cache: cache-id tuple -> result or stored exception (filled by _cached)
    self._code_cache, self._player_cache = {}, {}
2535
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
    """Attach fragment generators to the formats flagged ``is_from_start``.

    Every format dict that has a truthy ``is_from_start`` is modified in place:
    ``is_live`` and ``fragments`` are set (plus ``protocol`` while the stream
    is live) and the ``is_from_start`` marker is removed. Nothing is returned.

    The fragment generators share one manifest cache (the ``formats`` list of
    this call), which is re-fetched under a lock once it is older than the
    delay requested by the generator.
    """
    lock = threading.Lock()
    start_time = time.time()
    formats = [fmt for fmt in formats if fmt.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        # Rebinds the enclosing ``formats``/``start_time``/``is_live`` once the
        # cached data is older than ``delay`` seconds.
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(
            prs, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        is_live = live_status == 'is_live'
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        with lock:
            refetch_manifest(format_id, delay)

        for candidate in formats:
            if candidate['format_id'] == format_id:
                return candidate['manifest_url'], candidate['manifest_stream_number'], is_live

        # The requested format is absent from the (possibly refreshed) list
        if is_live:
            self.report_warning(
                f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
        else:
            self.to_screen(f'{video_id}: Video is no longer live')
        return None

    for fmt in formats:
        fmt['is_live'] = is_live
        # For a stream that is no longer live, a snapshot of the format is
        # handed to the generator as ``manifestless_orig_fmt``.
        gen = functools.partial(
            self._live_dash_fragments, video_id, fmt['format_id'],
            live_start_time, mpd_feed, not is_live and fmt.copy())
        if not is_live:
            fmt['fragments'] = LazyList(gen({}))
        else:
            fmt['fragments'] = gen
            fmt['protocol'] = 'http_dash_segments_generator'
        del fmt['is_from_start']
2583
def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
    """Generate fragment dicts (``url``, ``fragment_count``) for one format.

    @param mpd_feed                callable ``(format_id, delay)`` returning
                                   ``(manifest_url, manifest_stream_number, is_live)`` or None
    @param manifestless_orig_fmt   a format dict to take ``fragments`` and
                                   ``fragment_base_url`` from instead of downloading
                                   the MPD, or a falsy value
    @param ctx                     dict; ``start`` and ``last_error`` are read from it

    The newest sequence number is taken from the "X-Head-Seqnum" header of the
    last yielded segment when there is one, otherwise from the manifest.
    The generator stops when the stream is reported as no longer live, when
    ``no_fragment_score`` exceeds 30, or after the first pass when
    ``manifestless_orig_fmt`` is given.
    """
    # FETCH_SPAN: minimum seconds between two polling iterations
    # MAX_DURATION: 432000 seconds == 120 hours
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # Bound up-front: the closure below returns ``last_seq`` on its failure
    # paths, which would otherwise raise NameError if the very first manifest
    # fetch fails before the loop has assigned it.
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        if manifestless_orig_fmt:
            fmt_info = manifestless_orig_fmt
        else:
            try:
                fmts, _ = self._extract_mpd_formats_and_subtitles(
                    mpd_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                fmts = None
            if not fmts:
                no_fragment_score += 2
                return False, last_seq
            fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
    while is_live:
        fetch_time = time.time()
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # ExtractorError is used purely as a signal to restart the outer loop
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                    'fragment_count': last_seq,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        if manifestless_orig_fmt:
            # Stop at the first iteration if running for post-live manifestless;
            # fragment count no longer increase since it starts
            break

        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2694
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in *ytcfgs*, or None.

    The first string found under ``PLAYER_JS_URL`` or under
    ``WEB_PLAYER_CONTEXT_CONFIGS/*/jsUrl`` is resolved against
    https://www.youtube.com. ``webpage`` is accepted but not used here.
    """
    js_path = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', js_path) if js_path else None
2702
def _download_player_url(self, video_id, fatal=False):
    """Build the player JS URL from the version named in the iframe API script.

    Returns None when the script could not be downloaded or no player
    version was found in it (only reachable when ``fatal`` is false).
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None

    player_version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not player_version:
        return None
    return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2712
2713 def _signature_cache_id(self, example_sig):
2714 """ Return a string representation of a signature """
2715 return '.'.join(str(len(part)) for part in example_sig.split('.'))
2716
@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first ``_PLAYER_INFO_RE`` pattern matching *player_url*.

    Raises ExtractorError when none of the patterns match.
    """
    # Lazy generator: patterns after the first hit are never tried
    attempts = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    id_m = next(filter(None, attempts), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')
2726
def _load_player(self, video_id, player_url, fatal=True):
    """Return the player JS for *player_url*, downloading it at most once per player id.

    Only a truthy download result is stored in ``self._code_cache``;
    None is returned when nothing is cached after the attempt.
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache.get(player_id)

    code = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    if code:
        self._code_cache[player_id] = code
    return self._code_cache.get(player_id)
2737
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that rearranges a signature string for this player.

    The rearrangement is stored as a list of source indices ("cache spec").
    It is loaded from the 'youtube-sigfuncs' cache when available; otherwise
    it is derived by running the player's signature function on a probe
    string and then stored in that cache.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
    assert os.path.basename(func_id) == func_id

    self.write_debug(f'Extracting signature function {func_id}')
    cache_spec = self.cache.load('youtube-sigfuncs', func_id)

    if not cache_spec:
        code = self._load_player(video_id, player_url)
        if code:
            sig_func = self._parse_sig_js(code)
            # Each probe character encodes its own position, so the output
            # reveals which input index ends up at which output position.
            probe = ''.join(map(chr, range(len(example_sig))))
            cache_spec = [ord(c) for c in sig_func(probe)]
            self.cache.store('youtube-sigfuncs', func_id, cache_spec)

    def apply_spec(s):
        return ''.join(s[i] for i in cache_spec)

    return apply_spec
2757
2758 def _print_sig_code(self, func, example_sig):
2759 if not self.get_param('youtube_print_sig_code'):
2760 return
2761
2762 def gen_sig_code(idxs):
2763 def _genslice(start, end, step):
2764 starts = '' if start == 0 else str(start)
2765 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
2766 steps = '' if step == 1 else (':%d' % step)
2767 return f's[{starts}{ends}{steps}]'
2768
2769 step = None
2770 # Quelch pyflakes warnings - start will be set when step is set
2771 start = '(Never used)'
2772 for i, prev in zip(idxs[1:], idxs[:-1]):
2773 if step is not None:
2774 if i - prev == step:
2775 continue
2776 yield _genslice(start, prev, step)
2777 step = None
2778 continue
2779 if i - prev in [-1, 1]:
2780 step = i - prev
2781 start = prev
2782 continue
2783 else:
2784 yield 's[%d]' % prev
2785 if step is None:
2786 yield 's[%d]' % i
2787 else:
2788 yield _genslice(start, i, step)
2789
2790 test_string = ''.join(map(chr, range(len(example_sig))))
2791 cache_res = func(test_string)
2792 cache_spec = [ord(c) for c in cache_res]
2793 expr_code = ' + '.join(gen_sig_code(cache_spec))
2794 signature_id_tuple = '(%s)' % (
2795 ', '.join(str(len(p)) for p in example_sig.split('.')))
2796 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
2797 ' return %s\n') % (signature_id_tuple, expr_code)
2798 self.to_screen('Extracted signature function:\n' + code)
2799
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and wrap it as a Python callable.

    The function name is found with the first matching pattern below; the
    function itself is then extracted through JSInterpreter. The returned
    callable takes the signature string and passes it as the single argument.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         # This pattern used to be listed twice; the repeat could never match
         # anything the first copy had not already matched.
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    return lambda s: initial_function([s])
2823
2824 def _cached(self, func, *cache_id):
2825 def inner(*args, **kwargs):
2826 if cache_id not in self._player_cache:
2827 try:
2828 self._player_cache[cache_id] = func(*args, **kwargs)
2829 except ExtractorError as e:
2830 self._player_cache[cache_id] = e
2831 except Exception as e:
2832 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
2833
2834 ret = self._player_cache[cache_id]
2835 if isinstance(ret, Exception):
2836 raise ret
2837 return ret
2838 return inner
2839
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    # The extracted function is cached per player URL and signature shape
    sig_shape = self._signature_cache_id(s)
    cached_extractor = self._cached(
        self._extract_signature_function, 'sig', player_url, sig_shape)
    decrypt = cached_extractor(video_id, player_url, s)
    self._print_sig_code(decrypt, s)
    return decrypt(s)
2847
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature

    The native JS interpreter is tried first. If it raises
    JSInterpreter.Exception, the function is run through PhantomJS instead;
    when the PhantomJS wrapper cannot be created, the native error is re-raised.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as e:
        raise ExtractorError('Unable to extract nsig function code', cause=e)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        ret = extract_nsig(jsi, func_code)(s)
    except JSInterpreter.Exception as native_err:
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            raise native_err
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(native_err, only_once=True)

        args, func_body = func_code
        script = f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));'
        output = jsi.execute(script, video_id=video_id, note='Executing signature code')
        ret = output.strip()

    self.write_debug(f'Decrypted nsig {s} => {ret}')
    return ret
2881
2882 def _extract_n_function_name(self, jscode):
2883 funcname, idx = self._search_regex(
2884 r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
2885 jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
2886 if not idx:
2887 return funcname
2888
2889 return json.loads(js_to_json(self._search_regex(
2890 rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
2891 f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
2892
def _extract_n_function_code(self, video_id, player_url):
    """Return ``(jsi, player_id, func_code)`` for the player's "n" function.

    ``func_code`` is taken from the 'youtube-nsig' cache when present.
    Otherwise the player JS is loaded, the function is located (by regex
    first, through the JS interpreter as a fallback) and the result is
    stored in that cache.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # The name is escaped because JS identifiers may contain "$", which
    # would otherwise act as an end-of-line anchor inside the pattern.
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
                 # NB: The end of the regex is intentionally kept strict
                 {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % re.escape(func_name),
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
2918
2919 def _extract_n_function_from_code(self, jsi, func_code):
2920 func = jsi.extract_function_from_code(*func_code)
2921
2922 def extract_nsig(s):
2923 try:
2924 ret = func([s])
2925 except JSInterpreter.Exception:
2926 raise
2927 except Exception as e:
2928 raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
2929
2930 if ret.startswith('enhanced_except_'):
2931 raise JSInterpreter.Exception('Signature function returned an exception')
2932 return ret
2933
2934 return extract_nsig
2935
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The ``STS`` entry of *ytcfg* is preferred; the player JS is only
    consulted when that entry is missing or falsy.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    code = self._load_player(video_id, player_url, fatal=fatal)
    if code:
        sts = int_or_none(self._search_regex(
            r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
            'JS player signature timestamp', group='sts', fatal=fatal))
    return sts
2959
def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URLs so the video is recorded as watched.

    Two URLs are requested in order: 'videostatsPlaybackUrl' and then
    'videostatsWatchtimeUrl'. If one of them is missing, a warning is
    reported and the method returns without trying the rest.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return

        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))

        # more consistent results setting it to right before the end
        video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]

        extra_params = {
            'ver': ['2'],
            'cpn': [cpn],
            'cmt': video_length,
            'el': 'detailpage',  # otherwise defaults to "shorts"
        }
        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            extra_params.update({
                'st': 0,
                'et': video_length,
            })
        qs.update(extra_params)

        new_query = urllib.parse.urlencode(qs, True)
        url = urllib.parse.urlunparse(parsed_url._replace(query=new_query))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
3000
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield url_results for YouTube videos referenced by *webpage*.

    A page with an Invidious-style alternate link yields only that video and
    then raises ``cls.StopExtraction``. Otherwise the parent's results come
    first, followed by lazyYT embeds and "YouTube Video Importer" players.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate:
        yield cls.url_result(alternate.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    for lazy in re.finditer(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        id_ = lazy.group(1)
        yield cls.url_result(unescapeHTML(id_), cls, id_)

    # Wordpress "YouTube Video Importer" plugin
    for player in re.finditer(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage):
        # Group 3 is the video id; groups 1 and 2 are the quote characters
        imported_id = player.group(3)
        yield cls.url_result(imported_id, cls, imported_id)
3024
@classmethod
def extract_id(cls, url):
    """Return the video id for *url*; raise ExtractorError if none is found."""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
3031
def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the chaptered player bar in *data*.

    Start times are read from ``timeRangeStartMillis`` (scaled to seconds)
    and titles from ``title.simpleText`` of each ``chapterRenderer``.
    """
    def start_seconds(chapter):
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_seconds,
        chapter_title=title_of,
        duration=duration)
3046
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the chapters of the first engagement panel that yields any, else ``[]``."""
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
3059
3060 def _extract_chapters_from_description(self, description, duration):
3061 duration_re = r'(?:\d+:)?\d{1,2}:\d{2}'
3062 sep_re = r'(?m)^\s*(%s)\b\W*\s(%s)\s*$'
3063 return self._extract_chapters(
3064 re.findall(sep_re % (duration_re, r'.+?'), description or ''),
3065 chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1],
3066 duration=duration, strict=False) or self._extract_chapters(
3067 re.findall(sep_re % (r'.+?', duration_re), description or ''),
3068 chapter_time=lambda x: parse_duration(x[1]), chapter_title=lambda x: x[0],
3069 duration=duration, strict=False)
3070
3071 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
3072 if not duration:
3073 return
3074 chapter_list = [{
3075 'start_time': chapter_time(chapter),
3076 'title': chapter_title(chapter),
3077 } for chapter in chapter_list or []]
3078 if not strict:
3079 chapter_list.sort(key=lambda c: c['start_time'] or 0)
3080
3081 chapters = [{'start_time': 0}]
3082 for idx, chapter in enumerate(chapter_list):
3083 if chapter['start_time'] is None:
3084 self.report_warning(f'Incomplete chapter {idx}')
3085 elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
3086 chapters.append(chapter)
3087 elif chapter not in chapters:
3088 self.report_warning(
3089 f'Invalid start time ({chapter["start_time"]} < {chapters[-1]["start_time"]}) for chapter "{chapter["title"]}"')
3090 return chapters[1:]
3091
3092 def _extract_comment(self, comment_renderer, parent=None):
3093 comment_id = comment_renderer.get('commentId')
3094 if not comment_id:
3095 return
3096
3097 text = self._get_text(comment_renderer, 'contentText')
3098
3099 # Timestamp is an estimate calculated from the current time and time_text
3100 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
3101 timestamp = self._parse_time_text(time_text)
3102
3103 author = self._get_text(comment_renderer, 'authorText')
3104 author_id = try_get(comment_renderer,
3105 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)
3106
3107 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
3108 lambda x: x['likeCount']), str)) or 0
3109 author_thumbnail = try_get(comment_renderer,
3110 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)
3111
3112 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
3113 is_favorited = 'creatorHeart' in (try_get(
3114 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
3115 return {
3116 'id': comment_id,
3117 'text': text,
3118 'timestamp': timestamp,
3119 'time_text': time_text,
3120 'like_count': votes,
3121 'is_favorited': is_favorited,
3122 'author': author,
3123 'author_id': author_id,
3124 'author_thumbnail': author_thumbnail,
3125 'author_is_uploader': author_is_uploader,
3126 'parent': parent or 'root'
3127 }
3128
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generate comment dicts by following comment-section continuations.

    @param root_continuation_data   data the first continuation is extracted from
    @param parent                   id of the parent comment when fetching a reply
                                    thread; None for the top-level section
    @param tracker                  dict of running counters shared across the
                                    recursive calls; created when not given

    Replies are fetched by calling this method recursively for every thread.
    Raises ``self.CommentsDisabled`` when YouTube returned a message and no
    top-level comment was extracted.
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare yield (None) tells the consumer below to stop
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # Missing or non-numeric entries of the max_comments argument become sys.maxsize
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        try:
            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        except ExtractorError as e:
            # Ignore incomplete data error for replies if retries didn't work.
            # This is to allow any other parent comments and comment threads to be downloaded.
            # See: https://github.com/yt-dlp/yt-dlp/issues/4669
            if 'incomplete data' in str(e).lower() and parent and self.get_param('ignoreerrors') is True:
                self.report_warning(
                    'Received incomplete data for a comment reply thread and retrying did not help. '
                    'Ignoring to let other comments be downloaded.')
                # No fresh response was obtained. Stop this reply thread here
                # instead of re-processing the previous page's response and
                # requesting the same continuation again.
                return
            else:
                raise
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
        raise self.CommentsDisabled
3285
3286 @staticmethod
3287 def _generate_comment_continuation(video_id):
3288 """
3289 Generates initial comment section continuation token from given video id
3290 """
3291 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3292 return base64.b64encode(token.encode()).decode()
3293
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """
    Entry point for comment extraction.

    Returns a lazy iterator over the entries produced by `_comment_entries`
    for the comment item section found in `contents`. The iterator stops
    after `max_comments` entries when that extractor argument is given.
    """
    def _iter_comments(section_contents):
        # Runs only once the returned iterator is first consumed
        comment_section = None
        for candidate in traverse_obj(section_contents, (..., 'itemSectionRenderer'), default={}):
            if candidate.get('sectionIdentifier') == 'comment-item-section':
                comment_section = candidate
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_iter_comments(contents), 0, limit)
3304
3305 @staticmethod
3306 def _get_checkok_params():
3307 return {'contentCheckOk': True, 'racyCheckOk': True}
3308
3309 @classmethod
3310 def _generate_player_context(cls, sts=None):
3311 context = {
3312 'html5Preference': 'HTML5_PREF_WANTS',
3313 }
3314 if sts is not None:
3315 context['signatureTimestamp'] = sts
3316 return {
3317 'playbackContext': {
3318 'contentPlaybackContext': context
3319 },
3320 **cls._get_checkok_params()
3321 }
3322
@staticmethod
def _is_agegated(player_response):
    """
    Check whether a player response indicates an age-gated video.

    Returns True if the playability status carries a `desktopLegacyAgeGateReason`,
    or if its `status` or `reason` contains one of the known age-gate markers.
    Markers are matched case-insensitively; non-string values are ignored.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # Status codes are upper-case (cf. 'UNPLAYABLE') while the markers above are
    # lower-case, so normalise the case before looking for them
    normalized = [reason.lower() for reason in reasons if isinstance(reason, str)]
    return any(expected in reason for expected in AGE_GATE_REASONS for reason in normalized)
3334
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of `player_response` is exactly 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
3338
# Sent as `params` in the player API query for stories and for the android client,
# and as the `pp` query parameter when downloading the watch page of a story
_STORY_PLAYER_PARAMS = '8AEB'
3340
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """
    Request the player API JSON for `video_id` using the given client.

    The signature timestamp is only looked up when a `player_url` is given.
    Returns the response, or None if it is empty.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    account_syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    signature_timestamp = None
    if player_url:
        signature_timestamp = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    api_headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=account_syncid,
        session_index=session_index, default_client=client)

    # Stories and the android client both get the story player params
    wants_story_params = smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android'
    player_query = {'videoId': video_id}
    if wants_story_params:
        player_query['params'] = self._STORY_PLAYER_PARAMS
    player_query.update(self._generate_player_context(signature_timestamp))

    download_note = 'Downloading %s player API JSON' % client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=player_query,
        ytcfg=player_ytcfg, headers=api_headers, fatal=True,
        default_client=client, note=download_note)
    return response or None
3362
def _get_requested_clients(self, url, smuggled_data):
    """
    Resolve the `player_client` extractor argument into the list of clients to query.

    Each value may be the name of a client that does not start with `_`,
    `default` (android and web) or `all` (every allowed client, highest
    priority first). Anything else is skipped with a warning. If nothing valid
    was requested, the defaults are used. For music URLs, the `<client>_music`
    variant of every selected client is added when such a client exists.
    The collected names are returned through `orderedSet`.
    """
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy, so that the additions below never modify `default` itself
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Collect the additions first: the list must not grow while it is being iterated
        music_clients = [
            f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
3386
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """
    Collect the player responses for `video_id` from the given clients.

    Further clients may be queued while iterating, depending on what the
    responses report (unplayable or age-gated videos).

    Returns a tuple `(prs, player_url)`: the list of collected player
    responses and the last player URL that was determined (or None).
    If a request failed and no response at all was collected, the last
    error is raised; otherwise errors are only reported as warnings.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed so that `clients.pop()` below takes the clients in the given order
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    # Appended to the end of the stack, so it is the next one popped
                    clients.append(client_name)
                    all_clients.add(actual_client)
                # NOTE(review): nesting of this return was reconstructed from a flattened listing - confirm
                return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        # Only the web client shares the ytcfg of the webpage; others get their own unless skipped
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        # A player URL found for an earlier client is reused
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The fallback download of the player URL is attempted at most once
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            # For the web client, the response embedded in the webpage is used when available
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            # Only the most recent error is kept; the previous one is downgraded to a warning
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        # Fatal only if nothing at all could be extracted
        if not len(prs):
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
3468
3469 def _needs_live_processing(self, live_status, duration):
3470 if (live_status == 'is_live' and self.get_param('live_from_start')
3471 or live_status == 'post_live' and (duration or 0) > 4 * 3600):
3472 return live_status
3473
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
    """
    Generate the formats described by the given list of streaming data.

    Yields one dict per format: first the formats listed directly under
    `formats`/`adaptiveFormats`, then the formats of the HLS and DASH
    manifests, unless those are skipped. The very last item yielded is not
    a format but the merged subtitles dict obtained from the manifests, so
    callers have to split it off.
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        # The same itag may occur once per audio track, hence the combined id
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            # Remembered so that manifest formats can be given a quality later
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                        f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
                        video_id=video_id, only_once=True)
                # The format is still returned, but marked and deprioritized
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault')
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower()
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # `quality` is None when the format gives neither a quality nor an audio quality
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty((audio_track.get('id') or '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        # With a single stream, the total bitrate is that stream's bitrate
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    needs_live_processing = self._needs_live_processing(live_status, duration)
    skip_bad_formats = not self._configuration_arg('include_incomplete_formats')

    skip_manifests = set(self._configuration_arg('skip'))
    if (not self.get_param('youtube_include_hls_manifest', True)
            or needs_live_processing == 'is_live'  # These will be filtered out by YoutubeDL anyway
            or needs_live_processing and skip_bad_formats):
        skip_manifests.add('hls')

    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    if self._configuration_arg('include_live_dash'):
        self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
                                            'Use include_incomplete_formats extractor argument instead')
    elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
        skip_manifests.add('dash')

    def process_manifest_format(f, proto, itag):
        """Set format_id and quality of a manifest format; return False if it is a duplicate"""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            # Same itag already seen with another protocol: disambiguate the id
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Fall back to the quality of the known format with the closest height
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        return True

    subtitles = {}
    for sd in streaming_data:
        hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if needs_live_processing:
                        f['is_from_start'] = True

                    yield f
    # Always the final item, after all the formats
    yield subtitles
3677
def _extract_storyboard(self, player_responses, duration):
    """
    Generate the storyboard formats described by the storyboard spec of the player responses.

    The spec is a `|`-separated string: the base URL followed by one
    `#`-separated entry per storyboard level. Malformed entries are skipped
    with a warning. Nothing is yielded if there is no valid base URL, or if
    `duration` is unknown (None or 0), since the frame rate and the fragment
    durations are computed from it.
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The list is reversed, so pop() removes the first element of the spec
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    if not duration:
        # Avoids dividing None (TypeError) or by zero (ZeroDivisionError) below
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Every fragment holds up to cols * rows frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may be shorter than the others
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
3715
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """
    Download the watch page (unless skipped) and the player responses of a video.

    Returns the tuple `(webpage, master_ytcfg, player_responses, player_url)`.
    `webpage` is None when `webpage` is listed in the `player_skip` extractor argument.
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    webpage = None
    if not skip_webpage:
        webpage_query = {'bpctr': '9999999999', 'has_verified': '1'}
        if smuggled_data.get('is_story'):
            webpage_query['pp'] = self._STORY_PLAYER_PARAMS
        webpage = self._download_webpage(webpage_url, video_id, fatal=False, query=webpage_query)

    # Fall back to the default ytcfg if none could be extracted from the webpage
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url
3732
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """
    Determine the live status of a video and extract its formats.

    Returns the tuple
    `(live_broadcast_details, live_status, streaming_data, formats, subtitles)`,
    where `subtitles` is the last item generated by
    `_extract_formats_and_subtitles` and `formats` is everything before it.
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    post_live = get_first(video_details, 'isPostLiveDvr')

    # The first matching state wins
    if post_live:
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif is_upcoming:
        live_status = 'is_upcoming'
    elif live_content:
        live_status = 'was_live'
    elif False in (is_live, live_content):
        live_status = 'not_live'
    else:
        live_status = None

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    generated = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)
    *formats, subtitles = generated

    return live_broadcast_details, live_status, streaming_data, formats, subtitles
3751
3752 def _real_extract(self, url):
3753 url, smuggled_data = unsmuggle_url(url, {})
3754 video_id = self._match_id(url)
3755
3756 base_url = self.http_scheme() + '//www.youtube.com/'
3757 webpage_url = base_url + 'watch?v=' + video_id
3758
3759 webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
3760
3761 playability_statuses = traverse_obj(
3762 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
3763
3764 trailer_video_id = get_first(
3765 playability_statuses,
3766 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
3767 expected_type=str)
3768 if trailer_video_id:
3769 return self.url_result(
3770 trailer_video_id, self.ie_key(), trailer_video_id)
3771
3772 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
3773 if webpage else (lambda x: None))
3774
3775 video_details = traverse_obj(
3776 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
3777 microformats = traverse_obj(
3778 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
3779 expected_type=dict, default=[])
3780
3781 translated_title = self._get_text(microformats, (..., 'title'))
3782 video_title = (self._preferred_lang and translated_title
3783 or get_first(video_details, 'title') # primary
3784 or translated_title
3785 or search_meta(['og:title', 'twitter:title', 'title']))
3786 translated_description = self._get_text(microformats, (..., 'description'))
3787 original_description = get_first(video_details, 'shortDescription')
3788 video_description = (
3789 self._preferred_lang and translated_description
3790 # If original description is blank, it will be an empty string.
3791 # Do not prefer translated description in this case.
3792 or original_description if original_description is not None else translated_description)
3793
3794 multifeed_metadata_list = get_first(
3795 player_responses,
3796 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
3797 expected_type=str)
3798 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
3799 if self.get_param('noplaylist'):
3800 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
3801 else:
3802 entries = []
3803 feed_ids = []
3804 for feed in multifeed_metadata_list.split(','):
3805 # Unquote should take place before split on comma (,) since textual
3806 # fields may contain comma as well (see
3807 # https://github.com/ytdl-org/youtube-dl/issues/8536)
3808 feed_data = urllib.parse.parse_qs(
3809 urllib.parse.unquote_plus(feed))
3810
3811 def feed_entry(name):
3812 return try_get(
3813 feed_data, lambda x: x[name][0], str)
3814
3815 feed_id = feed_entry('id')
3816 if not feed_id:
3817 continue
3818 feed_title = feed_entry('title')
3819 title = video_title
3820 if feed_title:
3821 title += ' (%s)' % feed_title
3822 entries.append({
3823 '_type': 'url_transparent',
3824 'ie_key': 'Youtube',
3825 'url': smuggle_url(
3826 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
3827 {'force_singlefeed': True}),
3828 'title': title,
3829 })
3830 feed_ids.append(feed_id)
3831 self.to_screen(
3832 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
3833 % (', '.join(feed_ids), video_id))
3834 return self.playlist_result(
3835 entries, video_id, video_title, video_description)
3836
3837 duration = (int_or_none(get_first(video_details, 'lengthSeconds'))
3838 or int_or_none(get_first(microformats, 'lengthSeconds'))
3839 or parse_duration(search_meta('duration')) or None)
3840
3841 live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \
3842 self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)
3843 if live_status == 'post_live':
3844 self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')
3845
3846 if not formats:
3847 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
3848 self.report_drm(video_id)
3849 pemr = get_first(
3850 playability_statuses,
3851 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
3852 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
3853 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
3854 if subreason:
3855 if subreason == 'The uploader has not made this video available in your country.':
3856 countries = get_first(microformats, 'availableCountries')
3857 if not countries:
3858 regions_allowed = search_meta('regionsAllowed')
3859 countries = regions_allowed.split(',') if regions_allowed else None
3860 self.raise_geo_restricted(subreason, countries, metadata_available=True)
3861 reason += f'. {subreason}'
3862 if reason:
3863 self.raise_no_formats(reason, expected=True)
3864
3865 keywords = get_first(video_details, 'keywords', expected_type=list) or []
3866 if not keywords and webpage:
3867 keywords = [
3868 unescapeHTML(m.group('content'))
3869 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
3870 for keyword in keywords:
3871 if keyword.startswith('yt:stretch='):
3872 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
3873 if mobj:
3874 # NB: float is intentional for forcing float division
3875 w, h = (float(v) for v in mobj.groups())
3876 if w > 0 and h > 0:
3877 ratio = w / h
3878 for f in formats:
3879 if f.get('vcodec') != 'none':
3880 f['stretched_ratio'] = ratio
3881 break
3882 thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
3883 thumbnail_url = search_meta(['og:image', 'twitter:image'])
3884 if thumbnail_url:
3885 thumbnails.append({
3886 'url': thumbnail_url,
3887 })
3888 original_thumbnails = thumbnails.copy()
3889
3890 # The best resolution thumbnails sometimes does not appear in the webpage
3891 # See: https://github.com/yt-dlp/yt-dlp/issues/340
3892 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
3893 thumbnail_names = [
3894 # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
3895 # in resolution, these are not the custom thumbnail. So de-prioritize them
3896 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
3897 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
3898 ]
3899 n_thumbnail_names = len(thumbnail_names)
3900 thumbnails.extend({
3901 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
3902 video_id=video_id, name=name, ext=ext,
3903 webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),
3904 } for name in thumbnail_names for ext in ('webp', 'jpg'))
3905 for thumb in thumbnails:
3906 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
3907 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
3908 self._remove_duplicate_formats(thumbnails)
3909 self._downloader._sort_thumbnails(original_thumbnails)
3910
3911 category = get_first(microformats, 'category') or search_meta('genre')
3912 channel_id = str_or_none(
3913 get_first(video_details, 'channelId')
3914 or get_first(microformats, 'externalChannelId')
3915 or search_meta('channelId'))
3916 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
3917
3918 live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
3919 live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
3920 if not duration and live_end_time and live_start_time:
3921 duration = live_end_time - live_start_time
3922
3923 needs_live_processing = self._needs_live_processing(live_status, duration)
3924
3925 def is_bad_format(fmt):
3926 if needs_live_processing and not fmt.get('is_from_start'):
3927 return True
3928 elif (live_status == 'is_live' and needs_live_processing != 'is_live'
3929 and fmt.get('protocol') == 'http_dash_segments'):
3930 return True
3931
3932 for fmt in filter(is_bad_format, formats):
3933 fmt['preference'] = (fmt.get('preference') or -1) - 10
3934 fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ')
3935
3936 if needs_live_processing:
3937 self._prepare_live_from_start_formats(
3938 formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')
3939
3940 formats.extend(self._extract_storyboard(player_responses, duration))
3941
3942 # source_preference is lower for throttled/potentially damaged formats
3943 self._sort_formats(formats, (
3944 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))
3945
3946 info = {
3947 'id': video_id,
3948 'title': video_title,
3949 'formats': formats,
3950 'thumbnails': thumbnails,
3951 # The best thumbnail that we are sure exists. Prevents unnecessary
3952 # URL checking if user don't care about getting the best possible thumbnail
3953 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
3954 'description': video_description,
3955 'uploader': get_first(video_details, 'author'),
3956 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
3957 'uploader_url': owner_profile_url,
3958 'channel_id': channel_id,
3959 'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),
3960 'duration': duration,
3961 'view_count': int_or_none(
3962 get_first((video_details, microformats), (..., 'viewCount'))
3963 or search_meta('interactionCount')),
3964 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
3965 'age_limit': 18 if (
3966 get_first(microformats, 'isFamilySafe') is False
3967 or search_meta('isFamilyFriendly') == 'false'
3968 or search_meta('og:restrictions:age') == '18+') else 0,
3969 'webpage_url': webpage_url,
3970 'categories': [category] if category else None,
3971 'tags': keywords,
3972 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
3973 'live_status': live_status,
3974 'release_timestamp': live_start_time,
3975 }
3976
3977 subtitles = {}
3978 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
3979 if pctr:
3980 def get_lang_code(track):
3981 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
3982 or track.get('languageCode'))
3983
3984 # Converted into dicts to remove duplicates
3985 captions = {
3986 get_lang_code(sub): sub
3987 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
3988 translation_languages = {
3989 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
3990 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
3991
3992 def process_language(container, base_url, lang_code, sub_name, query):
3993 lang_subs = container.setdefault(lang_code, [])
3994 for fmt in self._SUBTITLE_FORMATS:
3995 query.update({
3996 'fmt': fmt,
3997 })
3998 lang_subs.append({
3999 'ext': fmt,
4000 'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
4001 'name': sub_name,
4002 })
4003
4004 # NB: Constructing the full subtitle dictionary is slow
4005 get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
4006 self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
4007 for lang_code, caption_track in captions.items():
4008 base_url = caption_track.get('baseUrl')
4009 orig_lang = parse_qs(base_url).get('lang', [None])[-1]
4010 if not base_url:
4011 continue
4012 lang_name = self._get_text(caption_track, 'name', max_runs=1)
4013 if caption_track.get('kind') != 'asr':
4014 if not lang_code:
4015 continue
4016 process_language(
4017 subtitles, base_url, lang_code, lang_name, {})
4018 if not caption_track.get('isTranslatable'):
4019 continue
4020 for trans_code, trans_name in translation_languages.items():
4021 if not trans_code:
4022 continue
4023 orig_trans_code = trans_code
4024 if caption_track.get('kind') != 'asr':
4025 if not get_translated_subs:
4026 continue
4027 trans_code += f'-{lang_code}'
4028 trans_name += format_field(lang_name, None, ' from %s')
4029 # Add an "-orig" label to the original language so that it can be distinguished.
4030 # The subs are returned without "-orig" as well for compatibility
4031 if lang_code == f'a-{orig_trans_code}':
4032 process_language(
4033 automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
4034 # Setting tlang=lang returns damaged subtitles.
4035 process_language(automatic_captions, base_url, trans_code, trans_name,
4036 {} if orig_lang == orig_trans_code else {'tlang': trans_code})
4037
4038 info['automatic_captions'] = automatic_captions
4039 info['subtitles'] = subtitles
4040
4041 parsed_url = urllib.parse.urlparse(url)
4042 for component in [parsed_url.fragment, parsed_url.query]:
4043 query = urllib.parse.parse_qs(component)
4044 for k, v in query.items():
4045 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
4046 d_k += '_time'
4047 if d_k not in info and k in s_ks:
4048 info[d_k] = parse_duration(query[k][0])
4049
4050 # Youtube Music Auto-generated description
4051 if video_description:
4052 mobj = re.search(
4053 r'''(?xs)
4054 (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
4055 (?P<album>[^\n]+)
4056 (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
4057 (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
4058 (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
4059 .+\nAuto-generated\ by\ YouTube\.\s*$
4060 ''', video_description)
4061 if mobj:
4062 release_year = mobj.group('release_year')
4063 release_date = mobj.group('release_date')
4064 if release_date:
4065 release_date = release_date.replace('-', '')
4066 if not release_year:
4067 release_year = release_date[:4]
4068 info.update({
4069 'album': mobj.group('album'.strip()),
4070 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
4071 'track': mobj.group('track').strip(),
4072 'release_date': release_date,
4073 'release_year': int_or_none(release_year),
4074 })
4075
4076 initial_data = None
4077 if webpage:
4078 initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
4079 if not initial_data:
4080 query = {'videoId': video_id}
4081 query.update(self._get_checkok_params())
4082 initial_data = self._extract_response(
4083 item_id=video_id, ep='next', fatal=False,
4084 ytcfg=master_ytcfg, query=query,
4085 headers=self.generate_api_headers(ytcfg=master_ytcfg),
4086 note='Downloading initial data API JSON')
4087
4088 info['comment_count'] = traverse_obj(initial_data, (
4089 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
4090 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'
4091 ), (
4092 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
4093 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'
4094 ), expected_type=int_or_none, get_all=False)
4095
4096 try: # This will error if there is no livechat
4097 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
4098 except (KeyError, IndexError, TypeError):
4099 pass
4100 else:
4101 info.setdefault('subtitles', {})['live_chat'] = [{
4102 # url is needed to set cookies
4103 'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
4104 'video_id': video_id,
4105 'ext': 'json',
4106 'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')
4107 else 'youtube_live_chat_replay'),
4108 }]
4109
4110 if initial_data:
4111 info['chapters'] = (
4112 self._extract_chapters_from_json(initial_data, duration)
4113 or self._extract_chapters_from_engagement_panel(initial_data, duration)
4114 or self._extract_chapters_from_description(video_description, duration)
4115 or None)
4116
4117 contents = traverse_obj(
4118 initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
4119 expected_type=list, default=[])
4120
4121 vpir = get_first(contents, 'videoPrimaryInfoRenderer')
4122 if vpir:
4123 stl = vpir.get('superTitleLink')
4124 if stl:
4125 stl = self._get_text(stl)
4126 if try_get(
4127 vpir,
4128 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
4129 info['location'] = stl
4130 else:
4131 mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
4132 if mobj:
4133 info.update({
4134 'series': mobj.group(1),
4135 'season_number': int(mobj.group(2)),
4136 'episode_number': int(mobj.group(3)),
4137 })
4138 for tlb in (try_get(
4139 vpir,
4140 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
4141 list) or []):
4142 tbrs = variadic(
4143 traverse_obj(
4144 tlb, 'toggleButtonRenderer',
4145 ('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer'),
4146 default=[]))
4147 for tbr in tbrs:
4148 for getter, regex in [(
4149 lambda x: x['defaultText']['accessibility']['accessibilityData'],
4150 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
4151 lambda x: x['accessibility'],
4152 lambda x: x['accessibilityData']['accessibilityData'],
4153 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
4154 label = (try_get(tbr, getter, dict) or {}).get('label')
4155 if label:
4156 mobj = re.match(regex, label)
4157 if mobj:
4158 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
4159 break
4160 sbr_tooltip = try_get(
4161 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
4162 if sbr_tooltip:
4163 like_count, dislike_count = sbr_tooltip.split(' / ')
4164 info.update({
4165 'like_count': str_to_int(like_count),
4166 'dislike_count': str_to_int(dislike_count),
4167 })
4168 vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
4169 if vcr:
4170 vc = self._get_count(vcr, 'viewCount')
4171 # Upcoming premieres with waiting count are treated as live here
4172 if vcr.get('isLive'):
4173 info['concurrent_view_count'] = vc
4174 elif info.get('view_count') is None:
4175 info['view_count'] = vc
4176
4177 vsir = get_first(contents, 'videoSecondaryInfoRenderer')
4178 if vsir:
4179 vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
4180 info.update({
4181 'channel': self._get_text(vor, 'title'),
4182 'channel_follower_count': self._get_count(vor, 'subscriberCountText')})
4183
4184 rows = try_get(
4185 vsir,
4186 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
4187 list) or []
4188 multiple_songs = False
4189 for row in rows:
4190 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
4191 multiple_songs = True
4192 break
4193 for row in rows:
4194 mrr = row.get('metadataRowRenderer') or {}
4195 mrr_title = mrr.get('title')
4196 if not mrr_title:
4197 continue
4198 mrr_title = self._get_text(mrr, 'title')
4199 mrr_contents_text = self._get_text(mrr, ('contents', 0))
4200 if mrr_title == 'License':
4201 info['license'] = mrr_contents_text
4202 elif not multiple_songs:
4203 if mrr_title == 'Album':
4204 info['album'] = mrr_contents_text
4205 elif mrr_title == 'Artist':
4206 info['artist'] = mrr_contents_text
4207 elif mrr_title == 'Song':
4208 info['track'] = mrr_contents_text
4209
4210 fallbacks = {
4211 'channel': 'uploader',
4212 'channel_id': 'uploader_id',
4213 'channel_url': 'uploader_url',
4214 }
4215
4216 # The upload date for scheduled, live and past live streams / premieres in microformats
4217 # may be different from the stream date. Although not in UTC, we will prefer it in this case.
4218 # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
4219 upload_date = (
4220 unified_strdate(get_first(microformats, 'uploadDate'))
4221 or unified_strdate(search_meta('uploadDate')))
4222 if not upload_date or (
4223 live_status in ('not_live', None)
4224 and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
4225 ):
4226 upload_date = strftime_or_none(
4227 self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date
4228 info['upload_date'] = upload_date
4229
4230 for to, frm in fallbacks.items():
4231 if not info.get(to):
4232 info[to] = info.get(frm)
4233
4234 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
4235 v = info.get(s_k)
4236 if v:
4237 info[d_k] = v
4238
4239 badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))
4240
4241 is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
4242 or get_first(video_details, 'isPrivate', expected_type=bool))
4243
4244 info['availability'] = (
4245 'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
4246 else self._availability(
4247 is_private=is_private,
4248 needs_premium=(
4249 self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)
4250 or False if initial_data and is_private is not None else None),
4251 needs_subscription=(
4252 self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)
4253 or False if initial_data and is_private is not None else None),
4254 needs_auth=info['age_limit'] >= 18,
4255 is_unlisted=None if is_private is None else (
4256 self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
4257 or get_first(microformats, 'isUnlisted', expected_type=bool))))
4258
4259 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
4260
4261 self.mark_watched(video_id, player_responses)
4262
4263 return info
4264
4265
4266class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
@staticmethod
def passthrough_smuggled_data(func):
    """Decorate an extraction method so smuggled data survives into its results.

    The wrapped ``func`` is called as ``func(self, url, smuggled_data)`` with
    the data unsmuggled from the incoming URL (plus ``is_music_url`` when
    ``self.is_music_url(url)`` is true). Whatever is left in ``smuggled_data``
    afterwards is smuggled back into the URL of the result and of each entry.
    """
    music_capable_hosts = ('www.youtube.com', 'music.youtube.com')

    def _smuggle(info, smuggled_data):
        # Only URL-type results have a URL that is going to be extracted again
        if info.get('_type') not in ('url', 'url_transparent'):
            return info
        if smuggled_data.get('is_music_url'):
            url_parts = urllib.parse.urlparse(info['url'])
            if url_parts.netloc in music_capable_hosts:
                # The music hostname already conveys the flag, so drop it
                smuggled_data.pop('is_music_url')
                music_parts = url_parts._replace(netloc='music.youtube.com')
                info['url'] = urllib.parse.urlunparse(music_parts)
        if smuggled_data:
            info['url'] = smuggle_url(info['url'], smuggled_data)
        return info

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        result = func(self, url, smuggled_data)
        if not smuggled_data:
            return result
        _smuggle(result, smuggled_data)
        if result.get('entries'):
            # Lazy on purpose; every entry gets its own copy because
            # _smuggle may pop keys from the dict it is given
            result['entries'] = (
                _smuggle(entry, smuggled_data.copy()) for entry in result['entries'])
        return result
    return wrapper
4293
def _extract_channel_id(self, webpage):
    """Return the channel ID advertised in the meta tags of *webpage*.

    The ``channelId`` meta tag is used when present. Otherwise the ID is
    parsed out of the first available canonical/app URL meta tag; lookups
    without a default are left to ``_html_search_meta`` / ``_search_regex``
    to report.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to be inside the group: with `([^/?#&])+` the group
    # is overwritten on every repetition and only the last character survives
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
4306
@staticmethod
def _extract_basic_item_renderer(item):
    """Return the first recognised renderer dict found in *item*, else None.

    A value qualifies when it is a dict stored under one of the known basic
    renderer keys, or under any key of the form ``grid...Renderer``.
    """
    # Modified from _extract_grid_item_renderer
    basic_renderer_keys = (
        'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
    )
    for name, candidate in item.items():
        if not isinstance(candidate, dict):
            continue
        is_grid_renderer = name.startswith('grid') and name.endswith('Renderer')
        if name in basic_renderer_keys or is_grid_renderer:
            return candidate
    return None
4320
def _grid_entries(self, grid_renderer):
    """Yield one entry per usable item of ``grid_renderer['items']``.

    Each item is resolved to its basic renderer and classified, in order of
    precedence, as a playlist, a video, a channel or a generic navigation
    endpoint. Items that are not dicts or hold no known renderer are skipped.
    """
    for item in grid_renderer['items']:
        if not isinstance(item, dict):
            continue
        renderer = self._extract_basic_item_renderer(item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')
        playlist_id = renderer.get('playlistId')
        video_id = renderer.get('videoId')
        channel_id = renderer.get('channelId')

        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            yield self._extract_video(renderer)
        elif channel_id:
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # Generic endpoint URL support: hand the URL to the first
            # extractor (in this fixed order) that accepts it
            endpoint_path = try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                str)
            ep_url = urljoin('https://www.youtube.com/', endpoint_path)
            if not ep_url:
                continue
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(ep_url)),
                None)
            if matching_ie is not None:
                yield self.url_result(
                    ep_url, ie=matching_ie.ie_key(), video_id=matching_ie._match_id(ep_url), video_title=title)
4360
def _music_reponsive_list_entry(self, renderer):
    """Build a URL result for a music responsive list item.

    Tried in order: the item's own video, the playlist of its watch endpoint
    (together with that endpoint's video when present), and finally its
    browse endpoint. Returns None when none of these IDs is available.
    """
    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(f'https://music.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    watch_endpoint = ('navigationEndpoint', 'watchEndpoint')
    playlist_id = traverse_obj(renderer, watch_endpoint + ('playlistId',))
    if playlist_id:
        video_id = traverse_obj(renderer, watch_endpoint + ('videoId',))
        if video_id:
            music_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            music_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(music_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if browse_id:
        return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
                               ie=YoutubeTabIE.ie_key(), video_id=browse_id)
    return None
4378
def _shelf_entries_from_content(self, shelf_renderer):
    """Yield the grid entries embedded in the ``content`` of a shelf.

    Only ``gridRenderer`` and ``expandedShelfContentsRenderer`` content is
    extracted; nothing is yielded when ``content`` is not a dict.
    """
    content = shelf_renderer.get('content')
    if not isinstance(content, dict):
        return
    grid_like = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
    if grid_like:
        # TODO: add support for nested playlists so each shelf is processed
        # as separate playlist
        # TODO: this includes only first N items
        yield from self._grid_entries(grid_like)
    # TODO: content['horizontalListRenderer'] is not extracted yet
4393
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf endpoint, then the shelf's inline entries.

    With *skip_channels* set, a shelf whose URL contains ``/channels?`` yields
    nothing at all.
    """
    shelf_url = urljoin('https://www.youtube.com', try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str))
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        links_to_channels = '/channels?' in shelf_url
        if skip_channels and links_to_channels:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # The shelf may not have a URL, so also extract from its content
    yield from self._shelf_entries_from_content(shelf_renderer)
4409
def _playlist_entries(self, video_list_renderer):
    """Yield an extracted video for every playlist item that has a video ID.

    Items are read from ``video_list_renderer['contents']``; both
    ``playlistVideoRenderer`` and ``playlistPanelVideoRenderer`` are accepted.
    """
    for content in video_list_renderer['contents']:
        if isinstance(content, dict):
            renderer = (
                content.get('playlistVideoRenderer')
                or content.get('playlistPanelVideoRenderer'))
            if isinstance(renderer, dict) and renderer.get('videoId'):
                yield self._extract_video(renderer)
4421
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held by a rich item, if it has a video ID.

    The first of ``content.videoRenderer`` / ``content.reelItemRenderer``
    that is found is used.
    """
    renderer_path = ('content', ('videoRenderer', 'reelItemRenderer'))
    renderer = traverse_obj(rich_grid_renderer, renderer_path, get_all=False) or {}
    if renderer.get('videoId'):
        yield self._extract_video(renderer)
4429
def _video_entry(self, video_renderer):
    """Return the extracted video for *video_renderer*, or None without a video ID."""
    if not video_renderer.get('videoId'):
        return None
    return self._extract_video(video_renderer)
4434
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a URL result for the page a hashtag tile points to, or None."""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    hashtag = self._get_text(hashtag_tile_renderer, 'hashtag')
    return self.url_result(url, ie=YoutubeTabIE.ie_key(), title=hashtag)
4441
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a backstage post.

    In order: the attached video, the attached playlist, and every video
    linked from the post text (except a link to the attached video itself).
    Nothing is yielded when the post has no ``backstagePostRenderer`` dict.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return

    # Attached video
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = video_renderer.get('videoId')
    if video_id:
        attached_video = self._extract_video(video_renderer)
        if attached_video:
            yield attached_video

    # Attached playlist
    playlist_id = try_get(
        post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # Video links inside the post text
    for run in try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        linked_video_id = YoutubeIE._match_id(ep_url)
        # A link to the attached video would only duplicate it
        if linked_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
4477
def _post_thread_continuation_entries(self, post_thread_continuation):
    """Yield the entries of a post-thread continuation.

    Each dict in ``post_thread_continuation['contents']`` is either a
    ``backstagePostThreadRenderer`` (expanded via ``_post_thread_entries``)
    or a ``videoRenderer`` (a single video). Non-dict items and videos that
    cannot be turned into an entry are skipped. Nothing is yielded when
    ``contents`` is not a list.
    """
    contents = post_thread_continuation.get('contents')
    if not isinstance(contents, list):
        return
    for content in contents:
        # Malformed items must not abort the whole continuation
        if not isinstance(content, dict):
            continue
        renderer = content.get('backstagePostThreadRenderer')
        if isinstance(renderer, dict):
            yield from self._post_thread_entries(renderer)
            continue
        renderer = content.get('videoRenderer')
        if isinstance(renderer, dict):
            # _video_entry returns None for a renderer without a video ID
            entry = self._video_entry(renderer)
            if entry:
                yield entry
4490
4491 r''' # unused
4492 def _rich_grid_entries(self, contents):
4493 for content in contents:
4494 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4495 if video_renderer:
4496 entry = self._video_entry(video_renderer)
4497 if entry:
4498 yield entry
4499 '''
4500
def _report_history_entries(self, renderer):
    """Yield a YoutubeIE URL result for every link in a report history table."""
    link_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for relative_url in traverse_obj(renderer, link_path):
        absolute_url = urljoin('https://www.youtube.com', relative_url)
        yield self.url_result(absolute_url, YoutubeIE)
4507
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found in ``parent_renderer['contents']``.

    Besides yielding entries, the continuation belonging to the processed
    content (as returned by ``_extract_continuation``) is handed back to the
    caller through ``continuation_list[0]``; a generator cannot return it
    directly while it is still being consumed.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]
    contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
    for content in contents:
        if not isinstance(content, dict):
            continue
        # First of these section-like renderers present in the content, if any
        is_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            # Content that is not wrapped in a section renderer
            if content.get('richItemRenderer'):
                for entry in self._rich_entries(content['richItemRenderer']):
                    yield entry
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
        for isr_content in isr_contents:
            if not isinstance(isr_content, dict):
                continue

            # Renderer key -> callable returning an iterable of entries.
            # Single-entry extractors are wrapped in a list; falsy entries
            # are filtered out below
            known_renderers = {
                'playlistVideoListRenderer': self._playlist_entries,
                'gridRenderer': self._grid_entries,
                'reelShelfRenderer': self._grid_entries,
                'shelfRenderer': self._shelf_entries,
                'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
                'backstagePostThreadRenderer': self._post_thread_entries,
                'videoRenderer': lambda x: [self._video_entry(x)],
                'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
                'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
                'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
            }
            for key, renderer in isr_content.items():
                if key not in known_renderers:
                    continue
                for entry in known_renderers[key](renderer):
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                # Only the first known renderer of each item is processed
                break

        # Fall back to the continuation of the section renderer itself ...
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    # ... and finally to that of the parent renderer
    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
4560
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of *tab*, following continuations page by page.

    The entries embedded in ``tab['content']`` are yielded first. After that,
    continuation responses are requested for as long as a continuation is
    available and the response contains a renderer known to this method.
    """
    # Shared with _extract_entries, which stores the next continuation in it
    continuation_list = [None]
    extract_entries = lambda x: self._extract_entries(x, continuation_list)
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        # Renderer key -> (entry extractor, key under which the extractor
        # expects the continuation items; None passes them through unwrapped)
        known_renderers = {
            'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'gridChannelRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
            'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
            'playlistVideoListContinuation': (self._playlist_entries, None),
            'gridContinuation': (self._grid_entries, None),
            'itemSectionContinuation': (self._post_thread_continuation_entries, None),
            'sectionListContinuation': (extract_entries, None),  # for feeds
        }

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems'
        ), 'continuationContents', get_all=False)
        # The keys of the first item (or of the container itself when it is
        # not a sequence) decide which extractor handles this page
        continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for key in continuation_item.keys():
            if key not in known_renderers:
                continue
            func, parent_key = known_renderers[key]
            video_items_renderer = {parent_key: continuation_items} if parent_key else continuation_items
            # Rebinding is visible to the extract_entries lambda, which looks
            # the name up at call time
            continuation_list = [None]
            yield from func(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        # No known renderer in the response: nothing more can be extracted
        if not video_items_renderer:
            break
4623
@staticmethod
def _extract_selected_tab(tabs, fatal=True):
    """Return the first tab renderer marked as selected.

    Without a selected tab, ExtractorError is raised when *fatal* is true;
    otherwise None is returned.
    """
    selected = next((tab for tab in tabs if tab.get('selected')), None)
    if selected is not None:
        return selected
    if fatal:
        raise ExtractorError('Unable to find selected tab')
    return None
4631
@staticmethod
def _extract_tab_renderers(response):
    """Collect the tabRenderer / expandableTabRenderer dicts of a browse response."""
    tab_renderer_path = (
        'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ...,
        ('tabRenderer', 'expandableTabRenderer'))
    return traverse_obj(response, tab_renderer_path, expected_type=dict)
4636
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Return a playlist result for the selected tab of *tabs*.

    The playlist title is the metadata title followed by the selected tab's
    ``title`` and ``expandedText`` (each prefixed with ' - ' by
    ``format_field``).
    """
    metadata = self._extract_metadata_from_tabs(item_id, data)
    selected_tab = self._extract_selected_tab(tabs)
    for tab_field in ('title', 'expandedText'):
        metadata['title'] += format_field(selected_tab, tab_field, ' - %s')

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    entries = self._entries(
        selected_tab, metadata['id'], ytcfg, account_syncid, visitor_data)
    return self.playlist_result(entries, **metadata)
4650
def _extract_metadata_from_tabs(self, item_id, data):
    """Build the metadata dict for a channel or playlist page.

    The result starts as ``{'id': item_id}`` and is filled from the channel
    or playlist metadata renderer, the page header and the (deprecated)
    sidebar renderers of *data*. The ``channel*`` keys mirror the
    ``uploader*`` keys.
    """
    info = {'id': item_id}

    metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
    if metadata_renderer:
        info.update({
            'uploader': metadata_renderer.get('title'),
            'uploader_id': metadata_renderer.get('externalId'),
            'uploader_url': metadata_renderer.get('channelUrl'),
        })
        # For channel pages the channel's external ID replaces the given item_id
        if info['uploader_id']:
            info['id'] = info['uploader_id']
    else:
        # Not a channel page; use the playlist metadata instead
        metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        return url_or_none((url or '').split('=')[0] + '=s0')

    avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
    if avatar_thumbnails:
        uncropped_avatar = _get_uncropped(avatar_thumbnails[0]['url'])
        if uncropped_avatar:
            avatar_thumbnails.append({
                'url': uncropped_avatar,
                'id': 'avatar_uncropped',
                'preference': 1
            })

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
    # Banners get a negative preference; the uncropped one added below is
    # the least penalised of them
    for banner in channel_banners:
        banner['preference'] = -10

    if channel_banners:
        uncropped_banner = _get_uncropped(channel_banners[0]['url'])
        if uncropped_banner:
            channel_banners.append({
                'url': uncropped_banner,
                'id': 'banner_uncropped',
                'preference': -5
            })

    # Deprecated - remove primary_sidebar_renderer when layout discontinued
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
    playlist_thumbnails = self._extract_thumbnails(
        playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))

    info.update({
        # Title precedence: metadata title, hashtag header text, then the ID
        'title': (traverse_obj(metadata_renderer, 'title')
                  or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
                  or info['id']),
        'availability': self._extract_availability(data),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
        'description': try_get(metadata_renderer, lambda x: x.get('description', '')),
        'tags': try_get(metadata_renderer or {}, lambda x: x.get('keywords', '').split()),
        'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners,
    })

    # Playlist stats is a text runs array containing [video count, view count, last updated].
    # last updated or (view count and last updated) may be missing.
    playlist_stats = get_first(
        (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), ))

    last_updated_unix = self._parse_time_text(
        self._get_text(playlist_stats, 2)  # deprecated, remove when old layout discontinued
        or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
    info['modified_date'] = strftime_or_none(last_updated_unix, '%Y%m%d')

    info['view_count'] = self._get_count(playlist_stats, 1)
    if info['view_count'] is None:  # 0 is allowed
        info['view_count'] = self._get_count(playlist_header_renderer, 'viewCountText')

    info['playlist_count'] = self._get_count(playlist_stats, 0)
    if info['playlist_count'] is None:  # 0 is allowed
        info['playlist_count'] = self._get_count(playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))

    # No uploader ID from channel metadata: take the uploader from the
    # playlist owner instead
    if not info.get('uploader_id'):
        owner = traverse_obj(playlist_header_renderer, 'ownerText')
        if not owner:  # Deprecated
            owner = traverse_obj(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer'),
                ('videoOwner', 'videoOwnerRenderer', 'title'))
        owner_text = self._get_text(owner)
        browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
        info.update({
            # "by X and N others" is reduced to X; any other text is kept whole
            'uploader': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
            'uploader_id': browse_ep.get('browseId'),
            'uploader_url': urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl'))
        })

    info.update({
        'channel': info['uploader'],
        'channel_id': info['uploader_id'],
        'channel_url': info['uploader_url']
    })
    return info
4753
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of an inline playlist, requesting further pages as needed.

    *playlist* is the playlist renderer of the current page. After its videos
    have been yielded, the next page is requested from the ``next`` endpoint,
    starting at the last video seen. Extraction stops when a page has no
    videos, brings no video after the last one already yielded, or when the
    response contains no playlist.
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Pages overlap: skip everything up to and including the last video
        # already yielded (start is 0 when that video is not on this page)
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The last item is not necessarily a playlistPanelVideoRenderer with
        # a watch endpoint; fall back to the defaults in the query below
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        # A response without a playlist has nothing more to offer; stop
        # instead of failing on it in the next iteration
        if not playlist:
            return
4784
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Build the result for a playlist panel found in a response.

    When the panel links to a playlist page different from `url` (and the ID
    is not a known-unviewable one) the work is handed to YoutubeTabIE via a
    URL result; otherwise the entries are extracted inline.
    @param item_id: fallback playlist ID when the panel has no `playlistId`
    @param url: URL being extracted; base for resolving the playlist link
    @param data: response; fallback source for the title
    @param playlist: playlist panel renderer
    @param ytcfg: ytcfg passed on to inline extraction
    """
    playlist_title = playlist.get('title')
    if not playlist_title:
        playlist_title = try_get(data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    linked_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    playlist_url = urljoin(url, linked_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    if not playlist_url or playlist_url == url or is_known_unviewable:
        inline_entries = self._extract_inline_playlist(playlist, playlist_id, data, ytcfg)
        return self.playlist_result(
            inline_entries, playlist_id=playlist_id, playlist_title=playlist_title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=playlist_title)
4807
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public

    A PRIVATE/UNLISTED badge is honoured on its own; the header `privacy`
    field, the privacy dropdown icon and (for unlisted only) the microformat
    flag are consulted, in that order, when the badge is absent.
    @param data: response
    """
    sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
    player_header_privacy = playlist_header_renderer.get('privacy')

    badges = self._extract_badges(sidebar_renderer)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_setting_icon = get_first(
        (playlist_header_renderer, sidebar_renderer),
        ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
         lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        expected_type=str)

    microformats_is_unlisted = traverse_obj(
        data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)

    if (self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or player_header_privacy == 'PUBLIC'
            or privacy_setting_icon == 'PRIVACY_PUBLIC'):
        return 'public'

    def from_privacy_fields(header_value, icon_value, fallback=None):
        # First explicit source wins: header field, then dropdown icon, then `fallback`
        if player_header_privacy is not None:
            return player_header_privacy == header_value
        if privacy_setting_icon is not None:
            return privacy_setting_icon == icon_value
        return fallback

    # NOTE: the badge check is deliberately kept outside the header/icon fallback
    # chain. Written as `badge or header == X if header is not None else ...`, the
    # conditional expression swallows the `or` and the badge is ignored whenever
    # the header privacy field is missing.
    return self._availability(
        is_private=(
            self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
            or from_privacy_fields('PRIVATE', 'PRIVACY_PRIVATE')),
        is_unlisted=(
            self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
            or from_privacy_fields('UNLISTED', 'PRIVACY_UNLISTED', microformats_is_unlisted)),
        needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
        needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
        needs_auth=False)
4848
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first non-empty `info_renderer` entry among the playlist sidebar items.

    @param data: response containing `sidebar.playlistSidebarRenderer.items`
    @param info_renderer: key to look up in each sidebar item
    @param expected_type: type the looked-up value must have
    @returns the matching renderer, or None when there is none
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(entry, lambda x: x[info_renderer], expected_type)
        for entry in sidebar_items)
    # Falsy candidates (missing key, wrong type, empty renderer) are skipped
    return next(filter(None, candidates), None)
4857
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Reload playlists with unavailable videos (e.g. private videos, region blocked, etc.)

    Nothing is requested (and None is returned) unless `data` carries playlist
    metadata or a playlist header. The request itself is non-fatal.
    """
    playlist_marker = traverse_obj(
        data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer'))
    if not playlist_marker:
        return None

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    api_headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)

    browse_query = {
        'params': 'wgYCCAA=',
        'browseId': f'VL{item_id}'
    }
    return self._extract_response(
        item_id=item_id, headers=api_headers, query=browse_query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Redownloading playlist API JSON with unavailable videos')
4877
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the `skip` extractor argument of YoutubeTabIE (cached after first access)"""
    skipped_steps = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped_steps
4881
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download `url` and extract its initial data, retrying through self.RetryManager.

    Network errors other than HTTP 403/429 and incomplete initial data are
    handed to the retry manager; any other ExtractorError is reported through
    `_error_or_warning` and ends the attempts.
    @returns (webpage, data); each stays None until its step has succeeded once
    """
    webpage = data = None
    for attempt in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as err:
            cause = err.cause
            # HTTP 403 and 429 responses are not retried
            if isinstance(cause, network_exceptions) and not (
                    isinstance(cause, urllib.error.HTTPError) and cause.code in (403, 429)):
                attempt.error = err
                continue
            self._error_or_warning(err, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as err:
            self._error_or_warning(err, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        has_content = traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not has_content:
            attempt.error = ExtractorError('Incomplete yt initial data received')

    return webpage, data
4909
def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """
    Use if failed to extract ytcfg (and data) from initial webpage

    Does nothing when `ytcfg` is available or the session is not authenticated.
    Otherwise raises when `fatal` is set and 'authcheck' is not among the
    skipped steps, and only warns (once) in the remaining cases.
    """
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    skipped_steps = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if fatal and 'authcheck' not in skipped_steps:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)
4921
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Obtain the response data (and ytcfg) for a tab/playlist URL.

    Unless the webpage step is skipped, the webpage is tried first; when that
    yields no data, the URL is resolved through the API instead.
    @param fatal: whether a home-page redirect or API failure raises
    @param webpage_fatal: `fatal` value used for the webpage download only
    @returns (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)
        # Reject webpage data if redirected to home page without explicitly requesting
        tab_renderers = self._extract_tab_renderers(data)
        selected_tab = self._extract_selected_tab(tab_renderers, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if data:
        return data, ytcfg

    # No usable webpage data: fall back to resolving the URL through the API
    self._report_playlist_authcheck(ytcfg, fatal=fatal)
    data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
4940
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` via `navigation/resolve_url` and fetch the response of the endpoint it maps to.

    A browse endpoint is preferred over a watch endpoint. When neither is
    present, an ExtractorError is raised if `fatal`, otherwise a warning is
    reported and None is returned.
    """
    api_headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=api_headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolved endpoint, API path to query with its parameters)
    endpoint_routes = (('browseEndpoint', 'browse'), ('watchEndpoint', 'next'))
    for endpoint_key, api_path in endpoint_routes:
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_path, headers=api_headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)
4958
# `params` value used by _search_results when the caller passes none
_SEARCH_PARAMS = None

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of a search for `query`, following continuations page by page.

    @param params: `params` field of the request; defaults to self._SEARCH_PARAMS
                   and is omitted from the request when falsy
    @param default_client: client used for ytcfg, headers and the API calls
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    request_body = {'query': query}
    if params:
        request_body['params'] = params

    # Alternative locations of the result list; the first one present is used
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # Distinct top-level keys of the paths above
    check_get_keys = tuple(set(path[0] for path in content_keys))
    if self.skip_webpage:
        ytcfg = {}
    else:
        ytcfg = self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # One-element list that _extract_entries fills with the next continuation
    continuation_list = [None]
    search = None
    page_num = 0
    while True:
        page_num += 1
        continuation = continuation_list[0]
        if continuation:
            request_body.update(continuation)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=request_body,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
        if not continuation_list[0]:
            return
4993
4994
4995class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
4996 IE_DESC = 'YouTube Tabs'
4997 _VALID_URL = r'''(?x:
4998 https?://
4999 (?:\w+\.)?
5000 (?:
5001 youtube(?:kids)?\.com|
5002 %(invidious)s
5003 )/
5004 (?:
5005 (?P<channel_type>channel|c|user|browse)/|
5006 (?P<not_channel>
5007 feed/|hashtag/|
5008 (?:playlist|watch)\?.*?\blist=
5009 )|
5010 (?!(?:%(reserved_names)s)\b) # Direct URLs
5011 )
5012 (?P<id>[^/?\#&]+)
5013 )''' % {
5014 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
5015 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
5016 }
5017 IE_NAME = 'youtube:tab'
5018
5019 _TESTS = [{
5020 'note': 'playlists, multipage',
5021 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
5022 'playlist_mincount': 94,
5023 'info_dict': {
5024 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
5025 'title': 'Igor Kleiner - Playlists',
5026 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
5027 'uploader': 'Igor Kleiner',
5028 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5029 'channel': 'Igor Kleiner',
5030 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5031 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
5032 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5033 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5034 'channel_follower_count': int
5035 },
5036 }, {
5037 'note': 'playlists, multipage, different order',
5038 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
5039 'playlist_mincount': 94,
5040 'info_dict': {
5041 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
5042 'title': 'Igor Kleiner - Playlists',
5043 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
5044 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5045 'uploader': 'Igor Kleiner',
5046 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5047 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
5048 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5049 'channel': 'Igor Kleiner',
5050 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5051 'channel_follower_count': int
5052 },
5053 }, {
5054 'note': 'playlists, series',
5055 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
5056 'playlist_mincount': 5,
5057 'info_dict': {
5058 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5059 'title': '3Blue1Brown - Playlists',
5060 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
5061 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
5062 'uploader': '3Blue1Brown',
5063 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5064 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5065 'channel': '3Blue1Brown',
5066 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5067 'tags': ['Mathematics'],
5068 'channel_follower_count': int
5069 },
5070 }, {
5071 'note': 'playlists, singlepage',
5072 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
5073 'playlist_mincount': 4,
5074 'info_dict': {
5075 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5076 'title': 'ThirstForScience - Playlists',
5077 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
5078 'uploader': 'ThirstForScience',
5079 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5080 'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
5081 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
5082 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5083 'tags': 'count:13',
5084 'channel': 'ThirstForScience',
5085 'channel_follower_count': int
5086 }
5087 }, {
5088 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
5089 'only_matching': True,
5090 }, {
5091 'note': 'basic, single video playlist',
5092 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5093 'info_dict': {
5094 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5095 'uploader': 'Sergey M.',
5096 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5097 'title': 'youtube-dl public playlist',
5098 'description': '',
5099 'tags': [],
5100 'view_count': int,
5101 'modified_date': '20201130',
5102 'channel': 'Sergey M.',
5103 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5104 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5105 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5106 'availability': 'public',
5107 },
5108 'playlist_count': 1,
5109 }, {
5110 'note': 'empty playlist',
5111 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5112 'info_dict': {
5113 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5114 'uploader': 'Sergey M.',
5115 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5116 'title': 'youtube-dl empty playlist',
5117 'tags': [],
5118 'channel': 'Sergey M.',
5119 'description': '',
5120 'modified_date': '20160902',
5121 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5122 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5123 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5124 'availability': 'public',
5125 },
5126 'playlist_count': 0,
5127 }, {
5128 'note': 'Home tab',
5129 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
5130 'info_dict': {
5131 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5132 'title': 'lex will - Home',
5133 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5134 'uploader': 'lex will',
5135 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5136 'channel': 'lex will',
5137 'tags': ['bible', 'history', 'prophesy'],
5138 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5139 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5140 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5141 'channel_follower_count': int
5142 },
5143 'playlist_mincount': 2,
5144 }, {
5145 'note': 'Videos tab',
5146 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
5147 'info_dict': {
5148 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5149 'title': 'lex will - Videos',
5150 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5151 'uploader': 'lex will',
5152 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5153 'tags': ['bible', 'history', 'prophesy'],
5154 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5155 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5156 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5157 'channel': 'lex will',
5158 'channel_follower_count': int
5159 },
5160 'playlist_mincount': 975,
5161 }, {
5162 'note': 'Videos tab, sorted by popular',
5163 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
5164 'info_dict': {
5165 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5166 'title': 'lex will - Videos',
5167 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5168 'uploader': 'lex will',
5169 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5170 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5171 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5172 'channel': 'lex will',
5173 'tags': ['bible', 'history', 'prophesy'],
5174 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5175 'channel_follower_count': int
5176 },
5177 'playlist_mincount': 199,
5178 }, {
5179 'note': 'Playlists tab',
5180 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
5181 'info_dict': {
5182 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5183 'title': 'lex will - Playlists',
5184 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5185 'uploader': 'lex will',
5186 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5187 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5188 'channel': 'lex will',
5189 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5190 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5191 'tags': ['bible', 'history', 'prophesy'],
5192 'channel_follower_count': int
5193 },
5194 'playlist_mincount': 17,
5195 }, {
5196 'note': 'Community tab',
5197 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
5198 'info_dict': {
5199 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5200 'title': 'lex will - Community',
5201 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5202 'uploader': 'lex will',
5203 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5204 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5205 'channel': 'lex will',
5206 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5207 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5208 'tags': ['bible', 'history', 'prophesy'],
5209 'channel_follower_count': int
5210 },
5211 'playlist_mincount': 18,
5212 }, {
5213 'note': 'Channels tab',
5214 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
5215 'info_dict': {
5216 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5217 'title': 'lex will - Channels',
5218 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5219 'uploader': 'lex will',
5220 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5221 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5222 'channel': 'lex will',
5223 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5224 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5225 'tags': ['bible', 'history', 'prophesy'],
5226 'channel_follower_count': int
5227 },
5228 'playlist_mincount': 12,
5229 }, {
5230 'note': 'Search tab',
5231 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
5232 'playlist_mincount': 40,
5233 'info_dict': {
5234 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5235 'title': '3Blue1Brown - Search - linear algebra',
5236 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
5237 'uploader': '3Blue1Brown',
5238 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
5239 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5240 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5241 'tags': ['Mathematics'],
5242 'channel': '3Blue1Brown',
5243 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5244 'channel_follower_count': int
5245 },
5246 }, {
5247 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5248 'only_matching': True,
5249 }, {
5250 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5251 'only_matching': True,
5252 }, {
5253 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5254 'only_matching': True,
5255 }, {
5256 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
5257 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5258 'info_dict': {
5259 'title': '29C3: Not my department',
5260 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5261 'uploader': 'Christiaan008',
5262 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5263 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
5264 'tags': [],
5265 'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
5266 'view_count': int,
5267 'modified_date': '20150605',
5268 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5269 'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
5270 'channel': 'Christiaan008',
5271 'availability': 'public',
5272 },
5273 'playlist_count': 96,
5274 }, {
5275 'note': 'Large playlist',
5276 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
5277 'info_dict': {
5278 'title': 'Uploads from Cauchemar',
5279 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
5280 'uploader': 'Cauchemar',
5281 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
5282 'channel_url': 'https://www.youtube.com/c/Cauchemar89',
5283 'tags': [],
5284 'modified_date': r're:\d{8}',
5285 'channel': 'Cauchemar',
5286 'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
5287 'view_count': int,
5288 'description': '',
5289 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
5290 'availability': 'public',
5291 },
5292 'playlist_mincount': 1123,
5293 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5294 }, {
5295 'note': 'even larger playlist, 8832 videos',
5296 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
5297 'only_matching': True,
5298 }, {
5299 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
5300 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
5301 'info_dict': {
5302 'title': 'Uploads from Interstellar Movie',
5303 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
5304 'uploader': 'Interstellar Movie',
5305 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
5306 'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
5307 'tags': [],
5308 'view_count': int,
5309 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
5310 'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
5311 'channel': 'Interstellar Movie',
5312 'description': '',
5313 'modified_date': r're:\d{8}',
5314 'availability': 'public',
5315 },
5316 'playlist_mincount': 21,
5317 }, {
5318 'note': 'Playlist with "show unavailable videos" button',
5319 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
5320 'info_dict': {
5321 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
5322 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
5323 'uploader': 'Phim Siêu Nhân Nhật Bản',
5324 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5325 'view_count': int,
5326 'channel': 'Phim Siêu Nhân Nhật Bản',
5327 'tags': [],
5328 'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5329 'description': '',
5330 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5331 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5332 'modified_date': r're:\d{8}',
5333 'availability': 'public',
5334 },
5335 'playlist_mincount': 200,
5336 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5337 }, {
5338 'note': 'Playlist with unavailable videos in page 7',
5339 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5340 'info_dict': {
5341 'title': 'Uploads from BlankTV',
5342 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
5343 'uploader': 'BlankTV',
5344 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5345 'channel': 'BlankTV',
5346 'channel_url': 'https://www.youtube.com/c/blanktv',
5347 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5348 'view_count': int,
5349 'tags': [],
5350 'uploader_url': 'https://www.youtube.com/c/blanktv',
5351 'modified_date': r're:\d{8}',
5352 'description': '',
5353 'availability': 'public',
5354 },
5355 'playlist_mincount': 1000,
5356 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5357 }, {
5358 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5359 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5360 'info_dict': {
5361 'title': 'Data Analysis with Dr Mike Pound',
5362 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5363 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5364 'uploader': 'Computerphile',
5365 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
5366 'uploader_url': 'https://www.youtube.com/user/Computerphile',
5367 'tags': [],
5368 'view_count': int,
5369 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5370 'channel_url': 'https://www.youtube.com/user/Computerphile',
5371 'channel': 'Computerphile',
5372 'availability': 'public',
5373 'modified_date': '20190712',
5374 },
5375 'playlist_mincount': 11,
5376 }, {
5377 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5378 'only_matching': True,
5379 }, {
5380 'note': 'Playlist URL that does not actually serve a playlist',
5381 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5382 'info_dict': {
5383 'id': 'FqZTN594JQw',
5384 'ext': 'webm',
5385 'title': "Smiley's People 01 detective, Adventure Series, Action",
5386 'uploader': 'STREEM',
5387 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
5388 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
5389 'upload_date': '20150526',
5390 'license': 'Standard YouTube License',
5391 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5392 'categories': ['People & Blogs'],
5393 'tags': list,
5394 'view_count': int,
5395 'like_count': int,
5396 },
5397 'params': {
5398 'skip_download': True,
5399 },
5400 'skip': 'This video is not available.',
5401 'add_ie': [YoutubeIE.ie_key()],
5402 }, {
5403 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5404 'only_matching': True,
5405 }, {
5406 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5407 'only_matching': True,
5408 }, {
5409 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5410 'info_dict': {
5411 'id': 'Wq15eF5vCbI', # This will keep changing
5412 'ext': 'mp4',
5413 'title': str,
5414 'uploader': 'Sky News',
5415 'uploader_id': 'skynews',
5416 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
5417 'upload_date': r're:\d{8}',
5418 'description': str,
5419 'categories': ['News & Politics'],
5420 'tags': list,
5421 'like_count': int,
5422 'release_timestamp': int,
5423 'channel': 'Sky News',
5424 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5425 'age_limit': 0,
5426 'view_count': int,
5427 'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',
5428 'playable_in_embed': True,
5429 'release_date': r're:\d+',
5430 'availability': 'public',
5431 'live_status': 'is_live',
5432 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
5433 'channel_follower_count': int,
5434 'concurrent_view_count': int,
5435 },
5436 'params': {
5437 'skip_download': True,
5438 },
5439 'expected_warnings': ['Ignoring subtitle tracks found in '],
5440 }, {
5441 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5442 'info_dict': {
5443 'id': 'a48o2S1cPoo',
5444 'ext': 'mp4',
5445 'title': 'The Young Turks - Live Main Show',
5446 'uploader': 'The Young Turks',
5447 'uploader_id': 'TheYoungTurks',
5448 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
5449 'upload_date': '20150715',
5450 'license': 'Standard YouTube License',
5451 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5452 'categories': ['News & Politics'],
5453 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5454 'like_count': int,
5455 },
5456 'params': {
5457 'skip_download': True,
5458 },
5459 'only_matching': True,
5460 }, {
5461 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5462 'only_matching': True,
5463 }, {
5464 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5465 'only_matching': True,
5466 }, {
5467 'note': 'A channel that is not live. Should raise error',
5468 'url': 'https://www.youtube.com/user/numberphile/live',
5469 'only_matching': True,
5470 }, {
5471 'url': 'https://www.youtube.com/feed/trending',
5472 'only_matching': True,
5473 }, {
5474 'url': 'https://www.youtube.com/feed/library',
5475 'only_matching': True,
5476 }, {
5477 'url': 'https://www.youtube.com/feed/history',
5478 'only_matching': True,
5479 }, {
5480 'url': 'https://www.youtube.com/feed/subscriptions',
5481 'only_matching': True,
5482 }, {
5483 'url': 'https://www.youtube.com/feed/watch_later',
5484 'only_matching': True,
5485 }, {
5486 'note': 'Recommended - redirects to home page.',
5487 'url': 'https://www.youtube.com/feed/recommended',
5488 'only_matching': True,
5489 }, {
5490 'note': 'inline playlist with not always working continuations',
5491 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5492 'only_matching': True,
5493 }, {
5494 'url': 'https://www.youtube.com/course',
5495 'only_matching': True,
5496 }, {
5497 'url': 'https://www.youtube.com/zsecurity',
5498 'only_matching': True,
5499 }, {
5500 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5501 'only_matching': True,
5502 }, {
5503 'url': 'https://www.youtube.com/TheYoungTurks/live',
5504 'only_matching': True,
5505 }, {
5506 'url': 'https://www.youtube.com/hashtag/cctv9',
5507 'info_dict': {
5508 'id': 'cctv9',
5509 'title': '#cctv9',
5510 'tags': [],
5511 },
5512 'playlist_mincount': 300, # not consistent but should be over 300
5513 }, {
5514 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
5515 'only_matching': True,
5516 }, {
5517 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
5518 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5519 'only_matching': True
5520 }, {
5521 'note': '/browse/ should redirect to /channel/',
5522 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
5523 'only_matching': True
5524 }, {
5525 'note': 'VLPL, should redirect to playlist?list=PL...',
5526 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5527 'info_dict': {
5528 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5529 'uploader': 'NoCopyrightSounds',
5530 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
5531 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5532 'title': 'NCS : All Releases 💿',
5533 'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5534 'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5535 'modified_date': r're:\d{8}',
5536 'view_count': int,
5537 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5538 'tags': [],
5539 'channel': 'NoCopyrightSounds',
5540 'availability': 'public',
5541 },
5542 'playlist_mincount': 166,
5543 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5544 }, {
5545 'note': 'Topic, should redirect to playlist?list=UU...',
5546 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5547 'info_dict': {
5548 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5549 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5550 'title': 'Uploads from Royalty Free Music - Topic',
5551 'uploader': 'Royalty Free Music - Topic',
5552 'tags': [],
5553 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5554 'channel': 'Royalty Free Music - Topic',
5555 'view_count': int,
5556 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5557 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5558 'modified_date': r're:\d{8}',
5559 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5560 'description': '',
5561 'availability': 'public',
5562 },
5563 'playlist_mincount': 101,
5564 }, {
5565 # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
5566 # Treat as a general feed
5567 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
5568 'info_dict': {
5569 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
5570 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
5571 'tags': [],
5572 },
5573 'playlist_mincount': 9,
5574 }, {
5575 'note': 'Youtube music Album',
5576 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
5577 'info_dict': {
5578 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
5579 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
5580 'tags': [],
5581 'view_count': int,
5582 'description': '',
5583 'availability': 'unlisted',
5584 'modified_date': r're:\d{8}',
5585 },
5586 'playlist_count': 50,
5587 }, {
5588 'note': 'unlisted single video playlist',
5589 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5590 'info_dict': {
5591 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5592 'uploader': 'colethedj',
5593 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5594 'title': 'yt-dlp unlisted playlist test',
5595 'availability': 'unlisted',
5596 'tags': [],
5597 'modified_date': '20220418',
5598 'channel': 'colethedj',
5599 'view_count': int,
5600 'description': '',
5601 'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5602 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5603 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5604 },
5605 'playlist_count': 1,
5606 }, {
5607 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
5608 'url': 'https://www.youtube.com/feed/recommended',
5609 'info_dict': {
5610 'id': 'recommended',
5611 'title': 'recommended',
5612 'tags': [],
5613 },
5614 'playlist_mincount': 50,
5615 'params': {
5616 'skip_download': True,
5617 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5618 },
5619 }, {
5620 'note': 'API Fallback: /videos tab, sorted by oldest first',
5621 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
5622 'info_dict': {
5623 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5624 'title': 'Cody\'sLab - Videos',
5625 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
5626 'uploader': 'Cody\'sLab',
5627 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5628 'channel': 'Cody\'sLab',
5629 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5630 'tags': [],
5631 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5632 'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5633 'channel_follower_count': int
5634 },
5635 'playlist_mincount': 650,
5636 'params': {
5637 'skip_download': True,
5638 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5639 },
5640 'skip': 'Query for sorting no longer works',
5641 }, {
5642 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
5643 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5644 'info_dict': {
5645 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5646 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5647 'title': 'Uploads from Royalty Free Music - Topic',
5648 'uploader': 'Royalty Free Music - Topic',
5649 'modified_date': r're:\d{8}',
5650 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5651 'description': '',
5652 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5653 'tags': [],
5654 'channel': 'Royalty Free Music - Topic',
5655 'view_count': int,
5656 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5657 'availability': 'public',
5658 },
5659 'playlist_mincount': 101,
5660 'params': {
5661 'skip_download': True,
5662 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5663 },
5664 }, {
5665 'note': 'non-standard redirect to regional channel',
5666 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
5667 'only_matching': True
5668 }, {
5669 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
5670 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5671 'info_dict': {
5672 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5673 'modified_date': '20220407',
5674 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5675 'tags': [],
5676 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5677 'uploader': 'pukkandan',
5678 'availability': 'unlisted',
5679 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5680 'channel': 'pukkandan',
5681 'description': 'Test for collaborative playlist',
5682 'title': 'yt-dlp test - collaborative playlist',
5683 'view_count': int,
5684 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5685 },
5686 'playlist_mincount': 2
5687 }, {
5688 'note': 'translated tab name',
5689 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
5690 'info_dict': {
5691 'id': 'UCiu-3thuViMebBjw_5nWYrA',
5692 'tags': [],
5693 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
5694 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5695 'description': 'test description',
5696 'title': 'cole-dlp-test-acc - 再生リスト',
5697 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5698 'uploader': 'cole-dlp-test-acc',
5699 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
5700 'channel': 'cole-dlp-test-acc',
5701 'channel_follower_count': int,
5702 },
5703 'playlist_mincount': 1,
5704 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
5705 'expected_warnings': ['Preferring "ja"'],
5706 }, {
5707 # XXX: this should really check flat playlist entries, but the test suite doesn't support that
5708 'note': 'preferred lang set with playlist with translated video titles',
5709 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
5710 'info_dict': {
5711 'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
5712 'tags': [],
5713 'view_count': int,
5714 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5715 'uploader': 'cole-dlp-test-acc',
5716 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
5717 'channel': 'cole-dlp-test-acc',
5718 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
5719 'description': 'test',
5720 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5721 'title': 'dlp test playlist',
5722 'availability': 'public',
5723 },
5724 'playlist_mincount': 1,
5725 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
5726 'expected_warnings': ['Preferring "ja"'],
5727 }, {
5728 # shorts audio pivot for 2GtVksBMYFM.
5729 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
5730 'info_dict': {
5731 'id': 'sfv_audio_pivot',
5732 'title': 'sfv_audio_pivot',
5733 'tags': [],
5734 },
5735 'playlist_mincount': 50,
5736
5737 }, {
5738 # Channel with a real live tab (not to be mistaken with streams tab)
5739 # Do not treat like it should redirect to live stream
5740 'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',
5741 'info_dict': {
5742 'id': 'UCEH7P7kyJIkS_gJf93VYbmg',
5743 'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',
5744 'tags': [],
5745 },
5746 'playlist_mincount': 20,
5747 }, {
5748 # Tab name is not the same as tab id
5749 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',
5750 'info_dict': {
5751 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
5752 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',
5753 'tags': [],
5754 },
5755 'playlist_mincount': 8,
5756 }, {
5757 # Home tab id is literally home. Not to get mistaken with featured
5758 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',
5759 'info_dict': {
5760 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
5761 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',
5762 'tags': [],
5763 },
5764 'playlist_mincount': 8,
5765 }, {
5766 # Should get three playlists for videos, shorts and streams tabs
5767 'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
5768 'info_dict': {
5769 'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
5770 'title': 'Polka Ch. 尾丸ポルカ',
5771 'channel_follower_count': int,
5772 'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
5773 'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
5774 'uploader': 'Polka Ch. 尾丸ポルカ',
5775 'description': 'md5:3b8df1ac5af337aa206e37ee3d181ec9',
5776 'channel': 'Polka Ch. 尾丸ポルカ',
5777 'tags': 'count:35',
5778 'uploader_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
5779 'uploader_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
5780 },
5781 'playlist_count': 3,
5782 }, {
5783 # Shorts tab with channel with handle
5784 'url': 'https://www.youtube.com/@NotJustBikes/shorts',
5785 'info_dict': {
5786 'id': 'UC0intLFzLaudFG-xAvUEO-A',
5787 'title': 'Not Just Bikes - Shorts',
5788 'tags': 'count:12',
5789 'uploader': 'Not Just Bikes',
5790 'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
5791 'description': 'md5:7513148b1f02b924783157d84c4ea555',
5792 'channel_follower_count': int,
5793 'uploader_id': 'UC0intLFzLaudFG-xAvUEO-A',
5794 'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
5795 'uploader_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
5796 'channel': 'Not Just Bikes',
5797 },
5798 'playlist_mincount': 10,
5799 }, {
5800 # Streams tab
5801 'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',
5802 'info_dict': {
5803 'id': 'UC3eYAvjCVwNHgkaGbXX3sig',
5804 'title': '中村悠一 - Live',
5805 'tags': 'count:7',
5806 'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
5807 'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
5808 'uploader_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
5809 'channel': '中村悠一',
5810 'uploader_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
5811 'channel_follower_count': int,
5812 'uploader': '中村悠一',
5813 'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
5814 },
5815 'playlist_mincount': 60,
5816 }, {
5817 # Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.
5818 # See test_youtube_lists
5819 'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',
5820 'only_matching': True,
5821 }, {
5822 # No uploads and no UCID given. Should fail with no uploads error
5823 # See test_youtube_lists
5824 'url': 'https://www.youtube.com/news',
5825 'only_matching': True
5826 }, {
5827 # No videos tab but has a shorts tab
5828 'url': 'https://www.youtube.com/c/TKFShorts',
5829 'info_dict': {
5830 'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
5831 'title': 'Shorts Break - Shorts',
5832 'tags': 'count:32',
5833 'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
5834 'channel': 'Shorts Break',
5835 'description': 'md5:a6c234cf3d50d878ef8721e34457cd11',
5836 'uploader': 'Shorts Break',
5837 'channel_follower_count': int,
5838 'uploader_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
5839 'uploader_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
5840 'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
5841 },
5842 'playlist_mincount': 30,
5843 }, {
5844 # Trending Now Tab. tab id is empty
5845 'url': 'https://www.youtube.com/feed/trending',
5846 'info_dict': {
5847 'id': 'trending',
5848 'title': 'trending - Now',
5849 'tags': [],
5850 },
5851 'playlist_mincount': 30,
5852 }, {
5853 # Trending Gaming Tab. tab id is empty
5854 'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
5855 'info_dict': {
5856 'id': 'trending',
5857 'title': 'trending - Gaming',
5858 'tags': [],
5859 },
5860 'playlist_mincount': 30,
5861 }, {
5862 # Shorts url result in shorts tab
5863 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
5864 'info_dict': {
5865 'id': 'UCiu-3thuViMebBjw_5nWYrA',
5866 'title': 'cole-dlp-test-acc - Shorts',
5867 'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
5868 'channel': 'cole-dlp-test-acc',
5869 'channel_follower_count': int,
5870 'description': 'test description',
5871 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
5872 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5873 'tags': [],
5874 'uploader': 'cole-dlp-test-acc',
5875 'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5876
5877 },
5878 'playlist': [{
5879 'info_dict': {
5880 '_type': 'url',
5881 'ie_key': 'Youtube',
5882 'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',
5883 'id': 'sSM9J5YH_60',
5884 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
5885 'title': 'SHORT short',
5886 'channel': 'cole-dlp-test-acc',
5887 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
5888 'view_count': int,
5889 'thumbnails': list,
5890 }
5891 }],
5892 'params': {'extract_flat': True},
5893 }, {
5894 # Live video status should be extracted
5895 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
5896 'info_dict': {
5897 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
5898 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO, should be Minecraft - Live or Minecraft - Topic - Live
5899 'tags': []
5900 },
5901 'playlist': [{
5902 'info_dict': {
5903 '_type': 'url',
5904 'ie_key': 'Youtube',
5905 'url': 'startswith:https://www.youtube.com/watch?v=',
5906 'id': str,
5907 'title': str,
5908 'live_status': 'is_live',
5909 'channel_id': str,
5910 'channel_url': str,
5911 'concurrent_view_count': int,
5912 'channel': str,
5913 }
5914 }],
5915 'params': {'extract_flat': True},
5916 'playlist_mincount': 1
5917 }]
5918
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    URLs that ``YoutubeIE`` already accepts are rejected here; for every
    other URL the decision is delegated to the parent class.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
5922
# Splits a URL into three named parts:
#   pre  - the portion matched by _VALID_URL
#   tab  - an optional single path segment ("/" followed by characters other
#          than "?", "#" and "/"); the conditional group only attempts it when
#          the `not_channel` group did not take part in the match
#   post - whatever remains up to the end of the string
# NOTE(review): `not_channel` is presumably a named group inside _VALID_URL — confirm.
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$')
5924
def _get_url_mobj(self, url):
    """Match *url* against ``self._URL_RE`` and return its named groups.

    Returns a dict mapping every named group of the pattern to the text it
    captured. Groups that did not participate in the match map to ``''``
    instead of ``None``, so callers can slice or concatenate the values
    without checking for ``None`` first.

    Raises AttributeError if *url* does not match the pattern at all
    (``match`` returns ``None``), exactly as before.
    """
    # groupdict(default='') fills in non-participating groups directly, so
    # the dict no longer has to be updated while its own items are iterated.
    return self._URL_RE.match(url).groupdict(default='')
5929
def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
    """Work out ``(tab_id, tab_name)`` for a single tab renderer dict.

    ``tab_name`` is the renderer's ``title`` lower-cased (``''`` when absent).
    ``tab_id`` is taken, in order of preference, from the tab segment of the
    renderer's endpoint URL (resolved against *base_url*), from its
    ``tabIdentifier``, or finally from the tab name itself.
    """
    name = (tab.get('title') or '').lower()
    endpoint_url = traverse_obj(
        tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'))
    full_url = urljoin(base_url, endpoint_url)

    identifier = None
    if full_url:
        # Drop the leading "/" of the captured tab segment
        identifier = self._get_url_mobj(full_url)['tab'][1:]
    if not identifier:
        identifier = traverse_obj(tab, 'tabIdentifier', expected_type=str)

    if identifier:
        if identifier == 'TAB_ID_SPONSORSHIPS':
            identifier = 'membership'
        return identifier, name

    # Fallback to tab name if we cannot get the tab id.
    # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
    # Note that in the case of translated tab name this may result in an empty string, which we don't want.
    if name:
        self.write_debug(f'Falling back to selected tab name: {name}')
    name_to_id = {
        'home': 'featured',
        'live': 'streams',
    }
    return name_to_id.get(name, name), name
5951
def _has_tab(self, tabs, tab_id):
    """Tell whether any renderer in *tabs* resolves to the tab id *tab_id*."""
    for renderer in tabs:
        resolved_id = self._extract_tab_id_and_name(renderer)[0]
        if resolved_id == tab_id:
            return True
    return False
5954
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Extract a channel tab, feed or playlist page, or redirect elsewhere.

    Depending on the URL and the page data this returns one of:
      * a ``url_result`` pointing at another YoutubeTabIE URL (music
        playlists/albums/browse pages, ``watch?list=`` without a video id,
        conditional regional redirects),
      * a ``url_result`` for ``YoutubeIE`` when a single video is wanted,
      * the result of ``_extract_from_tabs`` for one tab,
      * a ``playlist_result`` wrapping several tabs (the selected tab plus
        the streams/shorts tabs when no tab was requested),
      * the result of ``_extract_from_playlist`` for an inline playlist.

    ``smuggled_data`` is read with ``.get('is_music_url')``.
    NOTE(review): it is presumably supplied by the ``passthrough_smuggled_data``
    decorator — confirm against its definition.

    Raises ExtractorError when the URL/page cannot be handled and
    UserNotLive when a ``/live`` tab was requested but another tab was selected.
    """
    item_id = self._match_id(url)
    # Whatever host was given, continue with www.youtube.com
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    mobj = self._get_url_mobj(url)
    # `is_channel` holds whenever the `not_channel` group captured nothing
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel']
    if is_channel and smuggled_data.get('is_music_url'):
        if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
            return self.url_result(
                f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:])
        elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                get_all=False, expected_type=str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, YoutubeTabIE)
        elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            return self.url_result(
                f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)

    # `tab` includes its leading "/"; the requested tab id is the text after it
    original_tab_id, display_id = tab[1:], f'{item_id}{tab}'
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # No tab requested: fetch the /videos tab
        url = f'{pre}/videos{post}'

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = [traverse_obj(qs, (key, 0)) for key in ('v', 'list')]
    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('A video URL was given without video ID', expected=True)
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)

    # NOTE(review): _yes_playlist presumably decides between the playlist and the
    # single video (e.g. --no-playlist) — confirm against its definition
    if not self._yes_playlist(playlist_id, video_id):
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    data, ytcfg = self._extract_data(url, display_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Keep the originally requested tab and trailing part on the redirect target
        redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post))
        self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
        return self.url_result(redirect_url, YoutubeTabIE)

    # `extra_tabs` collects URLs of additional tabs to extract alongside the selected one
    tabs, extra_tabs = self._extract_tab_renderers(data), []
    if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url)  # NB: Name may be translated
        self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')

        if not original_tab_id and selected_tab_name:
            self.to_screen('Downloading all uploads of the channel. '
                           'To download only the videos in a specific tab, pass the tab\'s URL')
            if self._has_tab(tabs, 'streams'):
                extra_tabs.append(''.join((pre, '/streams', post)))
            if self._has_tab(tabs, 'shorts'):
                extra_tabs.append(''.join((pre, '/shorts', post)))
            # XXX: Members-only tab should also be extracted

            if not extra_tabs and selected_tab_id != 'videos':
                # Channel does not have streams, shorts or videos tabs
                if item_id[:2] != 'UC':
                    raise ExtractorError('This channel has no uploads', expected=True)

                # Topic channels don't have /videos. Use the equivalent playlist instead
                pl_id = f'UU{item_id[2:]}'
                pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                try:
                    data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                except ExtractorError:
                    raise ExtractorError('This channel has no uploads', expected=True)
                else:
                    # From here on the uploads playlist stands in for the channel
                    item_id, url = pl_id, pl_url
                    self.to_screen(
                        f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')

            elif extra_tabs and selected_tab_id != 'videos':
                # When there are shorts/live tabs but not videos tab
                # NOTE(review): `data` is dropped here, so the code below relies on the
                # helpers tolerating data=None — confirm
                url, data = f'{pre}{post}', None

        elif (original_tab_id or 'videos') != selected_tab_id:
            if original_tab_id == 'live':
                # Live tab should have redirected to the video
                # Except in the case the channel has an actual live tab
                # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
                raise UserNotLive(video_id=item_id)
            elif selected_tab_name:
                raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)

            # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
            url = f'{pre}{post}'

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # Tab renderers are extracted again because `data` may have been replaced above
    tabs, entries = self._extract_tab_renderers(data), []
    if tabs:
        entries = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
        entries[0].update({
            'extractor_key': YoutubeTabIE.ie_key(),
            'extractor': YoutubeTabIE.IE_NAME,
            'webpage_url': url,
        })
    if self.get_param('playlist_items') == '0':
        entries.extend(self.url_result(u, YoutubeTabIE) for u in extra_tabs)
    else:  # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
        entries.extend(map(self._real_extract, extra_tabs))

    if len(entries) == 1:
        return entries[0]
    elif entries:
        # Several tabs: wrap them in one playlist carrying the channel metadata
        metadata = self._extract_metadata_from_tabs(item_id, data)
        uploads_url = 'the Uploads (UU) playlist URL'
        if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):
            uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
        self.to_screen(
            'Downloading as multiple playlists, separated by tabs. '
            f'To download as a single playlist instead, pass {uploads_url}')
        return self.playlist_result(entries, item_id, **metadata)

    # Inline playlist
    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Last resort: fall back to a single video, preferring the id found in the page data
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if tab != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    raise ExtractorError('Unable to recognize tab page')
6104
6105
# Accepts either a bare playlist id or a youtube/youtubekids/invidious URL that
# carries a `list=` query parameter, and hands the playlist over to YoutubeTabIE.
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/c/WickmanVT',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/c/WickmanVT',
            'availability': 'public',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle *url*.

        A URL is rejected when ``YoutubeTabIE`` already accepts it or when its
        query string carries a non-empty ``v`` parameter; otherwise the parent
        class decides.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): the function-level import is kept as in the original;
        # presumably this method must stay self-contained — confirm before hoisting.
        from ..utils import parse_qs
        video_ids = parse_qs(url).get('v', [None])
        return False if video_ids[0] else super().suitable(url)

    def _real_extract(self, url):
        """Rewrite *url* as a ``/playlist`` URL and delegate to ``YoutubeTabIE``.

        The original query string is carried over; when there is none, a
        ``list`` parameter with the matched id is used. URLs recognised as
        music URLs are smuggled with ``is_music_url``.
        """
        list_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        query = parse_qs(url) or {'list': list_id}
        target = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            target = smuggle_url(target, {'is_music_url': True})
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=list_id)
6218
6219
# Handles youtu.be short links that also carry a `list=` playlist parameter.
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Turn the short link into a full ``watch`` URL and delegate to ``YoutubeTabIE``.

        The video id and playlist id come from the ``id`` and ``playlist_id``
        groups of ``_VALID_URL``; the result is reported under the playlist id.
        """
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
6269
6270
# Maps `/embed/live_stream?channel=<id>` embeds onto the channel's /live URL.
class YoutubeLivestreamEmbedIE(InfoExtractor):
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to ``YoutubeTabIE`` with the ``/live`` URL of the embedded channel."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
6284
6285
# Expands the `ytuser:<name>` shorthand into the corresponding /user/ URL.
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to ``YoutubeTabIE`` with the ``/user/`` URL for the given name."""
        username = self._match_id(url)
        user_page = f'https://www.youtube.com/user/{username}'
        return self.url_result(user_page, YoutubeTabIE, username)
6298
6299
# Resolves the `:ytfav` keyword (and its spelling variants) to the `LL` playlist.
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to ``YoutubeTabIE`` with the ``list=LL`` playlist URL."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
6317
6318
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Extractor for the ":ytnotif" keyword; returns the notification menu as a playlist."""

    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [
        {
            'url': ':ytnotif',
            'only_matching': True,
        },
        {
            'url': ':ytnotifications',
            'only_matching': True,
        },
    ]

    def _extract_notification_menu(self, response, continuation_list):
        """
        Yield one entry per usable notification found in an API response.

        @param response           Parsed response of the notification menu endpoint
        @param continuation_list  One-element list used as an output slot: it is reset
                                  to None when iteration starts and receives the
                                  continuation renderer if the response carries one
        """
        first_page_path = (
            'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer',
            'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items')
        next_page_path = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')
        items = traverse_obj(response, first_page_path, next_page_path, expected_type=list) or []

        continuation_list[0] = None
        for item in items:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            next_page = item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """
        Convert a single notification renderer into a url-type result dict.

        Returns None when the notification points to neither a video nor a
        community post (i.e. no video ID, or no channel ID / post ID).
        """
        channel_id = None
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_endpoint = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_endpoint, 'browseId', expected_type=str)
            canonical_url = traverse_obj(browse_endpoint, 'canonicalBaseUrl', expected_type=str)
            post_id = self._search_regex(r'/post/(.+)', canonical_url, 'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel_path = (
            'contextualMenu', 'menuRenderer', 'items', 1,
            'menuServiceItemRenderer', 'text', 'runs', 1, 'text')
        channel = traverse_obj(notification, channel_path, expected_type=str)

        message = self._get_text(notification, 'shortMessage')
        if message:
            message = message.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title_pattern = rf'{re.escape(channel or "")}[^:]+: (.+)'
        title = self._search_regex(title_pattern, message, 'video title', default=None)

        # The time text is only parsed when the "approximate_date" extractor argument is set
        timestamp = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
            timestamp = self._parse_time_text(self._get_text(notification, 'sentTimeText'))

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'timestamp': timestamp,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield notification entries page by page until no continuation is returned."""
        continuation_list = [None]
        response = None
        page = 0
        while True:
            page += 1
            ctoken = traverse_obj(
                continuation_list,
                (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'),
                expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data is taken from the previous page's response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                return

    def _real_extract(self, url):
        """Return the notifications as a playlist result with id and title 'notifications'."""
        display_id = 'notifications'
        if self.skip_webpage:
            ytcfg = {}
        else:
            ytcfg = self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)
6408
6409
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor for the "ytsearch" key, restricted to videos."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [
        {
            'url': 'ytsearch5:youtube-dl test video',
            'playlist_count': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
    ]
6423
6424
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor for the "ytsearchdate" key: videos only, sorted by date."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [
        {
            'url': 'ytsearchdate5:youtube-dl test video',
            'playlist_count': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
    ]
6438
6439
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extractor for youtube.com /results and /search URLs."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
        {
            'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'python',
                'title': 'python',
            },
        },
        {
            'url': 'https://www.youtube.com/results?search_query=%23cats',
            'playlist_mincount': 1,
            'info_dict': {
                'id': '#cats',
                'title': '#cats',
                # The test suite does not have support for nested playlists
                # 'entries': [{
                #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
                #     'title': '#cats',
                # }],
            },
        },
        {
            'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Run the search named by the URL; the query doubles as playlist id and title."""
        qs = parse_qs(url)
        # "search_query" takes precedence over "q" when both are present
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        search_params = qs.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)
6479
6480
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extractor for music.youtube.com search URLs, optionally limited to one section."""

    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music',
            'playlist_count': 16,
            'info_dict': {
                'id': 'royalty free music',
                'title': 'royalty free music',
            },
        },
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
            'playlist_mincount': 30,
            'info_dict': {
                'id': 'royalty free music - songs',
                'title': 'royalty free music - songs',
            },
            'params': {'extract_flat': 'in_playlist'},
        },
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
            'playlist_mincount': 30,
            'info_dict': {
                'id': 'royalty free music - community playlists',
                'title': 'royalty free music - community playlists',
            },
            'params': {'extract_flat': 'in_playlist'},
        },
    ]

    # Section name (as written in the URL fragment) -> value of the "sp" search parameter
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """
        Run the music search named by the URL.

        The section comes from the "sp" query parameter if present, otherwise
        from the URL fragment. The playlist id/title is "<query> - <section>",
        or just the query when no section applies.
        """
        qs = parse_qs(url)
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        params = qs.get('sp', (None,))[0]

        if params:
            # Show the known section name if there is one, else the raw parameter
            section = params
            for name, encoded in self._SECTIONS.items():
                if encoded == params:
                    section = name
                    break
        else:
            pieces = url.split('#')
            fragment = pieces[1] if len(pieces) > 1 else ''
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None

        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)
6532
6533
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.

    _FEED_NAME determines both IE_NAME ("youtube:<name>") and the feed URL
    ("/feed/<name>"), so subclasses must re-define it.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    def _real_initialize(self):
        # Borrow the check from YoutubeBaseInfoExtractor; this class does not inherit from it
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(cls):
        feed_name = cls._FEED_NAME
        return f'youtube:{feed_name}'

    def _real_extract(self, url):
        """Return a url result for the feed page, handled by YoutubeTabIE."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
6552
6553
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the ":ytwatchlater" keyword to the "WL" playlist URL."""

    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {
            'url': ':ytwatchlater',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return a url result for the "WL" playlist, handled by YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
6566
6567
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the bare youtube.com URL and the ":ytrec" keyword."""

    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False  # overrides the base class default of True
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {
            'url': ':ytrec',
            'only_matching': True,
        },
        {
            'url': ':ytrecommended',
            'only_matching': True,
        },
        {
            'url': 'https://youtube.com',
            'only_matching': True,
        },
    ]
6583
6584
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ":ytsubs" keyword."""

    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {
            'url': ':ytsubs',
            'only_matching': True,
        },
        {
            'url': ':ytsubscriptions',
            'only_matching': True,
        },
    ]
6596
6597
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ":ythis" / ":ythistory" keyword."""

    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {
            'url': ':ythistory',
            'only_matching': True,
        },
    ]
6606
6607
class YoutubeStoriesIE(InfoExtractor):
    """Resolve "ytstories:<channel id>" to the channel's "RLTD" playlist."""

    IE_NAME = 'youtube:stories'
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [
        {
            'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return a url result for the stories playlist, smuggling an 'is_story' flag."""
        # The matched id is the channel id without its "UC" prefix
        channel_suffix = self._match_id(url)
        playlist_id = f'RLTD{channel_suffix}'
        playlist_url = f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1'
        story_url = smuggle_url(playlist_url, {'is_story': True})
        return self.url_result(story_url, ie=YoutubeTabIE, video_id=playlist_id)
6622
6623
class YoutubeShortsAudioPivotIE(InfoExtractor):
    """Resolve /source/<video id>/shorts URLs to the sfv_audio_pivot feed."""

    IE_NAME = 'youtube:shorts:pivot:audio'
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
            'only_matching': True,
        },
    ]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Generates sfv_audio_pivot browse params for this video id

        The encoded video id is inserted three times into a fixed byte
        template; the result is base64-encoded and then URL-quoted.
        """
        template = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b'
        raw_id = video_id.encode()
        pb_params = template % (raw_id, raw_id, raw_id)
        encoded = base64.b64encode(pb_params).decode()
        return urllib.parse.quote(encoded)

    def _real_extract(self, url):
        """Return a url result for the audio pivot feed, handled by YoutubeTabIE."""
        video_id = self._match_id(url)
        browse_params = self._generate_audio_pivot_params(video_id)
        feed_url = f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}'
        return self.url_result(feed_url, ie=YoutubeTabIE)
6646
6647
class YoutubeTruncatedURLIE(InfoExtractor):
    """
    Catch watch/attribution_link URLs that carry no video id.

    Such URLs typically result from an unquoted "&" on the command line, so
    extraction always fails with a hint on how to quote the URL.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL."""
        # The example commands name yt-dlp, the program this extractor ships with
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
6695
6696
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """
    Extractor for youtube.com/clip/<id> URLs.

    The clip is returned as a url_transparent result pointing at the base
    video, with the clip's start and end times as section_start/section_end.
    """
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """
        Resolve a clip URL to its base video plus the clip's time range.

        Raises ExtractorError if either the base video ID or the clip's
        loop command (which holds the start/end times) cannot be found.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)
        # The traversal yields None when no loop command is present; fail with a
        # clear message instead of a TypeError from subscripting None below
        if not clip_data:
            raise ExtractorError('Unable to find clip data')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            # Times are given in milliseconds; the result fields are in seconds
            'section_start': int(clip_data['startTimeMs']) / 1000,
            'section_end': int(clip_data['endTimeMs']) / 1000,
        }
6753
6754
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose video id is 1-10 characters long and report them as truncated."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)