]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/youtube.py
[extractor/youtube] Fix video like count extraction
[yt-dlp.git] / yt_dlp / extractor / youtube.py
1 import base64
2 import calendar
3 import copy
4 import datetime
5 import hashlib
6 import itertools
7 import json
8 import math
9 import os.path
10 import random
11 import re
12 import sys
13 import threading
14 import time
15 import traceback
16 import urllib.error
17 import urllib.parse
18
19 from .common import InfoExtractor, SearchInfoExtractor
20 from .openload import PhantomJSwrapper
21 from ..compat import functools
22 from ..jsinterp import JSInterpreter
23 from ..utils import (
24 NO_DEFAULT,
25 ExtractorError,
26 UserNotLive,
27 bug_reports_message,
28 classproperty,
29 clean_html,
30 datetime_from_str,
31 dict_get,
32 float_or_none,
33 format_field,
34 get_first,
35 int_or_none,
36 is_html,
37 join_nonempty,
38 js_to_json,
39 mimetype2ext,
40 network_exceptions,
41 orderedSet,
42 parse_codecs,
43 parse_count,
44 parse_duration,
45 parse_iso8601,
46 parse_qs,
47 qualities,
48 remove_start,
49 smuggle_url,
50 str_or_none,
51 str_to_int,
52 strftime_or_none,
53 traverse_obj,
54 try_get,
55 unescapeHTML,
56 unified_strdate,
57 unified_timestamp,
58 unsmuggle_url,
59 update_url_query,
60 url_or_none,
61 urljoin,
62 variadic,
63 )
64
# Static innertube configuration, keyed by client name.
# Entries may omit INNERTUBE_API_KEY, INNERTUBE_HOST and REQUIRE_JS_PLAYER;
# build_innertube_clients() fills those in and also adds a 'priority' key
# and a default 'hl' to every client context.
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20220731.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20220727.01.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20220726.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '5.16.51',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '22.30.100',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    # No INNERTUBE_API_KEY here: build_innertube_clients() supplies the default key
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '5.21',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    # No INNERTUBE_API_KEY here: build_innertube_clients() supplies the default key
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '22.33.101',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85
    },
}
237
238
239 def _split_innertube_client(client_name):
240 variant, *base = client_name.rsplit('.', 1)
241 if base:
242 return variant, base[0], variant
243 base, *variant = client_name.split('_', 1)
244 return client_name, base, variant[0] if variant else None
245
246
def build_innertube_clients():
    """
    Complete INNERTUBE_CLIENTS in place.

    Every configured client gets default values for the API key, host,
    REQUIRE_JS_PLAYER and 'hl', plus a 'priority' derived from its base client.
    For every client without a variant, an additional "<client>_embedscreen"
    entry is registered.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
    priority = qualities(base_clients[::-1])

    defaults = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot since "*_embedscreen" entries are added while looping
    for client, ytcfg in list(INNERTUBE_CLIENTS.items()):
        for key, value in defaults:
            ytcfg.setdefault(key, value)
        ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        base_client, variant = _split_innertube_client(client)[1:]
        ytcfg['priority'] = 10 * priority(base_client)

        if variant == 'embedded':
            ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            ytcfg['priority'] -= 2
        elif variant:
            ytcfg['priority'] -= 3
        else:
            # Plain base client: derive the embed-screen flavour from a deep copy
            # so that the base entry itself keeps its priority and context
            embedscreen = copy.deepcopy(ytcfg)
            embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            embedscreen['priority'] -= 3
            INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen


build_innertube_clients()
276
277
278 class YoutubeBaseInfoExtractor(InfoExtractor):
279 """Provide base functions for Youtube extractors"""
280
# Regex alternation of path names listed as reserved
_RESERVED_NAMES = (
    r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
    r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
    r'browse|oembed|get_video_info|iframe_api|s/player|'
    r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

# Regex matching a playlist ID: a known prefix followed by 10+ ID characters,
# or one of the fixed IDs RDMM, WL, LL, LM
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

# _NETRC_MACHINE = 'youtube'

# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False

# Hostname regex fragments of third-party frontends (Invidious, Piped and redirectors)
_INVIDIOUS_SITES = (
    # invidious-redirect websites
    r'(?:www\.)?redirect\.invidious\.io',
    r'(?:(?:www|dev)\.)?invidio\.us',
    # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
    r'(?:www\.)?invidious\.pussthecat\.org',
    r'(?:www\.)?invidious\.zee\.li',
    r'(?:www\.)?invidious\.ethibox\.fr',
    r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
    r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
    r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
    # youtube-dl invidious instances list
    r'(?:(?:www|no)\.)?invidiou\.sh',
    r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
    r'(?:www\.)?invidious\.kabi\.tk',
    r'(?:www\.)?invidious\.mastodon\.host',
    r'(?:www\.)?invidious\.zapashcanon\.fr',
    r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
    r'(?:www\.)?invidious\.tinfoil-hat\.net',
    r'(?:www\.)?invidious\.himiko\.cloud',
    r'(?:www\.)?invidious\.reallyancient\.tech',
    r'(?:www\.)?invidious\.tube',
    r'(?:www\.)?invidiou\.site',
    r'(?:www\.)?invidious\.site',
    r'(?:www\.)?invidious\.xyz',
    r'(?:www\.)?invidious\.nixnet\.xyz',
    r'(?:www\.)?invidious\.048596\.xyz',
    r'(?:www\.)?invidious\.drycat\.fr',
    r'(?:www\.)?inv\.skyn3t\.in',
    r'(?:www\.)?tube\.poal\.co',
    r'(?:www\.)?tube\.connect\.cafe',
    r'(?:www\.)?vid\.wxzm\.sx',
    r'(?:www\.)?vid\.mint\.lgbt',
    r'(?:www\.)?vid\.puffyan\.us',
    r'(?:www\.)?yewtu\.be',
    r'(?:www\.)?yt\.elukerio\.org',
    r'(?:www\.)?yt\.lelux\.fi',
    r'(?:www\.)?invidious\.ggc-project\.de',
    r'(?:www\.)?yt\.maisputain\.ovh',
    r'(?:www\.)?ytprivate\.com',
    r'(?:www\.)?invidious\.13ad\.de',
    r'(?:www\.)?invidious\.toot\.koeln',
    r'(?:www\.)?invidious\.fdn\.fr',
    r'(?:www\.)?watch\.nettohikari\.com',
    r'(?:www\.)?invidious\.namazso\.eu',
    r'(?:www\.)?invidious\.silkky\.cloud',
    r'(?:www\.)?invidious\.exonip\.de',
    r'(?:www\.)?invidious\.riverside\.rocks',
    r'(?:www\.)?invidious\.blamefran\.net',
    r'(?:www\.)?invidious\.moomoo\.de',
    r'(?:www\.)?ytb\.trom\.tf',
    r'(?:www\.)?yt\.cyberhost\.uk',
    r'(?:www\.)?kgg2m7yk5aybusll\.onion',
    r'(?:www\.)?qklhadlycap4cnod\.onion',
    r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
    r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
    r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
    r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
    r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
    r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
    r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
    r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
    r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
    r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
    # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
    # NOTE(review): piped.kavin.rocks is already matched by the kavin.rocks pattern above
    r'(?:www\.)?piped\.kavin\.rocks',
    r'(?:www\.)?piped\.silkky\.cloud',
    r'(?:www\.)?piped\.tokhmi\.xyz',
    r'(?:www\.)?piped\.moomoo\.me',
    r'(?:www\.)?il\.ax',
    r'(?:www\.)?piped\.syncpundit\.com',
    r'(?:www\.)?piped\.mha\.fi',
    r'(?:www\.)?piped\.mint\.lgbt',
    r'(?:www\.)?piped\.privacy\.com\.de',
)
369
370 def _initialize_consent(self):
371 cookies = self._get_cookies('https://www.youtube.com/')
372 if cookies.get('__Secure-3PSID'):
373 return
374 consent_id = None
375 consent = cookies.get('CONSENT')
376 if consent:
377 if 'YES' in consent.value:
378 return
379 consent_id = self._search_regex(
380 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
381 if not consent_id:
382 consent_id = random.randint(100, 999)
383 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
384
385 def _initialize_pref(self):
386 cookies = self._get_cookies('https://www.youtube.com/')
387 pref_cookie = cookies.get('PREF')
388 pref = {}
389 if pref_cookie:
390 try:
391 pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
392 except ValueError:
393 self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
394 pref.update({'hl': 'en', 'tz': 'UTC'})
395 self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
396
397 def _real_initialize(self):
398 self._initialize_pref()
399 self._initialize_consent()
400 self._check_login_required()
401
402 def _check_login_required(self):
403 if self._LOGIN_REQUIRED and not self._cookies_passed:
404 self.raise_login_required('Login details are needed to download this content', method='cookies')
405
# Regexes matching the assignment prefix (up to and including "=") of the
# ytInitialData / ytInitialPlayerResponse variables
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
408
def _get_default_ytcfg(self, client='web'):
    """Return an independent (deep) copy of the built-in config of the given client"""
    builtin_cfg = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin_cfg)
411
def _get_innertube_host(self, client='web'):
    """Return the INNERTUBE_HOST configured for the given client"""
    client_cfg = INNERTUBE_CLIENTS[client]
    return client_cfg['INNERTUBE_HOST']
414
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get on ytcfg, falling back to the built-in config of default_client
    when the value from ytcfg is missing or falsy
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
419
420 def _extract_client_name(self, ytcfg, default_client='web'):
421 return self._ytcfg_get_safe(
422 ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
423 lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
424
425 def _extract_client_version(self, ytcfg, default_client='web'):
426 return self._ytcfg_get_safe(
427 ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
428 lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
429
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """
    Choose the API hostname. Order of preference: the "innertube_host"
    extractor argument, req_api_hostname, the host of default_client (or 'web')
    """
    user_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if user_host:
        return user_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
433
434 def _extract_api_key(self, ytcfg=None, default_client='web'):
435 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
436
def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the INNERTUBE_CONTEXT dict of ytcfg, or of the default client
    config when ytcfg has none. Its 'client' dict (if any) is updated in place.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client = traverse_obj(context, 'client', expected_type=dict, default={})
    client.update(hl='en', timeZone='UTC', utcOffsetMinutes=0)
    return context
444
# SAPISID value cached by _generate_sapisidhash_header():
# None = not looked up yet, False = no usable cookie found, str = cookie value
_SAPISID = None
446
447 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
448 time_now = round(time.time())
449 if self._SAPISID is None:
450 yt_cookies = self._get_cookies('https://www.youtube.com')
451 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
452 # See: https://github.com/yt-dlp/yt-dlp/issues/393
453 sapisid_cookie = dict_get(
454 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
455 if sapisid_cookie and sapisid_cookie.value:
456 self._SAPISID = sapisid_cookie.value
457 self.write_debug('Extracted SAPISID cookie')
458 # SAPISID cookie is required if not already present
459 if not yt_cookies.get('SAPISID'):
460 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
461 self._set_cookie(
462 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
463 else:
464 self._SAPISID = False
465 if not self._SAPISID:
466 return None
467 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
468 sapisidhash = hashlib.sha1(
469 f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
470 return f'SAPISIDHASH {time_now}_{sapisidhash}'
471
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST query (merged with the innertube context) to the /youtubei/v1/<ep>
    endpoint and return the parsed JSON response.

    The "innertube_key" extractor argument takes precedence over api_key,
    which takes precedence over the key of default_client.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    user_key = self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = user_key or api_key or self._extract_api_key(default_client=default_client)

    hostname = self._select_api_hostname(api_hostname, default_client)
    return self._download_json(
        f'https://{hostname}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})
489
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Find and parse the JSON assigned to ytInitialData in webpage"""
    start_pattern = self._YT_INITIAL_DATA_RE
    return self._search_json(start_pattern, webpage, 'yt initial data', item_id, fatal=fatal)
492
493 @staticmethod
494 def _extract_session_index(*data):
495 """
496 Index of current account in account list.
497 See: https://github.com/yt-dlp/yt-dlp/pull/519
498 """
499 for ytcfg in data:
500 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
501 if session_index is not None:
502 return session_index
503
504 # Deprecated?
505 def _extract_identity_token(self, ytcfg=None, webpage=None):
506 if ytcfg:
507 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
508 if token:
509 return token
510 if webpage:
511 return self._search_regex(
512 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
513 'identity token', default=None, fatal=False)
514
515 @staticmethod
516 def _extract_account_syncid(*args):
517 """
518 Extract syncId required to download private playlists of secondary channels
519 @params response and/or ytcfg
520 """
521 for data in args:
522 # ytcfg includes channel_syncid if on secondary channel
523 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], str)
524 if delegated_sid:
525 return delegated_sid
526 sync_ids = (try_get(
527 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
528 lambda x: x['DATASYNC_ID']), str) or '').split('||')
529 if len(sync_ids) >= 2 and sync_ids[1]:
530 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
531 # and just "user_syncid||" for primary channel. We only want the channel_syncid
532 return sync_ids[0]
533
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    # Alternative locations of the value, tried on each argument
    locations = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [locations], expected_type=str)
543
@functools.cached_property
def is_authenticated(self):
    """Whether a SAPISIDHASH authorization value can be generated (computed once, then cached)"""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
547
def extract_ytcfg(self, video_id, webpage):
    """Parse the argument of the first ytcfg.set({...}); call in webpage. Returns {} on failure"""
    if not webpage:
        return {}
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', default='{}')
    return self._parse_json(raw_ytcfg, video_id, fatal=False) or {}
555
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an innertube API request.

    Explicitly passed values win over values extracted from ytcfg.
    Headers whose value is None are left out of the result.
    """
    origin = 'https://' + self._select_api_hostname(api_hostname, default_client)

    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    client_version = self._extract_client_version(ytcfg, default_client)
    identity_token = identity_token or self._extract_identity_token(ytcfg)
    page_id = account_syncid or self._extract_account_syncid(ytcfg)
    visitor_id = visitor_data or self._extract_visitor_data(ytcfg)
    user_agent = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)

    headers = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': client_version,
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token,
        'X-Goog-PageId': page_id,
        'X-Goog-Visitor-Id': visitor_id,
        'User-Agent': user_agent,
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    # Only an explicitly passed account_syncid triggers the fallback index 0
    if session_index is not None:
        headers['X-Goog-AuthUser'] = session_index
    elif account_syncid:
        headers['X-Goog-AuthUser'] = 0

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin

    return {name: value for name, value in headers.items() if value is not None}
581
582 def _download_ytcfg(self, client, video_id):
583 url = {
584 'web': 'https://www.youtube.com',
585 'web_music': 'https://music.youtube.com',
586 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
587 }.get(client)
588 if not url:
589 return {}
590 webpage = self._download_webpage(
591 url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')
592 return self.extract_ytcfg(video_id, webpage) or {}
593
594 @staticmethod
595 def _build_api_continuation_query(continuation, ctp=None):
596 query = {
597 'continuation': continuation
598 }
599 # TODO: Inconsistency with clickTrackingParams.
600 # Currently we have a fixed ctp contained within context (from ytcfg)
601 # and a ctp in root query for continuation.
602 if ctp:
603 query['clickTracking'] = {'clickTrackingParams': ctp}
604 return query
605
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the renderer's nextContinuationData or
    reloadContinuationData. Returns None if there is no continuation token.
    """
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))
618
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint/command dict.
    Returns None for non-dict input or when no token is present.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))
628
@classmethod
def _extract_continuation(cls, renderer):
    """
    Find the continuation query of a renderer.

    The renderer's own continuation data is preferred; otherwise the first
    usable continuationItemRenderer among its 'contents' and 'items' is used.
    Returns None if no continuation is found.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    children = [
        child
        for key in ('contents', 'items')
        for child in (try_get(renderer, lambda x: x[key], list) or [])]

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for child in children:
        if not isinstance(child, dict):
            continue
        query = cls._extract_continuation_ep_data(try_get(child, endpoint_getters, dict))
        if query:
            return query
649
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) for every alert in data['alerts'] that has
    both a type and a non-empty text.

    Entries that are not dicts are skipped, both at the list level and at
    the renderer level.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # A non-dict value has no .get(); skip it instead of letting an
            # AttributeError abort the whole generator
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
662
663 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
664 errors = []
665 warnings = []
666 for alert_type, alert_message in alerts:
667 if alert_type.lower() == 'error' and fatal:
668 errors.append([alert_type, alert_message])
669 else:
670 warnings.append([alert_type, alert_message])
671
672 for alert_type, alert_message in (warnings + errors[:-1]):
673 self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
674 if errors:
675 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
676
677 def _extract_and_report_alerts(self, data, *args, **kwargs):
678 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
679
def _extract_badges(self, renderer: dict):
    """Return the set of lowercased metadataBadgeRenderer labels found in renderer['badges']"""
    badge_list = try_get(renderer, lambda x: x['badges'], list) or []
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], str)
        for badge in badge_list)
    return {label.lower() for label in labels if label}
687
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found at any of the given paths in data

    A text object is either {'simpleText': ...} or {'runs': [{'text': ...}, ...]};
    a plain list is treated as a list of runs.
    @param path_list: traverse_obj paths tried in order; data itself is used if none given
    @param max_runs:  if set, only this many leading runs are joined
    @returns          the text, or None if nothing was found
    """
    for path in path_list or [None]:
        if path is None:
            obj = [data]
        else:
            obj = traverse_obj(data, path, default=[])
            # Only paths containing ... or a list/tuple key are treated as
            # producing multiple candidates; wrap everything else in a list
            if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                obj = [obj]
        for item in obj:
            # simpleText takes precedence over runs
            text = try_get(item, lambda x: x['simpleText'], str)
            if text:
                return text
            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            # max_runs of None or 0 means no limit
            runs = runs[:min(len(runs), max_runs or len(runs))]
            text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
            if text:
                return text
709
def _get_count(self, data, *path_list):
    """
    Parse a count from the text found at the given paths.
    Falls back to the leading digits/commas of the whitespace-stripped text
    when parse_count cannot handle it.
    """
    count_text = self._get_text(data, *path_list) or ''
    count = parse_count(count_text)
    if count is not None:
        return count
    compact_text = re.sub(r'\s', '', count_text)
    leading_number = self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None)
    return str_to_int(leading_number)
717
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @returns          list of dicts with 'url', 'height' and 'width'
    """
    def iter_raw_thumbnails():
        for path in path_list or [()]:
            yield from traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])

    results = []
    for raw in iter_raw_thumbnails():
        url = url_or_none(raw.get('url'))
        if not url:
            continue
        # Sometimes youtube gives a wrong thumbnail URL. See:
        # https://github.com/yt-dlp/yt-dlp/issues/233
        # https://github.com/ytdl-org/youtube-dl/issues/28023
        if 'maxresdefault' in url:
            url = url.partition('?')[0]
        results.append({
            'url': url,
            'height': int_or_none(raw.get('height')),
            'width': int_or_none(raw.get('width')),
        })
    return results
741
742 @staticmethod
743 def extract_relative_time(relative_time_text):
744 """
745 Extracts a relative time from string and converts to dt object
746 e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'
747 """
748 mobj = re.search(r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
749 if mobj:
750 start = mobj.group('start')
751 if start:
752 return datetime_from_str(start)
753 try:
754 return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
755 except ValueError:
756 return None
757
758 def _extract_time_text(self, renderer, *path_list):
759 """@returns (timestamp, time_text)"""
760 text = self._get_text(renderer, *path_list) or ''
761 dt = self.extract_relative_time(text)
762 timestamp = None
763 if isinstance(dt, datetime.datetime):
764 timestamp = calendar.timegm(dt.timetuple())
765
766 if timestamp is None:
767 timestamp = (
768 unified_timestamp(text) or unified_timestamp(
769 self._search_regex(
770 (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
771 text.lower(), 'time text', default=None)))
772
773 if text and timestamp is None:
774 self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
775 return timestamp, text
776
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep`, retrying (via self.RetryManager) on failures
    that are treated as retryable.

    @param check_get_keys: traverse_obj path(s) that must give a truthy result
                           in the response; otherwise the data is considered
                           incomplete and the request is retried
    @param fatal:          passed to self._error_or_warning for errors that are not retried
    @returns               the response, or the result of self._error_or_warning
    """
    for retry in self.RetryManager():
        try:
            # Always fatal here so that failures surface as ExtractorError below
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            if not isinstance(e.cause, network_exceptions):
                # Not a network problem: do not retry
                return self._error_or_warning(e, fatal=fatal)
            elif not isinstance(e.cause, urllib.error.HTTPError):
                # Network problem without an HTTP response: retry
                retry.error = e
                continue

            # HTTP error: report the JSON error message of the body, if there is one
            first_bytes = e.cause.read(512)
            if not is_html(first_bytes):
                yt_error = try_get(
                    self._parse_json(
                        self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if e.cause.code not in (403, 429):
                retry.error = e
                continue
            # 403 and 429 are not retried
            return self._error_or_warning(e, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            retry.error = ExtractorError('Incomplete data received', expected=True)
            continue

        return response
828
829 @staticmethod
830 def is_music_url(url):
831 return re.match(r'https?://music\.youtube\.com/', url) is not None
832
def _extract_video(self, renderer):
    """
    Build a 'url' result for a video renderer, pointing to its watch page
    (or shorts page) and carrying the metadata available in the renderer
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    duration = parse_duration(self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # Fall back to the duration given in the accessibility label of the title
        a11y_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            a11y_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''

    is_short = overlay_style == 'SHORTS' or '/shorts/' in navigation_url
    if is_short:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    # The date is derived from the time text, so it is only set on request
    upload_date = None
    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(timestamp, '%Y%m%d')

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    availability = self._availability(
        needs_premium='premium' in badges, needs_subscription='members only' in badges)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
    }
887
888
889 class YoutubeIE(YoutubeBaseInfoExtractor):
890 IE_DESC = 'YouTube'
891 _VALID_URL = r"""(?x)^
892 (
893 (?:https?://|//) # http(s):// or protocol-independent URL
894 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
895 (?:www\.)?deturl\.com/www\.youtube\.com|
896 (?:www\.)?pwnyoutube\.com|
897 (?:www\.)?hooktube\.com|
898 (?:www\.)?yourepeat\.com|
899 tube\.majestyc\.net|
900 %(invidious)s|
901 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
902 (?:.*?\#/)? # handle anchor (#/) redirect urls
903 (?: # the various things that can precede the ID:
904 (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
905 |(?: # or the v= param in all its forms
906 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
907 (?:\?|\#!?) # the params delimiter ? or # or #!
908 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
909 v=
910 )
911 ))
912 |(?:
913 youtu\.be| # just youtu.be/xxxx
914 vid\.plus| # or vid.plus/xxxx
915 zwearz\.com/watch| # or zwearz.com/watch/xxxx
916 %(invidious)s
917 )/
918 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
919 )
920 )? # all until now is optional -> you can pass the naked ID
921 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
922 (?(1).+)? # if we found the ID, everything can follow
923 (?:\#|$)""" % {
924 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
925 }
926 _EMBED_REGEX = [
927 r'''(?x)
928 (?:
929 <iframe[^>]+?src=|
930 data-video-url=|
931 <embed[^>]+?src=|
932 embedSWF\(?:\s*|
933 <object[^>]+data=|
934 new\s+SWFObject\(
935 )
936 (["\'])
937 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
938 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
939 \1''',
940 # https://wordpress.org/plugins/lazy-load-for-videos/
941 r'''(?xs)
942 <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
943 \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
944 ]
945
946 _PLAYER_INFO_RE = (
947 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
948 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
949 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
950 )
951 _formats = {
952 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
953 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
954 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
955 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
956 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
957 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
958 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
959 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
960 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
961 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
962 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
963 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
964 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
965 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
966 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
967 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
968 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
969 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
970
971
972 # 3D videos
973 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
974 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
975 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
976 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
977 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
978 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
979 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
980
981 # Apple HTTP Live Streaming
982 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
983 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
984 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
985 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
986 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
987 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
988 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
989 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
990
991 # DASH mp4 video
992 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
993 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
994 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
995 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
996 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
997 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
998 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
999 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1000 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1001 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1002 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1003 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
1004
1005 # Dash mp4 audio
1006 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1007 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1008 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1009 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1010 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1011 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1012 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
1013
1014 # Dash webm
1015 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1016 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1017 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1018 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1019 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1020 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1021 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1022 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1023 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1024 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1025 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1026 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1027 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1028 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1029 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1030 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1031 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1032 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1033 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1034 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1035 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1036 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1037
1038 # Dash webm audio
1039 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1040 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1041
1042 # Dash webm audio with opus inside
1043 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1044 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1045 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1046
1047 # RTMP (unnamed)
1048 '_rtmp': {'protocol': 'rtmp'},
1049
1050 # av01 video only formats sometimes served with "unknown" codecs
1051 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1052 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1053 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1054 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1055 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1056 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1057 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1058 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1059 }
1060 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
1061
1062 _GEO_BYPASS = False
1063
1064 IE_NAME = 'youtube'
1065 _TESTS = [
1066 {
1067 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1068 'info_dict': {
1069 'id': 'BaW_jenozKc',
1070 'ext': 'mp4',
1071 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1072 'uploader': 'Philipp Hagemeister',
1073 'uploader_id': 'phihag',
1074 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1075 'channel': 'Philipp Hagemeister',
1076 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1077 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1078 'upload_date': '20121002',
1079 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1080 'categories': ['Science & Technology'],
1081 'tags': ['youtube-dl'],
1082 'duration': 10,
1083 'view_count': int,
1084 'like_count': int,
1085 'availability': 'public',
1086 'playable_in_embed': True,
1087 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1088 'live_status': 'not_live',
1089 'age_limit': 0,
1090 'start_time': 1,
1091 'end_time': 9,
1092 'comment_count': int,
1093 'channel_follower_count': int
1094 }
1095 },
1096 {
1097 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1098 'note': 'Embed-only video (#1746)',
1099 'info_dict': {
1100 'id': 'yZIXLfi8CZQ',
1101 'ext': 'mp4',
1102 'upload_date': '20120608',
1103 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1104 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1105 'uploader': 'SET India',
1106 'uploader_id': 'setindia',
1107 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1108 'age_limit': 18,
1109 },
1110 'skip': 'Private video',
1111 },
1112 {
1113 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1114 'note': 'Use the first video ID in the URL',
1115 'info_dict': {
1116 'id': 'BaW_jenozKc',
1117 'ext': 'mp4',
1118 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1119 'uploader': 'Philipp Hagemeister',
1120 'uploader_id': 'phihag',
1121 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1122 'channel': 'Philipp Hagemeister',
1123 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1124 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1125 'upload_date': '20121002',
1126 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1127 'categories': ['Science & Technology'],
1128 'tags': ['youtube-dl'],
1129 'duration': 10,
1130 'view_count': int,
1131 'like_count': int,
1132 'availability': 'public',
1133 'playable_in_embed': True,
1134 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1135 'live_status': 'not_live',
1136 'age_limit': 0,
1137 'comment_count': int,
1138 'channel_follower_count': int
1139 },
1140 'params': {
1141 'skip_download': True,
1142 },
1143 },
1144 {
1145 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1146 'note': '256k DASH audio (format 141) via DASH manifest',
1147 'info_dict': {
1148 'id': 'a9LDPn-MO4I',
1149 'ext': 'm4a',
1150 'upload_date': '20121002',
1151 'uploader_id': '8KVIDEO',
1152 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1153 'description': '',
1154 'uploader': '8KVIDEO',
1155 'title': 'UHDTV TEST 8K VIDEO.mp4'
1156 },
1157 'params': {
1158 'youtube_include_dash_manifest': True,
1159 'format': '141',
1160 },
1161 'skip': 'format 141 not served anymore',
1162 },
1163 # DASH manifest with encrypted signature
1164 {
1165 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1166 'info_dict': {
1167 'id': 'IB3lcPjvWLA',
1168 'ext': 'm4a',
1169 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1170 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1171 'duration': 244,
1172 'uploader': 'AfrojackVEVO',
1173 'uploader_id': 'AfrojackVEVO',
1174 'upload_date': '20131011',
1175 'abr': 129.495,
1176 'like_count': int,
1177 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1178 'playable_in_embed': True,
1179 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1180 'view_count': int,
1181 'track': 'The Spark',
1182 'live_status': 'not_live',
1183 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1184 'channel': 'Afrojack',
1185 'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
1186 'tags': 'count:19',
1187 'availability': 'public',
1188 'categories': ['Music'],
1189 'age_limit': 0,
1190 'alt_title': 'The Spark',
1191 'channel_follower_count': int
1192 },
1193 'params': {
1194 'youtube_include_dash_manifest': True,
1195 'format': '141/bestaudio[ext=m4a]',
1196 },
1197 },
1198 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1199 {
1200 'note': 'Embed allowed age-gate video',
1201 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1202 'info_dict': {
1203 'id': 'HtVdAasjOgU',
1204 'ext': 'mp4',
1205 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1206 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1207 'duration': 142,
1208 'uploader': 'The Witcher',
1209 'uploader_id': 'WitcherGame',
1210 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1211 'upload_date': '20140605',
1212 'age_limit': 18,
1213 'categories': ['Gaming'],
1214 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1215 'availability': 'needs_auth',
1216 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1217 'like_count': int,
1218 'channel': 'The Witcher',
1219 'live_status': 'not_live',
1220 'tags': 'count:17',
1221 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1222 'playable_in_embed': True,
1223 'view_count': int,
1224 'channel_follower_count': int
1225 },
1226 },
1227 {
1228 'note': 'Age-gate video with embed allowed in public site',
1229 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1230 'info_dict': {
1231 'id': 'HsUATh_Nc2U',
1232 'ext': 'mp4',
1233 'title': 'Godzilla 2 (Official Video)',
1234 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1235 'upload_date': '20200408',
1236 'uploader_id': 'FlyingKitty900',
1237 'uploader': 'FlyingKitty',
1238 'age_limit': 18,
1239 'availability': 'needs_auth',
1240 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1241 'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
1242 'channel': 'FlyingKitty',
1243 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1244 'view_count': int,
1245 'categories': ['Entertainment'],
1246 'live_status': 'not_live',
1247 'tags': ['Flyingkitty', 'godzilla 2'],
1248 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1249 'like_count': int,
1250 'duration': 177,
1251 'playable_in_embed': True,
1252 'channel_follower_count': int
1253 },
1254 },
1255 {
1256 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1257 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1258 'info_dict': {
1259 'id': 'Tq92D6wQ1mg',
1260 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1261 'ext': 'mp4',
1262 'upload_date': '20191228',
1263 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1264 'uploader': 'Projekt Melody',
1265 'description': 'md5:17eccca93a786d51bc67646756894066',
1266 'age_limit': 18,
1267 'like_count': int,
1268 'availability': 'needs_auth',
1269 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1270 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1271 'view_count': int,
1272 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1273 'channel': 'Projekt Melody',
1274 'live_status': 'not_live',
1275 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1276 'playable_in_embed': True,
1277 'categories': ['Entertainment'],
1278 'duration': 106,
1279 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1280 'comment_count': int,
1281 'channel_follower_count': int
1282 },
1283 },
1284 {
1285 'note': 'Non-Agegated non-embeddable video',
1286 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1287 'info_dict': {
1288 'id': 'MeJVWBSsPAY',
1289 'ext': 'mp4',
1290 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1291 'uploader': 'Herr Lurik',
1292 'uploader_id': 'st3in234',
1293 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1294 'upload_date': '20130730',
1295 'track': 'Such mich find mich',
1296 'age_limit': 0,
1297 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1298 'like_count': int,
1299 'playable_in_embed': False,
1300 'creator': 'OOMPH!',
1301 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1302 'view_count': int,
1303 'alt_title': 'Such mich find mich',
1304 'duration': 210,
1305 'channel': 'Herr Lurik',
1306 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1307 'categories': ['Music'],
1308 'availability': 'public',
1309 'uploader_url': 'http://www.youtube.com/user/st3in234',
1310 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1311 'live_status': 'not_live',
1312 'artist': 'OOMPH!',
1313 'channel_follower_count': int
1314 },
1315 },
1316 {
1317 'note': 'Non-bypassable age-gated video',
1318 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1319 'only_matching': True,
1320 },
1321 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1322 # YouTube Red ad is not captured for creator
1323 {
1324 'url': '__2ABJjxzNo',
1325 'info_dict': {
1326 'id': '__2ABJjxzNo',
1327 'ext': 'mp4',
1328 'duration': 266,
1329 'upload_date': '20100430',
1330 'uploader_id': 'deadmau5',
1331 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1332 'creator': 'deadmau5',
1333 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1334 'uploader': 'deadmau5',
1335 'title': 'Deadmau5 - Some Chords (HD)',
1336 'alt_title': 'Some Chords',
1337 'availability': 'public',
1338 'tags': 'count:14',
1339 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1340 'view_count': int,
1341 'live_status': 'not_live',
1342 'channel': 'deadmau5',
1343 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1344 'like_count': int,
1345 'track': 'Some Chords',
1346 'artist': 'deadmau5',
1347 'playable_in_embed': True,
1348 'age_limit': 0,
1349 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1350 'categories': ['Music'],
1351 'album': 'Some Chords',
1352 'channel_follower_count': int
1353 },
1354 'expected_warnings': [
1355 'DASH manifest missing',
1356 ]
1357 },
1358 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1359 {
1360 'url': 'lqQg6PlCWgI',
1361 'info_dict': {
1362 'id': 'lqQg6PlCWgI',
1363 'ext': 'mp4',
1364 'duration': 6085,
1365 'upload_date': '20150827',
1366 'uploader_id': 'olympic',
1367 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1368 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
1369 'uploader': 'Olympics',
1370 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1371 'like_count': int,
1372 'release_timestamp': 1343767800,
1373 'playable_in_embed': True,
1374 'categories': ['Sports'],
1375 'release_date': '20120731',
1376 'channel': 'Olympics',
1377 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1378 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1379 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1380 'age_limit': 0,
1381 'availability': 'public',
1382 'live_status': 'was_live',
1383 'view_count': int,
1384 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
1385 'channel_follower_count': int
1386 },
1387 'params': {
1388 'skip_download': 'requires avconv',
1389 }
1390 },
1391 # Non-square pixels
1392 {
1393 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1394 'info_dict': {
1395 'id': '_b-2C3KPAM0',
1396 'ext': 'mp4',
1397 'stretched_ratio': 16 / 9.,
1398 'duration': 85,
1399 'upload_date': '20110310',
1400 'uploader_id': 'AllenMeow',
1401 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1402 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1403 'uploader': '孫ᄋᄅ',
1404 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1405 'playable_in_embed': True,
1406 'channel': '孫ᄋᄅ',
1407 'age_limit': 0,
1408 'tags': 'count:11',
1409 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1410 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1411 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1412 'view_count': int,
1413 'categories': ['People & Blogs'],
1414 'like_count': int,
1415 'live_status': 'not_live',
1416 'availability': 'unlisted',
1417 'comment_count': int,
1418 'channel_follower_count': int
1419 },
1420 },
1421 # url_encoded_fmt_stream_map is empty string
1422 {
1423 'url': 'qEJwOuvDf7I',
1424 'info_dict': {
1425 'id': 'qEJwOuvDf7I',
1426 'ext': 'webm',
1427 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1428 'description': '',
1429 'upload_date': '20150404',
1430 'uploader_id': 'spbelect',
1431 'uploader': 'Наблюдатели Петербурга',
1432 },
1433 'params': {
1434 'skip_download': 'requires avconv',
1435 },
1436 'skip': 'This live event has ended.',
1437 },
1438 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1439 {
1440 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1441 'info_dict': {
1442 'id': 'FIl7x6_3R5Y',
1443 'ext': 'webm',
1444 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1445 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1446 'duration': 220,
1447 'upload_date': '20150625',
1448 'uploader_id': 'dorappi2000',
1449 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1450 'uploader': 'dorappi2000',
1451 'formats': 'mincount:31',
1452 },
1453 'skip': 'not actual anymore',
1454 },
1455 # DASH manifest with segment_list
1456 {
1457 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1458 'md5': '8ce563a1d667b599d21064e982ab9e31',
1459 'info_dict': {
1460 'id': 'CsmdDsKjzN8',
1461 'ext': 'mp4',
1462 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1463 'uploader': 'Airtek',
1464 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1465 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1466 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1467 },
1468 'params': {
1469 'youtube_include_dash_manifest': True,
1470 'format': '135', # bestvideo
1471 },
1472 'skip': 'This live event has ended.',
1473 },
1474 {
1475 # Multifeed videos (multiple cameras), URL is for Main Camera
1476 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1477 'info_dict': {
1478 'id': 'jvGDaLqkpTg',
1479 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1480 'description': 'md5:e03b909557865076822aa169218d6a5d',
1481 },
1482 'playlist': [{
1483 'info_dict': {
1484 'id': 'jvGDaLqkpTg',
1485 'ext': 'mp4',
1486 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1487 'description': 'md5:e03b909557865076822aa169218d6a5d',
1488 'duration': 10643,
1489 'upload_date': '20161111',
1490 'uploader': 'Team PGP',
1491 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1492 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1493 },
1494 }, {
1495 'info_dict': {
1496 'id': '3AKt1R1aDnw',
1497 'ext': 'mp4',
1498 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1499 'description': 'md5:e03b909557865076822aa169218d6a5d',
1500 'duration': 10991,
1501 'upload_date': '20161111',
1502 'uploader': 'Team PGP',
1503 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1504 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1505 },
1506 }, {
1507 'info_dict': {
1508 'id': 'RtAMM00gpVc',
1509 'ext': 'mp4',
1510 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1511 'description': 'md5:e03b909557865076822aa169218d6a5d',
1512 'duration': 10995,
1513 'upload_date': '20161111',
1514 'uploader': 'Team PGP',
1515 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1516 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1517 },
1518 }, {
1519 'info_dict': {
1520 'id': '6N2fdlP3C5U',
1521 'ext': 'mp4',
1522 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1523 'description': 'md5:e03b909557865076822aa169218d6a5d',
1524 'duration': 10990,
1525 'upload_date': '20161111',
1526 'uploader': 'Team PGP',
1527 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1528 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1529 },
1530 }],
1531 'params': {
1532 'skip_download': True,
1533 },
1534 'skip': 'Not multifeed anymore',
1535 },
1536 {
1537 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1538 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1539 'info_dict': {
1540 'id': 'gVfLd0zydlo',
1541 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1542 },
1543 'playlist_count': 2,
1544 'skip': 'Not multifeed anymore',
1545 },
1546 {
1547 'url': 'https://vid.plus/FlRa-iH7PGw',
1548 'only_matching': True,
1549 },
1550 {
1551 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1552 'only_matching': True,
1553 },
1554 {
1555 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1556 # Also tests cut-off URL expansion in video description (see
1557 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1558 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1559 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1560 'info_dict': {
1561 'id': 'lsguqyKfVQg',
1562 'ext': 'mp4',
1563 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1564 'alt_title': 'Dark Walk',
1565 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1566 'duration': 133,
1567 'upload_date': '20151119',
1568 'uploader_id': 'IronSoulElf',
1569 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1570 'uploader': 'IronSoulElf',
1571 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1572 'track': 'Dark Walk',
1573 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1574 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1575 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1576 'categories': ['Film & Animation'],
1577 'view_count': int,
1578 'live_status': 'not_live',
1579 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1580 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1581 'tags': 'count:13',
1582 'availability': 'public',
1583 'channel': 'IronSoulElf',
1584 'playable_in_embed': True,
1585 'like_count': int,
1586 'age_limit': 0,
1587 'channel_follower_count': int
1588 },
1589 'params': {
1590 'skip_download': True,
1591 },
1592 },
1593 {
1594 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1595 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1596 'only_matching': True,
1597 },
1598 {
1599 # Video with yt:stretch=17:0
1600 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1601 'info_dict': {
1602 'id': 'Q39EVAstoRM',
1603 'ext': 'mp4',
1604 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1605 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1606 'upload_date': '20151107',
1607 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1608 'uploader': 'CH GAMER DROID',
1609 },
1610 'params': {
1611 'skip_download': True,
1612 },
1613 'skip': 'This video does not exist.',
1614 },
1615 {
1616 # Video with incomplete 'yt:stretch=16:'
1617 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1618 'only_matching': True,
1619 },
1620 {
1621 # Video licensed under Creative Commons
1622 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1623 'info_dict': {
1624 'id': 'M4gD1WSo5mA',
1625 'ext': 'mp4',
1626 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1627 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1628 'duration': 721,
1629 'upload_date': '20150128',
1630 'uploader_id': 'BerkmanCenter',
1631 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1632 'uploader': 'The Berkman Klein Center for Internet & Society',
1633 'license': 'Creative Commons Attribution license (reuse allowed)',
1634 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1635 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1636 'like_count': int,
1637 'age_limit': 0,
1638 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1639 'channel': 'The Berkman Klein Center for Internet & Society',
1640 'availability': 'public',
1641 'view_count': int,
1642 'categories': ['Education'],
1643 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1644 'live_status': 'not_live',
1645 'playable_in_embed': True,
1646 'comment_count': int,
1647 'channel_follower_count': int
1648 },
1649 'params': {
1650 'skip_download': True,
1651 },
1652 },
1653 {
1654 # Channel-like uploader_url
1655 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1656 'info_dict': {
1657 'id': 'eQcmzGIKrzg',
1658 'ext': 'mp4',
1659 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1660 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1661 'duration': 4060,
1662 'upload_date': '20151120',
1663 'uploader': 'Bernie Sanders',
1664 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1665 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1666 'license': 'Creative Commons Attribution license (reuse allowed)',
1667 'playable_in_embed': True,
1668 'tags': 'count:12',
1669 'like_count': int,
1670 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1671 'age_limit': 0,
1672 'availability': 'public',
1673 'categories': ['News & Politics'],
1674 'channel': 'Bernie Sanders',
1675 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1676 'view_count': int,
1677 'live_status': 'not_live',
1678 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1679 'comment_count': int,
1680 'channel_follower_count': int
1681 },
1682 'params': {
1683 'skip_download': True,
1684 },
1685 },
1686 {
1687 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1688 'only_matching': True,
1689 },
1690 {
1691 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1692 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1693 'only_matching': True,
1694 },
1695 {
1696 # Rental video preview
1697 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1698 'info_dict': {
1699 'id': 'uGpuVWrhIzE',
1700 'ext': 'mp4',
1701 'title': 'Piku - Trailer',
1702 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1703 'upload_date': '20150811',
1704 'uploader': 'FlixMatrix',
1705 'uploader_id': 'FlixMatrixKaravan',
1706 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1707 'license': 'Standard YouTube License',
1708 },
1709 'params': {
1710 'skip_download': True,
1711 },
1712 'skip': 'This video is not available.',
1713 },
1714 {
1715 # YouTube Red video with episode data
1716 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1717 'info_dict': {
1718 'id': 'iqKdEhx-dD4',
1719 'ext': 'mp4',
1720 'title': 'Isolation - Mind Field (Ep 1)',
1721 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1722 'duration': 2085,
1723 'upload_date': '20170118',
1724 'uploader': 'Vsauce',
1725 'uploader_id': 'Vsauce',
1726 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1727 'series': 'Mind Field',
1728 'season_number': 1,
1729 'episode_number': 1,
1730 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1731 'tags': 'count:12',
1732 'view_count': int,
1733 'availability': 'public',
1734 'age_limit': 0,
1735 'channel': 'Vsauce',
1736 'episode': 'Episode 1',
1737 'categories': ['Entertainment'],
1738 'season': 'Season 1',
1739 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1740 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1741 'like_count': int,
1742 'playable_in_embed': True,
1743 'live_status': 'not_live',
1744 'channel_follower_count': int
1745 },
1746 'params': {
1747 'skip_download': True,
1748 },
1749 'expected_warnings': [
1750 'Skipping DASH manifest',
1751 ],
1752 },
1753 {
1754 # The following content has been identified by the YouTube community
1755 # as inappropriate or offensive to some audiences.
1756 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1757 'info_dict': {
1758 'id': '6SJNVb0GnPI',
1759 'ext': 'mp4',
1760 'title': 'Race Differences in Intelligence',
1761 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1762 'duration': 965,
1763 'upload_date': '20140124',
1764 'uploader': 'New Century Foundation',
1765 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1766 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1767 },
1768 'params': {
1769 'skip_download': True,
1770 },
1771 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1772 },
1773 {
1774 # itag 212
1775 'url': '1t24XAntNCY',
1776 'only_matching': True,
1777 },
1778 {
1779 # geo restricted to JP
1780 'url': 'sJL6WA-aGkQ',
1781 'only_matching': True,
1782 },
1783 {
1784 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1785 'only_matching': True,
1786 },
1787 {
1788 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1789 'only_matching': True,
1790 },
1791 {
1792 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1793 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1794 'only_matching': True,
1795 },
1796 {
1797 # DRM protected
1798 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1799 'only_matching': True,
1800 },
1801 {
1802 # Video with unsupported adaptive stream type formats
1803 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1804 'info_dict': {
1805 'id': 'Z4Vy8R84T1U',
1806 'ext': 'mp4',
1807 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1808 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1809 'duration': 433,
1810 'upload_date': '20130923',
1811 'uploader': 'Amelia Putri Harwita',
1812 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1813 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1814 'formats': 'maxcount:10',
1815 },
1816 'params': {
1817 'skip_download': True,
1818 'youtube_include_dash_manifest': False,
1819 },
1820 'skip': 'not actual anymore',
1821 },
1822 {
1823 # Youtube Music Auto-generated description
1824 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1825 'info_dict': {
1826 'id': 'MgNrAu2pzNs',
1827 'ext': 'mp4',
1828 'title': 'Voyeur Girl',
1829 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1830 'upload_date': '20190312',
1831 'uploader': 'Stephen - Topic',
1832 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1833 'artist': 'Stephen',
1834 'track': 'Voyeur Girl',
1835 'album': 'it\'s too much love to know my dear',
1836 'release_date': '20190313',
1837 'release_year': 2019,
1838 'alt_title': 'Voyeur Girl',
1839 'view_count': int,
1840 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1841 'playable_in_embed': True,
1842 'like_count': int,
1843 'categories': ['Music'],
1844 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1845 'channel': 'Stephen',
1846 'availability': 'public',
1847 'creator': 'Stephen',
1848 'duration': 169,
1849 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
1850 'age_limit': 0,
1851 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1852 'tags': 'count:11',
1853 'live_status': 'not_live',
1854 'channel_follower_count': int
1855 },
1856 'params': {
1857 'skip_download': True,
1858 },
1859 },
1860 {
1861 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1862 'only_matching': True,
1863 },
1864 {
1865 # invalid -> valid video id redirection
1866 'url': 'DJztXj2GPfl',
1867 'info_dict': {
1868 'id': 'DJztXj2GPfk',
1869 'ext': 'mp4',
1870 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1871 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1872 'upload_date': '20090125',
1873 'uploader': 'Prochorowka',
1874 'uploader_id': 'Prochorowka',
1875 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1876 'artist': 'Panjabi MC',
1877 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1878 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1879 },
1880 'params': {
1881 'skip_download': True,
1882 },
1883 'skip': 'Video unavailable',
1884 },
1885 {
1886 # empty description results in an empty string
1887 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1888 'info_dict': {
1889 'id': 'x41yOUIvK2k',
1890 'ext': 'mp4',
1891 'title': 'IMG 3456',
1892 'description': '',
1893 'upload_date': '20170613',
1894 'uploader_id': 'ElevageOrVert',
1895 'uploader': 'ElevageOrVert',
1896 'view_count': int,
1897 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
1898 'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
1899 'like_count': int,
1900 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
1901 'tags': [],
1902 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
1903 'availability': 'public',
1904 'age_limit': 0,
1905 'categories': ['Pets & Animals'],
1906 'duration': 7,
1907 'playable_in_embed': True,
1908 'live_status': 'not_live',
1909 'channel': 'ElevageOrVert',
1910 'channel_follower_count': int
1911 },
1912 'params': {
1913 'skip_download': True,
1914 },
1915 },
1916 {
1917 # with '};' inside yt initial data (see [1])
1918 # see [2] for an example with '};' inside ytInitialPlayerResponse
1919 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1920 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1921 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1922 'info_dict': {
1923 'id': 'CHqg6qOn4no',
1924 'ext': 'mp4',
1925 'title': 'Part 77 Sort a list of simple types in c#',
1926 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1927 'upload_date': '20130831',
1928 'uploader_id': 'kudvenkat',
1929 'uploader': 'kudvenkat',
1930 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
1931 'like_count': int,
1932 'uploader_url': 'http://www.youtube.com/user/kudvenkat',
1933 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
1934 'live_status': 'not_live',
1935 'categories': ['Education'],
1936 'availability': 'public',
1937 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
1938 'tags': 'count:12',
1939 'playable_in_embed': True,
1940 'age_limit': 0,
1941 'view_count': int,
1942 'duration': 522,
1943 'channel': 'kudvenkat',
1944 'comment_count': int,
1945 'channel_follower_count': int
1946 },
1947 'params': {
1948 'skip_download': True,
1949 },
1950 },
1951 {
1952 # another example of '};' in ytInitialData
1953 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1954 'only_matching': True,
1955 },
1956 {
1957 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1958 'only_matching': True,
1959 },
1960 {
1961 # https://github.com/ytdl-org/youtube-dl/pull/28094
1962 'url': 'OtqTfy26tG0',
1963 'info_dict': {
1964 'id': 'OtqTfy26tG0',
1965 'ext': 'mp4',
1966 'title': 'Burn Out',
1967 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1968 'upload_date': '20141120',
1969 'uploader': 'The Cinematic Orchestra - Topic',
1970 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1971 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1972 'artist': 'The Cinematic Orchestra',
1973 'track': 'Burn Out',
1974 'album': 'Every Day',
1975 'like_count': int,
1976 'live_status': 'not_live',
1977 'alt_title': 'Burn Out',
1978 'duration': 614,
1979 'age_limit': 0,
1980 'view_count': int,
1981 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1982 'creator': 'The Cinematic Orchestra',
1983 'channel': 'The Cinematic Orchestra',
1984 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
1985 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1986 'availability': 'public',
1987 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
1988 'categories': ['Music'],
1989 'playable_in_embed': True,
1990 'channel_follower_count': int
1991 },
1992 'params': {
1993 'skip_download': True,
1994 },
1995 },
1996 {
1997 # controversial video, only works with bpctr when authenticated with cookies
1998 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1999 'only_matching': True,
2000 },
2001 {
2002 # controversial video, requires bpctr/contentCheckOk
2003 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
2004 'info_dict': {
2005 'id': 'SZJvDhaSDnc',
2006 'ext': 'mp4',
2007 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2008 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
2009 'uploader': 'CBS Mornings',
2010 'uploader_id': 'CBSThisMorning',
2011 'upload_date': '20140716',
2012 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2013 'duration': 170,
2014 'categories': ['News & Politics'],
2015 'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
2016 'view_count': int,
2017 'channel': 'CBS Mornings',
2018 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2019 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2020 'age_limit': 18,
2021 'availability': 'needs_auth',
2022 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2023 'like_count': int,
2024 'live_status': 'not_live',
2025 'playable_in_embed': True,
2026 'channel_follower_count': int
2027 }
2028 },
2029 {
2030 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2031 'url': 'cBvYw8_A0vQ',
2032 'info_dict': {
2033 'id': 'cBvYw8_A0vQ',
2034 'ext': 'mp4',
2035 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2036 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2037 'upload_date': '20201120',
2038 'uploader': 'Walk around Japan',
2039 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2040 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2041 'duration': 1456,
2042 'categories': ['Travel & Events'],
2043 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2044 'view_count': int,
2045 'channel': 'Walk around Japan',
2046 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2047 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2048 'age_limit': 0,
2049 'availability': 'public',
2050 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2051 'live_status': 'not_live',
2052 'playable_in_embed': True,
2053 'channel_follower_count': int
2054 },
2055 'params': {
2056 'skip_download': True,
2057 },
2058 }, {
2059 # Has multiple audio streams
2060 'url': 'WaOKSUlf4TM',
2061 'only_matching': True
2062 }, {
2063 # Requires Premium: has format 141 when requested using YTM url
2064 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2065 'only_matching': True
2066 }, {
2067 # multiple subtitles with same lang_code
2068 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2069 'only_matching': True,
2070 }, {
2071 # Force use android client fallback
2072 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2073 'info_dict': {
2074 'id': 'YOelRv7fMxY',
2075 'title': 'DIGGING A SECRET TUNNEL Part 1',
2076 'ext': '3gp',
2077 'upload_date': '20210624',
2078 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2079 'uploader': 'colinfurze',
2080 'uploader_id': 'colinfurze',
2081 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
2082 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2083 'duration': 596,
2084 'categories': ['Entertainment'],
2085 'uploader_url': 'http://www.youtube.com/user/colinfurze',
2086 'view_count': int,
2087 'channel': 'colinfurze',
2088 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2089 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2090 'age_limit': 0,
2091 'availability': 'public',
2092 'like_count': int,
2093 'live_status': 'not_live',
2094 'playable_in_embed': True,
2095 'channel_follower_count': int
2096 },
2097 'params': {
2098 'format': '17', # 3gp format available on android
2099 'extractor_args': {'youtube': {'player_client': ['android']}},
2100 },
2101 },
2102 {
2103 # Skip download of additional client configs (remix client config in this case)
2104 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2105 'only_matching': True,
2106 'params': {
2107 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2108 },
2109 }, {
2110 # shorts
2111 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2112 'only_matching': True,
2113 }, {
2114 'note': 'Storyboards',
2115 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2116 'info_dict': {
2117 'id': '5KLPxDtMqe8',
2118 'ext': 'mhtml',
2119 'format_id': 'sb0',
2120 'title': 'Your Brain is Plastic',
2121 'uploader_id': 'scishow',
2122 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2123 'upload_date': '20140324',
2124 'uploader': 'SciShow',
2125 'like_count': int,
2126 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2127 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2128 'view_count': int,
2129 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2130 'playable_in_embed': True,
2131 'tags': 'count:12',
2132 'uploader_url': 'http://www.youtube.com/user/scishow',
2133 'availability': 'public',
2134 'channel': 'SciShow',
2135 'live_status': 'not_live',
2136 'duration': 248,
2137 'categories': ['Education'],
2138 'age_limit': 0,
2139 'channel_follower_count': int
2140 }, 'params': {'format': 'mhtml', 'skip_download': True}
2141 }, {
2142 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2143 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2144 'info_dict': {
2145 'id': '2NUZ8W2llS4',
2146 'ext': 'mp4',
2147 'title': 'The NP that test your phone performance 🙂',
2148 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2149 'uploader': 'Leon Nguyen',
2150 'uploader_id': 'VNSXIII',
2151 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2152 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2153 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2154 'duration': 21,
2155 'view_count': int,
2156 'age_limit': 0,
2157 'categories': ['Gaming'],
2158 'tags': 'count:23',
2159 'playable_in_embed': True,
2160 'live_status': 'not_live',
2161 'upload_date': '20220103',
2162 'like_count': int,
2163 'availability': 'public',
2164 'channel': 'Leon Nguyen',
2165 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2166 'comment_count': int,
2167 'channel_follower_count': int
2168 }
2169 }, {
2170 # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
2171 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2172 'info_dict': {
2173 'id': '2NUZ8W2llS4',
2174 'ext': 'mp4',
2175 'title': 'The NP that test your phone performance 🙂',
2176 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2177 'uploader': 'Leon Nguyen',
2178 'uploader_id': 'VNSXIII',
2179 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2180 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2181 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2182 'duration': 21,
2183 'view_count': int,
2184 'age_limit': 0,
2185 'categories': ['Gaming'],
2186 'tags': 'count:23',
2187 'playable_in_embed': True,
2188 'live_status': 'not_live',
2189 'upload_date': '20220102',
2190 'like_count': int,
2191 'availability': 'public',
2192 'channel': 'Leon Nguyen',
2193 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2194 'comment_count': int,
2195 'channel_follower_count': int
2196 },
2197 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
2198 }, {
2199 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2200 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2201 'info_dict': {
2202 'id': 'mzZzzBU6lrM',
2203 'ext': 'mp4',
2204 'title': 'I Met GeorgeNotFound In Real Life...',
2205 'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
2206 'uploader': 'Quackity',
2207 'uploader_id': 'QuackityHQ',
2208 'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
2209 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2210 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2211 'duration': 955,
2212 'view_count': int,
2213 'age_limit': 0,
2214 'categories': ['Entertainment'],
2215 'tags': 'count:26',
2216 'playable_in_embed': True,
2217 'live_status': 'not_live',
2218 'release_timestamp': 1641172509,
2219 'release_date': '20220103',
2220 'upload_date': '20220103',
2221 'like_count': int,
2222 'availability': 'public',
2223 'channel': 'Quackity',
2224 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2225 'channel_follower_count': int
2226 }
2227 },
2228 { # continuous livestream. Microformat upload date should be preferred.
2229 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2230 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2231 'info_dict': {
2232 'id': 'kgx4WGK0oNU',
2233 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2234 'ext': 'mp4',
2235 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2236 'availability': 'public',
2237 'age_limit': 0,
2238 'release_timestamp': 1637975704,
2239 'upload_date': '20210619',
2240 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2241 'live_status': 'is_live',
2242 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
2243 'uploader': '阿鲍Abao',
2244 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2245 'channel': 'Abao in Tokyo',
2246 'channel_follower_count': int,
2247 'release_date': '20211127',
2248 'tags': 'count:39',
2249 'categories': ['People & Blogs'],
2250 'like_count': int,
2251 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2252 'view_count': int,
2253 'playable_in_embed': True,
2254 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
2255 },
2256 'params': {'skip_download': True}
2257 }, {
2258 # Story. Requires specific player params to work.
2259 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
2260 'info_dict': {
2261 'id': 'vv8qTUWmulI',
2262 'ext': 'mp4',
2263 'availability': 'unlisted',
2264 'view_count': int,
2265 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
2266 'upload_date': '20220526',
2267 'categories': ['Education'],
2268 'title': 'Story',
2269 'channel': 'IT\'S HISTORY',
2270 'description': '',
2271 'uploader_id': 'BlastfromthePast',
2272 'duration': 12,
2273 'uploader': 'IT\'S HISTORY',
2274 'playable_in_embed': True,
2275 'age_limit': 0,
2276 'live_status': 'not_live',
2277 'tags': [],
2278 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
2279 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
2280 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
2281 },
2282 'skip': 'stories get removed after some period of time',
2283 }, {
2284 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2285 'info_dict': {
2286 'id': 'tjjjtzRLHvA',
2287 'ext': 'mp4',
2288 'title': 'ハッシュタグ無し };if window.ytcsi',
2289 'upload_date': '20220323',
2290 'like_count': int,
2291 'availability': 'unlisted',
2292 'channel': 'nao20010128nao',
2293 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
2294 'age_limit': 0,
2295 'uploader': 'nao20010128nao',
2296 'uploader_id': 'nao20010128nao',
2297 'categories': ['Music'],
2298 'view_count': int,
2299 'description': '',
2300 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2301 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2302 'live_status': 'not_live',
2303 'playable_in_embed': True,
2304 'channel_follower_count': int,
2305 'duration': 6,
2306 'tags': [],
2307 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
2308 }
2309 }, {
2310 'note': '6 channel audio',
2311 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2312 'only_matching': True,
2313 }
2314 ]
2315
2316 _WEBPAGE_TESTS = [
2317 # YouTube <object> embed
2318 {
2319 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
2320 'md5': '873c81d308b979f0e23ee7e620b312a3',
2321 'info_dict': {
2322 'id': 'msN87y-iEx0',
2323 'ext': 'mp4',
2324 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
2325 'upload_date': '20080526',
2326 'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
2327 'uploader': 'Christopher Sykes',
2328 'uploader_id': 'ChristopherJSykes',
2329 'age_limit': 0,
2330 'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
2331 'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
2332 'playable_in_embed': True,
2333 'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
2334 'like_count': int,
2335 'comment_count': int,
2336 'channel': 'Christopher Sykes',
2337 'live_status': 'not_live',
2338 'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
2339 'availability': 'public',
2340 'duration': 195,
2341 'view_count': int,
2342 'categories': ['Science & Technology'],
2343 'channel_follower_count': int,
2344 'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
2345 },
2346 'params': {
2347 'skip_download': True,
2348 }
2349 },
2350 ]
2351
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    A URL carrying a non-empty ``list`` query parameter is rejected here;
    any other URL is decided by the parent class.
    """
    # NOTE(review): imported locally even though the module already imports
    # parse_qs at the top; presumably this keeps the method self-contained.
    # Confirm before hoisting it.
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super().suitable(url)
2360
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor, then create two empty caches."""
    super().__init__(*args, **kwargs)
    # Two distinct dicts, both empty at construction time.
    self._code_cache, self._player_cache = {}, {}
2365
2366 def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
2367 lock = threading.Lock()
2368
2369 is_live = True
2370 start_time = time.time()
2371 formats = [f for f in formats if f.get('is_from_start')]
2372
2373 def refetch_manifest(format_id, delay):
2374 nonlocal formats, start_time, is_live
2375 if time.time() <= start_time + delay:
2376 return
2377
2378 _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
2379 video_details = traverse_obj(
2380 prs, (..., 'videoDetails'), expected_type=dict, default=[])
2381 microformats = traverse_obj(
2382 prs, (..., 'microformat', 'playerMicroformatRenderer'),
2383 expected_type=dict, default=[])
2384 _, is_live, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
2385 start_time = time.time()
2386
2387 def mpd_feed(format_id, delay):
2388 """
2389 @returns (manifest_url, manifest_stream_number, is_live) or None
2390 """
2391 with lock:
2392 refetch_manifest(format_id, delay)
2393
2394 f = next((f for f in formats if f['format_id'] == format_id), None)
2395 if not f:
2396 if not is_live:
2397 self.to_screen(f'{video_id}: Video is no longer live')
2398 else:
2399 self.report_warning(
2400 f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
2401 return None
2402 return f['manifest_url'], f['manifest_stream_number'], is_live
2403
2404 for f in formats:
2405 f['is_live'] = True
2406 f['protocol'] = 'http_dash_segments_generator'
2407 f['fragments'] = functools.partial(
2408 self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)
2409
2410 def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
2411 FETCH_SPAN, MAX_DURATION = 5, 432000
2412
2413 mpd_url, stream_number, is_live = None, None, True
2414
2415 begin_index = 0
2416 download_start_time = ctx.get('start') or time.time()
2417
2418 lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
2419 if lack_early_segments:
2420 self.report_warning(bug_reports_message(
2421 'Starting download from the last 120 hours of the live stream since '
2422 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
2423 lack_early_segments = True
2424
2425 known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
2426 fragments, fragment_base_url = None, None
2427
2428 def _extract_sequence_from_mpd(refresh_sequence, immediate):
2429 nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
2430 # Obtain from MPD's maximum seq value
2431 old_mpd_url = mpd_url
2432 last_error = ctx.pop('last_error', None)
2433 expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
2434 mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
2435 or (mpd_url, stream_number, False))
2436 if not refresh_sequence:
2437 if expire_fast and not is_live:
2438 return False, last_seq
2439 elif old_mpd_url == mpd_url:
2440 return True, last_seq
2441 try:
2442 fmts, _ = self._extract_mpd_formats_and_subtitles(
2443 mpd_url, None, note=False, errnote=False, fatal=False)
2444 except ExtractorError:
2445 fmts = None
2446 if not fmts:
2447 no_fragment_score += 2
2448 return False, last_seq
2449 fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
2450 fragments = fmt_info['fragments']
2451 fragment_base_url = fmt_info['fragment_base_url']
2452 assert fragment_base_url
2453
2454 _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
2455 return True, _last_seq
2456
2457 while is_live:
2458 fetch_time = time.time()
2459 if no_fragment_score > 30:
2460 return
2461 if last_segment_url:
2462 # Obtain from "X-Head-Seqnum" header value from each segment
2463 try:
2464 urlh = self._request_webpage(
2465 last_segment_url, None, note=False, errnote=False, fatal=False)
2466 except ExtractorError:
2467 urlh = None
2468 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
2469 if last_seq is None:
2470 no_fragment_score += 2
2471 last_segment_url = None
2472 continue
2473 else:
2474 should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
2475 no_fragment_score += 2
2476 if not should_continue:
2477 continue
2478
2479 if known_idx > last_seq:
2480 last_segment_url = None
2481 continue
2482
2483 last_seq += 1
2484
2485 if begin_index < 0 and known_idx < 0:
2486 # skip from the start when it's negative value
2487 known_idx = last_seq + begin_index
2488 if lack_early_segments:
2489 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
2490 try:
2491 for idx in range(known_idx, last_seq):
2492 # do not update sequence here or you'll get skipped some part of it
2493 should_continue, _ = _extract_sequence_from_mpd(False, False)
2494 if not should_continue:
2495 known_idx = idx - 1
2496 raise ExtractorError('breaking out of outer loop')
2497 last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
2498 yield {
2499 'url': last_segment_url,
2500 'fragment_count': last_seq,
2501 }
2502 if known_idx == last_seq:
2503 no_fragment_score += 5
2504 else:
2505 no_fragment_score = 0
2506 known_idx = last_seq
2507 except ExtractorError:
2508 continue
2509
2510 time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2511
def _extract_player_url(self, *ytcfgs, webpage=None):
    """
    Find the JS player URL in the given ytcfg dicts.

    'PLAYER_JS_URL' is tried first, then the 'jsUrl' of any entry under
    'WEB_PLAYER_CONTEXT_CONFIGS'. The first string found is resolved against
    https://www.youtube.com. Returns None when nothing is found.
    The `webpage` keyword is accepted but not used by this method.
    """
    lookup_paths = (
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
    )
    found_url = traverse_obj(ytcfgs, *lookup_paths, get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', found_url) if found_url else None
2519
def _download_player_url(self, video_id, fatal=False):
    """
    Derive the player URL from the iframe API script.

    Downloads https://www.youtube.com/iframe_api, searches it for an
    8-hex-digit player version and builds the base.js URL from that version.
    Returns None when the download or the search produces nothing.
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None

    version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not version:
        return None
    return f'https://www.youtube.com/s/player/{version}/player_ias.vflset/en_US/base.js'
2529
2530 def _signature_cache_id(self, example_sig):
2531 """ Return a string representation of a signature """
2532 return '.'.join(str(len(part)) for part in example_sig.split('.'))
2533
@classmethod
def _extract_player_info(cls, player_url):
    """
    Return the player id found in `player_url`.

    Each pattern in cls._PLAYER_INFO_RE is tried in order and the 'id' group
    of the first match is returned.

    Raises ExtractorError if no pattern matches.
    """
    attempts = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    id_m = next(filter(None, attempts), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')
2543
def _load_player(self, video_id, player_url, fatal=True):
    """
    Return the JS code of the player at `player_url`, downloading it at most once.

    Successfully downloaded code is kept in self._code_cache keyed by player
    id. Returns None when nothing is cached and the download yields nothing.
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache.get(player_id)

    js_code = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    if js_code:
        self._code_cache[player_id] = js_code
    return self._code_cache.get(player_id)
2554
def _extract_signature_function(self, video_id, player_url, example_sig):
    """
    Return a callable that reorders the characters of a scrambled signature.

    The reordering is looked up in the 'youtube-sigfuncs' cache under an id
    built from the player id and the shape of `example_sig`. On a cache miss
    it is derived by running the player's signature function on a probe
    string, and the result is stored back into the cache.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
    assert os.path.basename(func_id) == func_id

    self.write_debug(f'Extracting signature function {func_id}')
    index_map = self.cache.load('youtube-sigfuncs', func_id)

    if not index_map:
        player_code = self._load_player(video_id, player_url)
        if player_code:
            sig_func = self._parse_sig_js(player_code)
            # Every probe character encodes its own position, so the output
            # shows which input index lands in each output slot
            probe = ''.join(chr(position) for position in range(len(example_sig)))
            index_map = [ord(ch) for ch in sig_func(probe)]
            self.cache.store('youtube-sigfuncs', func_id, index_map)

    def apply_index_map(s):
        return ''.join(s[i] for i in index_map)

    return apply_index_map
2574
def _print_sig_code(self, func, example_sig):
    """
    Print Python code equivalent to the signature function `func`.

    Does nothing unless the 'youtube_print_sig_code' param is set.
    `func` is run on a probe string as long as `example_sig`; the resulting
    index order is rendered as a sum of slice/index expressions on `s`.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        # Yield 's[...]' expressions for the index list, folding runs that
        # move by +1 or -1 into a single slice
        def _genslice(start, end, step):
            # Render the run start..end (inclusive) as slice syntax
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return f's[{starts}{ends}{steps}]'

        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                # Inside a run: keep going while the stride is unchanged
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run begins at prev
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Emit whatever is still pending for the final index
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    # Probe string whose characters encode their own positions
    test_string = ''.join(map(chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
2616
def _parse_sig_js(self, jscode):
    """
    Locate the signature function in the player JS and wrap it.

    The function name is found with the first matching pattern below (group
    'sig'), the function is extracted with JSInterpreter, and a callable
    taking the signature string is returned.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         # NOTE(review): the next two patterns are identical to each other
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes its arguments as a list
    return lambda s: initial_function([s])
2640
2641 def _cached(self, func, *cache_id):
2642 def inner(*args, **kwargs):
2643 if cache_id not in self._player_cache:
2644 try:
2645 self._player_cache[cache_id] = func(*args, **kwargs)
2646 except ExtractorError as e:
2647 self._player_cache[cache_id] = e
2648 except Exception as e:
2649 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
2650
2651 ret = self._player_cache[cache_id]
2652 if isinstance(ret, Exception):
2653 raise ret
2654 return ret
2655 return inner
2656
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    sig_shape = self._signature_cache_id(s)
    # The extractor is memoised per player URL and signature shape
    build_decryptor = self._cached(
        self._extract_signature_function, 'sig', player_url, sig_shape)
    decryptor = build_decryptor(video_id, player_url, s)
    self._print_sig_code(decryptor, s)
    return decryptor(s)
2664
def _decrypt_nsig(self, s, video_id, player_url):
    """
    Turn the encrypted n field into a working signature

    The n function is run with the native JS interpreter first. If that
    raises JSInterpreter.Exception, PhantomJS is tried instead; when the
    PhantomJS wrapper cannot be created the native error is re-raised.

    Raises ExtractorError if `player_url` is None or the function code
    cannot be extracted.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as extract_err:
        raise ExtractorError('Unable to extract nsig function code', cause=extract_err)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def decrypt_with_phantomjs(native_err):
        # Fallback path used only after the native interpreter has failed
        try:
            phantom = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            raise native_err
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(native_err)

        arg_names, func_body = func_code
        script = f'console.log(function({", ".join(arg_names)}) {{ {func_body} }}({s!r}));'
        return phantom.execute(
            script, video_id=video_id, note='Executing signature code').strip()

    try:
        build_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        decrypted = build_nsig(jsi, func_code)(s)
    except JSInterpreter.Exception as native_err:
        decrypted = decrypt_with_phantomjs(native_err)

    self.write_debug(f'Decrypted nsig {s} => {decrypted}')
    return decrypted
2698
def _extract_n_function_name(self, jscode):
    """
    Return the name of the n function in the player JS.

    The call site after `.get("n")` gives either the function name directly
    or an array name plus index; in the latter case the array literal is
    located and the indexed element is returned.
    """
    name, list_index = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
        jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    if not list_index:
        return name

    array_literal = self._search_regex(
        rf'var {re.escape(name)}\s*=\s*(\[.+?\]);', jscode,
        f'Initial JS player n function list ({name}.{list_index})')
    candidates = json.loads(js_to_json(array_literal))
    return candidates[int(list_index)]
2709
def _extract_n_function_code(self, video_id, player_url):
    """
    Return (jsi, player_id, func_code) for the player's n function.

    `func_code` is loaded from the 'youtube-nsig' cache when available.
    Otherwise the player JS is loaded, the function is located by regex
    (falling back to JSInterpreter extraction when the regex finds nothing)
    and the result is stored in the cache.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # The name is escaped because JS identifiers may contain `$`, which would
    # otherwise act as a regex anchor and silently defeat this pattern
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
            # NB: The end of the regex is intentionally kept strict
            {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % re.escape(func_name),
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        # Normalise to (argument names, body)
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
2735
def _extract_n_function_from_code(self, jsi, func_code):
    """
    Build the callable that runs the n function on a single string.

    Any failure of the underlying function surfaces as
    JSInterpreter.Exception, including the case where the function returns a
    value starting with 'enhanced_except_'.
    """
    native_func = jsi.extract_function_from_code(*func_code)

    def extract_nsig(s):
        try:
            result = native_func([s])
        except Exception as err:
            if isinstance(err, JSInterpreter.Exception):
                raise
            raise JSInterpreter.Exception(traceback.format_exc(), cause=err)

        if not result.startswith('enhanced_except_'):
            return result
        raise JSInterpreter.Exception('Signature function returned an exception')

    return extract_nsig
2752
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The 'STS' entry of `ytcfg` is used when present; otherwise the value is
    searched for in the player JS. Without a `player_url` this either raises
    ExtractorError (fatal) or warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
2776
def _mark_watched(self, video_id, player_responses):
    """
    Request the playback tracking URLs so the video counts as watched.

    The 'videostatsPlaybackUrl' is requested first, then the
    'videostatsWatchtimeUrl' (labelled "fully" watched). If either URL is
    missing from the player responses a warning is issued and the method
    stops.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')

    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))

        # # more consistent results setting it to right before the end
        video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]

        extra_params = {
            'ver': ['2'],
            'cpn': [cpn],
            'cmt': video_length,
            'el': 'detailpage',  # otherwise defaults to "shorts"
        }
        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            extra_params['st'] = video_length
            extra_params['et'] = video_length
        qs.update(extra_params)

        new_query = urllib.parse.urlencode(qs, True)
        url = urllib.parse.urlunparse(parsed_url._replace(query=new_query))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
2817
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """
    Yield url results for YouTube videos referenced by `webpage`.

    A matching Invidious "alternate" link is yielded alone and extraction is
    then stopped; otherwise the parent class results are yielded, followed by
    lazyYT embeds and "YouTube Video Importer" plugin embeds.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate_link = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate_link:
        yield cls.url_result(alternate_link.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    for raw_id in lazy_ids:
        yield cls.url_result(unescapeHTML(raw_id), cls, raw_id)

    # Wordpress "YouTube Video Importer" plugin
    importer_matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    for groups in importer_matches:
        # The video id is the last captured group
        embedded_id = groups[-1]
        yield cls.url_result(embedded_id, cls, embedded_id)
2841
@classmethod
def extract_id(cls, url):
    """Return the id obtained from `url` via get_temp_id; raise ExtractorError if there is none"""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
2848
def _extract_chapters_from_json(self, data, duration):
    """
    Build chapters from the chaptered player bar found in `data`.

    Start times come from 'timeRangeStartMillis' (scaled to seconds) and
    titles from the chapter renderer's 'simpleText'.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_seconds(chapter):
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def title_text(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_seconds,
        chapter_title=title_text,
        duration=duration)
2863
def _extract_chapters_from_engagement_panel(self, data, duration):
    """
    Build chapters from the macro markers lists of the engagement panels.

    Returns the chapters of the first list that produces any, or an empty
    list when none does.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
2876
def _extract_chapters_from_description(self, description, duration):
    """
    Build chapters from description lines that start with a timestamp.

    Matching is non-strict: entries are sorted by start time before being
    validated by _extract_chapters.
    """
    timestamped_lines = re.findall(
        r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or '')

    def line_time(line):
        return parse_duration(line[0])

    def line_title(line):
        return line[1]

    return self._extract_chapters(
        timestamped_lines, chapter_time=line_time, chapter_title=line_title,
        duration=duration, strict=False)
2882
2883 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
2884 if not duration:
2885 return
2886 chapter_list = [{
2887 'start_time': chapter_time(chapter),
2888 'title': chapter_title(chapter),
2889 } for chapter in chapter_list or []]
2890 if not strict:
2891 chapter_list.sort(key=lambda c: c['start_time'] or 0)
2892
2893 chapters = [{'start_time': 0}]
2894 for idx, chapter in enumerate(chapter_list):
2895 if chapter['start_time'] is None:
2896 self.report_warning(f'Incomplete chapter {idx}')
2897 elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
2898 chapters.append(chapter)
2899 else:
2900 self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')
2901 return chapters[1:]
2902
def _extract_comment(self, comment_renderer, parent=None):
    """
    Convert a comment renderer into a comment dict.

    Returns None when the renderer has no 'commentId'. 'parent' is the given
    parent id, or 'root' when none is given.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
    author = self._get_text(comment_renderer, 'authorText')

    vote_getters = (
        lambda x: x['voteCount']['simpleText'],
        lambda x: x['likeCount'],
    )
    like_count = parse_count(try_get(comment_renderer, vote_getters, str)) or 0

    action_buttons = try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}

    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': like_count,
        'is_favorited': 'creatorHeart' in action_buttons,
        'author': author,
        'author_id': try_get(
            comment_renderer, lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str),
        'author_thumbnail': try_get(
            comment_renderer, lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str),
        'author_is_uploader': try_get(
            comment_renderer, lambda x: x['authorIsChannelOwner'], bool),
        'parent': parent or 'root'
    }
2937
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """
    Generate comment dicts by following comment continuations.

    Called with parent=None for the top-level comment section and
    recursively with the parent comment id for reply threads.
    `tracker` is a dict of running counters shared across the recursive
    calls; it is created here when not supplied.
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        # Read the expected comment count and pick the continuation of the
        # requested sort order from the comments header
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        # Yield each comment in `contents` followed by its replies.
        # A bare `yield` (None) signals the caller to stop entirely.
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Cap replies by the per-thread limit and by what is left of
                # the overall reply limit
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # Missing or non-integer limits fall back to sys.maxsize
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        # Reset; the loop ends unless this page provides a new continuation
        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first response is only used for the header
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    # Surface YouTube's own message when no comment was extracted at all
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
3083
3084 @staticmethod
3085 def _generate_comment_continuation(video_id):
3086 """
3087 Generates initial comment section continuation token from given video id
3088 """
3089 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3090 return base64.b64encode(token.encode()).decode()
3091
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """
    Entry for comment extraction

    Finds the item section identified as 'comment-item-section' in
    `contents` and returns an iterator over its comments, limited by the
    first value of the 'max_comments' extractor argument.
    """
    def iter_section_comments(contents):
        sections = traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
        comment_section = None
        for section in sections:
            if section.get('sectionIdentifier') == 'comment-item-section':
                comment_section = section
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(iter_section_comments(contents), 0, limit)
3102
3103 @staticmethod
3104 def _get_checkok_params():
3105 return {'contentCheckOk': True, 'racyCheckOk': True}
3106
@classmethod
def _generate_player_context(cls, sts=None):
    """
    Build the playback context fields of a player request.

    'signatureTimestamp' is included only when `sts` is not None. The
    check-ok params are merged into the top level of the result.
    """
    playback_context = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback_context['signatureTimestamp'] = sts

    request_fields = {
        'playbackContext': {
            'contentPlaybackContext': playback_context
        },
    }
    request_fields.update(cls._get_checkok_params())
    return request_fields
3120
@staticmethod
def _is_agegated(player_response):
    """
    Return True if the player response indicates an age gate.

    A truthy 'desktopLegacyAgeGateReason' is decisive. Otherwise the
    playability 'status' and 'reason' are searched for known age-gate
    markers.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # The markers are lower-case while status values are written upper-case
    # elsewhere in this file (e.g. 'UNPLAYABLE'), so compare
    # case-insensitively; non-string values are skipped
    return any(
        expected in reason.lower()
        for reason in reasons if isinstance(reason, str)
        for expected in AGE_GATE_REASONS)
3132
@staticmethod
def _is_unplayable(player_response):
    """Return True if the playability status of the response is 'UNPLAYABLE'"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
3136
# Sent as the 'params' field of the player request by _extract_player_response
# for story videos and for the android client
_STORY_PLAYER_PARAMS = '8AEB'
3138
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """
    Request the player API response for `video_id` using `client`.

    The signature timestamp is looked up only when a `player_url` is given.
    Story videos and the android client additionally send
    _STORY_PLAYER_PARAMS as 'params'. A falsy response is returned as None.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    else:
        sts = None
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id}
    needs_story_params = smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android'
    if needs_story_params:
        yt_query['params'] = self._STORY_PLAYER_PARAMS
    yt_query.update(self._generate_player_context(sts))

    note = 'Downloading %s player API JSON' % client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note=note)
    return response or None
3160
def _get_requested_clients(self, url, smuggled_data):
    """
    Return the ordered, de-duplicated list of innertube clients to query.

    Clients come from the 'player_client' extractor argument: known client
    names are taken as-is, 'default' expands to the default clients and
    'all' to every client whose name does not start with '_' (highest
    priority first). Unknown names are skipped with a warning. With no valid
    request the defaults are used. For music URLs the existing '<client>_music'
    variant of each requested client is appended.
    """
    requested_clients = []
    default = ['android', 'web']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the variants from a snapshot first so the list is not
        # extended while it is being iterated
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
3184
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """
    Collect player responses for `video_id` from the given clients.

    Returns a tuple (player responses, player_url). Extra clients may be
    queued while iterating when a response is unplayable or age-gated.
    Errors from individual clients are reported as warnings; the last error
    is raised only if no player response was collected at all.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed so that clients.pop() below yields them in the original order
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    clients.append(client_name)
                    all_clients.add(actual_client)
                    return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        # Once found, the player URL is reused for the remaining clients
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The iframe API fallback is attempted at most once
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            # Only the most recent error is kept; earlier ones become warnings
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not len(prs):
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
3266
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, is_live, duration):
    """Yield every format found in `streaming_data`, followed by one subtitles dict.

    This is a generator: it first yields one format dict per usable entry of
    the `formats`/`adaptiveFormats` lists, then the formats parsed from the
    HLS and DASH manifests, and finally (always as the LAST item) the
    subtitles dict merged from those manifests.

    @param streaming_data  Iterable of `streamingData` dicts
    @param video_id        Video id, used for messages and requests
    @param player_url      URL of the JS player; needed to decrypt signatures
                           and the `n` parameter. May be None
    @param is_live         Whether the video is currently live
    @param duration        Video duration in seconds (may be None); used only
                           to detect formats with a suspiciously short duration
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `audioQuality` may be absent or null; never call .lower() on None
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: the URL and its encrypted signature are packed
            # into the `signatureCipher` query string
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                        f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
                        video_id=video_id, only_once=True)
                # The format is still yielded, but marked and deprioritized
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault')
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower()
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        # NB: try_get swallows the TypeError raised when `duration` is None
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # `quality` is None when the format carries no quality information at all
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty((audio_track.get('id') or '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    skip_manifests = self._configuration_arg('skip')
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.append('dash')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Assign format_id/quality to a manifest format; return False for duplicates."""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            # Same itag already seen over another protocol: disambiguate the id
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Unknown itag: borrow the quality of the closest known resolution
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        return True

    subtitles = {}
    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(hls_manifest_url, video_id, 'mp4', fatal=False, live=is_live)
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f
    # The subtitles dict is always the final item; callers unpack it with `*formats, subtitles`
    yield subtitles
3462
def _extract_storyboard(self, player_responses, duration):
    """Yield one `mhtml` storyboard format per level of the storyboard spec.

    The spec is a `|`-separated string: the first field is the base URL
    template and each following field describes one storyboard level as
    8 `#`-separated values.

    @param player_responses  Player responses to search for the spec
    @param duration          Video duration in seconds. Nothing is yielded
                             when it is unknown (None/0), because the fps and
                             the per-fragment durations are derived from it
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The list is reversed, so pop() removes the leading base URL field
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    if not duration:
        # Every value below divides by (or into) the duration
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        # `spec` was reversed above, hence the `L - i` level index
        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one image holding a cols x rows grid of frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may be only partially filled
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
3500
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Fetch the watch page (unless skipped) and the player responses.

    The watch page download is skipped when `webpage` is listed in the
    `player_skip` configuration argument; in that case `webpage` is None.

    @returns  (webpage, master_ytcfg, player_responses, player_url)
    """
    if 'webpage' in self._configuration_arg('player_skip'):
        webpage = None
    else:
        page_query = {'bpctr': '9999999999', 'has_verified': '1'}
        if smuggled_data.get('is_story'):
            page_query['pp'] = self._STORY_PLAYER_PARAMS
        webpage = self._download_webpage(webpage_url, video_id, fatal=False, query=page_query)

    # Fall back to the default ytcfg when none could be read from the page
    master_ytcfg = self.extract_ytcfg(video_id, webpage)
    if not master_ytcfg:
        master_ytcfg = self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url
3517
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Work out the live state and collect formats/subtitles from the player responses.

    @returns  (live_broadcast_details, is_live, streaming_data, formats, subtitles)
    """
    broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    # Prefer the flag from videoDetails; fall back to the broadcast details
    live_now = get_first(video_details, 'isLive')
    if live_now is None:
        live_now = get_first(broadcast_details, 'isLiveNow')

    all_streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    # The generator yields every format first and the subtitles dict last
    *found_formats, found_subtitles = self._extract_formats_and_subtitles(
        all_streaming_data, video_id, player_url, live_now, duration)

    return broadcast_details, live_now, all_streaming_data, found_formats, found_subtitles
3528
def _real_extract(self, url):
    """Extract the info dict for a single watch URL.

    Returns one of:
      * a `url_result` pointing at the trailer, when the playability status
        advertises a `trailerVideoId`;
      * a playlist of `url_transparent` entries for multifeed videos (unless
        `noplaylist` is set or `force_singlefeed` was smuggled into the URL);
      * otherwise the info dict of the video itself.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id

    webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Without a webpage there are no <meta> tags to search
    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])
    video_title = (
        get_first(video_details, 'title')
        or self._get_text(microformats, (..., 'title'))
        or search_meta(['og:title', 'twitter:title', 'title']))
    video_description = get_first(video_details, 'shortDescription')

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = urllib.parse.parse_qs(
                    urllib.parse.unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += ' (%s)' % feed_title
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    duration = int_or_none(
        get_first(video_details, 'lengthSeconds')
        or get_first(microformats, 'lengthSeconds')
        or parse_duration(search_meta('duration'))) or None

    # `duration` must be forwarded: the format extraction compares it against
    # each format's approxDurationMs to flag possibly damaged formats
    live_broadcast_details, is_live, streaming_data, formats, automatic_captions = \
        self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # `reason` may be missing even though a subreason is present
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break
    thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    # Only the thumbnails actually reported by YouTube; the guessed ones are added below
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
        # in resolution, these are not the custom thumbnail. So de-prioritize them
        'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
        'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Position in `thumbnail_names` decides the preference; unknown names rank last
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    if is_live is None:
        if is_upcoming or live_content is False:
            is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False
    live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_end_time and live_start_time:
        duration = live_end_time - live_start_time

    if is_live and self.get_param('live_from_start'):
        self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

    formats.extend(self._extract_storyboard(player_responses, duration))

    # source_preference is lower for throttled/potentially damaged formats
    self._sort_formats(formats, (
        'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))

    info = {
        'id': video_id,
        'title': video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if user don't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'uploader': get_first(video_details, 'author'),
        # default=None: a profile URL of an unexpected shape must not abort the whole extraction
        'uploader_id': self._search_regex(
            r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id',
            default=None) if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'is_live': is_live,
        'was_live': (False if is_live or is_upcoming or live_content is False
                     else None if is_live is None or is_upcoming is None
                     else live_content),
        'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
        'release_timestamp': live_start_time,
    }

    if get_first(video_details, 'isPostLiveDvr'):
        self.write_debug('Video is in Post-Live Manifestless mode')
        info['live_status'] = 'post_live'
        if (duration or 0) > 4 * 3600:
            self.report_warning(
                'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '
                'This is a known issue and patches are welcome')

    subtitles = {}
    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

        def process_language(container, base_url, lang_code, sub_name, query):
            """Append one entry per subtitle format for `lang_code` to `container`."""
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
                    'name': sub_name,
                })

        # NB: Constructing the full subtitle dictionary is slow
        get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
            self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
        for lang_code, caption_track in captions.items():
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            # Only parse the URL once it is known to exist
            orig_lang = parse_qs(base_url).get('lang', [None])[-1]
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                orig_trans_code = trans_code
                if caption_track.get('kind') != 'asr':
                    if not get_translated_subs:
                        continue
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, None, ' from %s')
                # Add an "-orig" label to the original language so that it can be distinguished.
                # The subs are returned without "-orig" as well for compatibility
                if lang_code == f'a-{orig_trans_code}':
                    process_language(
                        automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
                # Setting tlang=lang returns damaged subtitles.
                process_language(automatic_captions, base_url, trans_code, trans_name,
                                 {} if orig_lang == orig_trans_code else {'tlang': trans_code})

    # NOTE(review): the indentation of these two assignments is not recoverable
    # from the source dump. Setting them unconditionally keeps the subtitles
    # collected from the manifests even when there are no caption tracks
    info['automatic_captions'] = automatic_captions
    info['subtitles'] = subtitles

    # start/end times may be given in the URL fragment or in the query (`t`, `start`, `end`)
    parsed_url = urllib.parse.urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = urllib.parse.parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(query[k][0])

    # Youtube Music Auto-generated description
    if video_description:
        # NB: this is a verbose (?x) pattern, so literal spaces must be escaped
        mobj = re.search(
            r'''(?xs)
                (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
                (?P<album>[^\n]+)
                (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
                (?:.+?Released\ on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
                (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
                .+\nAuto-generated\ by\ YouTube\.\s*$
            ''', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
    if not initial_data:
        # Fall back to the `next` API endpoint
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query,
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    info['comment_count'] = traverse_obj(initial_data, (
        'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
        'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'
    ), (
        'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
        'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'
    ), expected_type=int_or_none, get_all=False)

    try:  # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
    except (KeyError, IndexError, TypeError):
        pass
    else:
        info.setdefault('subtitles', {})['live_chat'] = [{
            # url is needed to set cookies
            'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]

    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or self._extract_chapters_from_description(video_description, duration)
            or None)

    contents = traverse_obj(
        initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
        expected_type=list, default=[])

    # (getter(s), regex) pairs for the known shapes of the like/dislike button label
    like_label_matchers = [(
        lambda x: x['defaultText']['accessibility']['accessibilityData'],
        r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
            lambda x: x['accessibility'],
            lambda x: x['accessibilityData']['accessibilityData'],
        ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]

    vpir = get_first(contents, 'videoPrimaryInfoRenderer')
    if vpir:
        stl = vpir.get('superTitleLink')
        if stl:
            stl = self._get_text(stl)
            if try_get(
                    vpir,
                    lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                info['location'] = stl
            else:
                # `stl` may be None if no text could be read from the link
                mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl or '')
                if mobj:
                    info.update({
                        'series': mobj.group(1),
                        'season_number': int(mobj.group(2)),
                        'episode_number': int(mobj.group(3)),
                    })
        for tlb in (try_get(
                vpir,
                lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                list) or []):
            tbrs = variadic(
                traverse_obj(
                    tlb, 'toggleButtonRenderer',
                    ('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer'),
                    default=[]))
            for tbr in tbrs:
                for getter, regex in like_label_matchers:
                    label = (try_get(tbr, getter, dict) or {}).get('label')
                    if label:
                        mobj = re.match(regex, label)
                        if mobj:
                            info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                            break
        sbr_tooltip = try_get(
            vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
        if sbr_tooltip:
            like_count, dislike_count = sbr_tooltip.split(' / ')
            info.update({
                'like_count': str_to_int(like_count),
                'dislike_count': str_to_int(dislike_count),
            })
    vsir = get_first(contents, 'videoSecondaryInfoRenderer')
    if vsir:
        vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
        info.update({
            'channel': self._get_text(vor, 'title'),
            'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

        rows = try_get(
            vsir,
            lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
            list) or []
        # A divider line means several songs are listed; the album/artist/track
        # rows are then not used
        multiple_songs = False
        for row in rows:
            if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                multiple_songs = True
                break
        for row in rows:
            mrr = row.get('metadataRowRenderer') or {}
            mrr_title = mrr.get('title')
            if not mrr_title:
                continue
            mrr_title = self._get_text(mrr, 'title')
            mrr_contents_text = self._get_text(mrr, ('contents', 0))
            if mrr_title == 'License':
                info['license'] = mrr_contents_text
            elif not multiple_songs:
                if mrr_title == 'Album':
                    info['album'] = mrr_contents_text
                elif mrr_title == 'Artist':
                    info['artist'] = mrr_contents_text
                elif mrr_title == 'Song':
                    info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }

    # The upload date for scheduled, live and past live streams / premieres in microformats
    # may be different from the stream date. Although not in UTC, we will prefer it in this case.
    # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
    upload_date = (
        unified_strdate(get_first(microformats, 'uploadDate'))
        or unified_strdate(search_meta('uploadDate')))
    if not upload_date or (
        not info.get('is_live')
        and not info.get('was_live')
        and info.get('live_status') != 'is_upcoming'
        and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
    ):
        upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date
    info['upload_date'] = upload_date

    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = get_first(video_details, 'isPrivate', expected_type=bool)
    is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
4036
4037
4038 class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
4039
@staticmethod
def passthrough_smuggled_data(func):
    """
    Decorator for extraction methods with the signature (self, url, smuggled_data).

    The returned wrapper takes (self, url): it unsmuggles the URL, flags music
    URLs in the smuggled data, calls func, and - when there is any smuggled
    data and the result has entries - smuggles that data into each entry's URL.
    """

    def _resmuggle(playlist_entries, payload):
        # TODO: Convert URL to music.youtube instead.
        # Do we need to passthrough any other smuggled_data?
        for item in playlist_entries:
            item['url'] = smuggle_url(item['url'], payload)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, payload = unsmuggle_url(url, {})
        if self.is_music_url(url):
            payload['is_music_url'] = True
        result = func(self, url, payload)
        if not payload or not result.get('entries'):
            return result
        # Entries are wrapped lazily; nothing is consumed here
        result['entries'] = _resmuggle(result['entries'], payload)
        return result

    return wrapper
4059
def _extract_channel_id(self, webpage):
    """
    Return the channel id found in the webpage's meta tags.

    The 'channelId' meta tag is preferred. Otherwise the id is parsed out of
    the first available URL meta tag (og:url, al:*:url, twitter:*), which is
    expected to contain a youtube.com/channel/<id> URL.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to be inside the group: with `(...)+` a repeated group
    # only retains its last repetition, i.e. the final character of the id
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
4072
4073 @staticmethod
4074 def _extract_basic_item_renderer(item):
4075 # Modified from _extract_grid_item_renderer
4076 known_basic_renderers = (
4077 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
4078 )
4079 for key, renderer in item.items():
4080 if not isinstance(renderer, dict):
4081 continue
4082 elif key in known_basic_renderers:
4083 return renderer
4084 elif key.startswith('grid') and key.endswith('Renderer'):
4085 return renderer
4086
4087 def _grid_entries(self, grid_renderer):
4088 for item in grid_renderer['items']:
4089 if not isinstance(item, dict):
4090 continue
4091 renderer = self._extract_basic_item_renderer(item)
4092 if not isinstance(renderer, dict):
4093 continue
4094 title = self._get_text(renderer, 'title')
4095
4096 # playlist
4097 playlist_id = renderer.get('playlistId')
4098 if playlist_id:
4099 yield self.url_result(
4100 'https://www.youtube.com/playlist?list=%s' % playlist_id,
4101 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4102 video_title=title)
4103 continue
4104 # video
4105 video_id = renderer.get('videoId')
4106 if video_id:
4107 yield self._extract_video(renderer)
4108 continue
4109 # channel
4110 channel_id = renderer.get('channelId')
4111 if channel_id:
4112 yield self.url_result(
4113 'https://www.youtube.com/channel/%s' % channel_id,
4114 ie=YoutubeTabIE.ie_key(), video_title=title)
4115 continue
4116 # generic endpoint URL support
4117 ep_url = urljoin('https://www.youtube.com/', try_get(
4118 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
4119 str))
4120 if ep_url:
4121 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
4122 if ie.suitable(ep_url):
4123 yield self.url_result(
4124 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
4125 break
4126
def _music_reponsive_list_entry(self, renderer):
    """
    Build a music.youtube.com url_result for a music list item renderer.

    Tried in order: the item's own video id, the watch endpoint's playlist
    (with its video id in the URL if present), then the browse endpoint id.
    Returns None if none of them is present.
    """
    def _lookup(*path):
        return traverse_obj(renderer, path)

    video_id = _lookup('playlistItemData', 'videoId')
    if video_id:
        return self.url_result(
            f'https://music.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id)

    playlist_id = _lookup('navigationEndpoint', 'watchEndpoint', 'playlistId')
    if playlist_id:
        video_id = _lookup('navigationEndpoint', 'watchEndpoint', 'videoId')
        if video_id:
            playlist_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = _lookup('navigationEndpoint', 'browseEndpoint', 'browseId')
    if browse_id:
        return self.url_result(
            f'https://music.youtube.com/browse/{browse_id}',
            ie=YoutubeTabIE.ie_key(), video_id=browse_id)
    return None
4144
4145 def _shelf_entries_from_content(self, shelf_renderer):
4146 content = shelf_renderer.get('content')
4147 if not isinstance(content, dict):
4148 return
4149 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
4150 if renderer:
4151 # TODO: add support for nested playlists so each shelf is processed
4152 # as separate playlist
4153 # TODO: this includes only first N items
4154 yield from self._grid_entries(renderer)
4155 renderer = content.get('horizontalListRenderer')
4156 if renderer:
4157 # TODO
4158 pass
4159
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """
    Yield a url_result for the shelf's own URL (if it has one), followed by
    the entries extracted from the shelf content.

    With skip_channels set, a shelf whose URL contains '/channels?' yields
    nothing at all.
    """
    shelf_url = urljoin('https://www.youtube.com', try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str))
    # Skipping links to other channels. Note that checking for
    # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
    # will not work
    if shelf_url and skip_channels and '/channels?' in shelf_url:
        return
    if shelf_url:
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
4175
4176 def _playlist_entries(self, video_list_renderer):
4177 for content in video_list_renderer['contents']:
4178 if not isinstance(content, dict):
4179 continue
4180 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
4181 if not isinstance(renderer, dict):
4182 continue
4183 video_id = renderer.get('videoId')
4184 if not video_id:
4185 continue
4186 yield self._extract_video(renderer)
4187
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in rich_grid_renderer['content']['videoRenderer'], if it has a video id."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
    if video_renderer and video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
4195
4196 def _video_entry(self, video_renderer):
4197 video_id = video_renderer.get('videoId')
4198 if video_id:
4199 return self._extract_video(video_renderer)
4200
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a url_result for the tile's tap URL, titled with its hashtag text; None without a URL."""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4207
def _post_thread_entries(self, post_thread_renderer):
    """
    Yield the entries referenced by a backstage (community) post.

    In order: the attached video, the attached playlist, then every inline
    link in the post text that YoutubeIE accepts and that does not point to
    the attached video.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    attached_entry = self._extract_video(attached_video) if video_id else None
    if attached_entry:
        yield attached_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        ep_url = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # The attached video has already been yielded above
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
4243
4244 def _post_thread_continuation_entries(self, post_thread_continuation):
4245 contents = post_thread_continuation.get('contents')
4246 if not isinstance(contents, list):
4247 return
4248 for content in contents:
4249 renderer = content.get('backstagePostThreadRenderer')
4250 if isinstance(renderer, dict):
4251 yield from self._post_thread_entries(renderer)
4252 continue
4253 renderer = content.get('videoRenderer')
4254 if isinstance(renderer, dict):
4255 yield self._video_entry(renderer)
4256
4257 r''' # unused
4258 def _rich_grid_entries(self, contents):
4259 for content in contents:
4260 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4261 if video_renderer:
4262 entry = self._video_entry(video_renderer)
4263 if entry:
4264 yield entry
4265 '''
4266
def _extract_entries(self, parent_renderer, continuation_list):
    """
    Yield the entries found in parent_renderer['contents'].

    @param continuation_list: one-element list used as an output parameter;
                              it is reset to [None] first and its single
                              slot is updated with the continuation found
                              while extracting
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Maps the renderer name to the callable producing its entries
    entry_handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for section in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(section, dict):
            continue
        section_renderer = traverse_obj(
            section, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not section_renderer:
            rich_item = section.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for section_item in try_get(section_renderer, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            # Only the first known renderer of each item is processed
            matched_key = next((key for key in section_item if key in entry_handlers), None)
            if matched_key is None:
                continue
            matched_renderer = section_item[matched_key]
            # Handlers may produce empty entries; those are dropped
            yield from filter(None, entry_handlers[matched_key](matched_renderer))
            continuation_list[0] = self._extract_continuation(matched_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section_renderer)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
4315
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """
    Yield all entries of a tab, requesting further pages for as long as a
    continuation is found and a response with a known renderer comes back.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # continuation_list is looked up at call time, so the rebinding done
        # before each page below is what _extract_entries gets to fill in
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Renderers found under response['continuationContents']
    continuation_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Renderers found as continuation items: (handler, key the handler reads the items from)
    item_handlers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        matched_key = next(
            (key for key in continuation_contents if key in continuation_handlers), None)
        if matched_key is not None:
            continuation_renderer = continuation_contents[matched_key]
            continuation_list = [None]
            yield from continuation_handlers[matched_key](continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
        # A falsy (e.g. empty) renderer falls through to the item handling below
        if continuation_renderer:
            continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The kind of the first item decides how the whole page is handled
        first_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        matched_key = next((key for key in first_item if key in item_handlers), None)
        if matched_key is None:
            break
        handler, container_key = item_handlers[matched_key]
        video_items_renderer = {container_key: continuation_items}
        continuation_list = [None]
        yield from handler(video_items_renderer)
        continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
4390
@staticmethod
def _extract_selected_tab(tabs, fatal=True):
    """
    Return the renderer of the first tab whose 'selected' value is True.

    If there is none, ExtractorError is raised when fatal is set; otherwise
    None is returned.
    """
    tab_renderers = (
        dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        for tab in tabs)
    selected = next(
        (renderer for renderer in tab_renderers if renderer.get('selected') is True),
        None)
    if selected is not None:
        return selected
    if fatal:
        raise ExtractorError('Unable to find selected tab')
    return None
4400
def _extract_uploader(self, data):
    """
    Return the uploader, uploader_id and uploader_url taken from the video
    owner in the playlist's secondary sidebar info.

    Fields whose value is None are left out; the dict is empty when no owner
    is found.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_text = owner.get('text')
    fields = {
        # Strips the 'by ... and N others' wrapping when the text has it
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], str)),
    }
    return {name: value for name, value in fields.items() if value is not None}
4416
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Build the playlist result for the selected tab of a channel/playlist page.

    Metadata comes from the channel metadata renderer when present, otherwise
    from the playlist metadata renderer; the entries are produced lazily by
    _entries().
    """
    title = description = channel_url = channel_name = channel_id = None
    tags = []

    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    metadata_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if metadata_renderer:
        channel_name = metadata_renderer.get('title')
        channel_url = metadata_renderer.get('channelUrl')
        channel_id = metadata_renderer.get('externalId')
    else:
        metadata_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if metadata_renderer:
        title = metadata_renderer.get('title')
        description = metadata_renderer.get('description', '')
        tags = metadata_renderer.get('keywords', '').split()

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _append_uncropped(thumbnails, thumbnail_id, preference):
        if not thumbnails:
            return
        uncropped_url = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    # The channel id doubles as the playlist id; it is only ever set from
    # the channel metadata renderer
    playlist_id = item_id if channel_id is None else channel_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # channel* fields mirror whatever uploader* ended up as
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)
4508
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the videos of an inline (watch page) playlist, page by page.

    Every page is requested from the 'next' endpoint, starting from the last
    video of the previous page. Videos up to and including the last one
    already yielded are skipped. Extraction stops when a page has no videos,
    has no videos after the last one already yielded, or when the response
    does not contain a playlist.
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Index just past the last video that has already been yielded (0 if not found)
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The last item may lack a watch endpoint; the query below falls back to defaults then
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        # Without a playlist in the response there is nothing left to iterate
        if not playlist:
            return
4539
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Return the result for a playlist embedded in a watch page.

    If the playlist links to a playlist page other than url (and is not a
    known unviewable playlist), extraction is delegated to that page;
    otherwise the inline playlist is extracted directly.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    playlist_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    playlist_url = urljoin(url, playlist_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if not delegate:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
4562
def _extract_availability(self, data):
    """
    Determine the availability of a playlist/tab from its primary sidebar info.
    Note: public is not assumed unless a 'public' label is actually found
    @param data: response
    """
    is_private = is_unlisted = None
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    # Only the first selected dropdown entry is taken into account
    selected_entry = next(
        (entry for entry in dropdown_entries
         if try_get(entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool)),
        None)
    if selected_entry is not None:
        label = self._get_text(selected_entry, ('privacyDropdownItemRenderer', 'label'))
        if label:
            badge_labels.add(label.lower())

    for badge_label in badge_labels:
        if badge_label == 'unlisted':
            is_unlisted = True
        elif badge_label == 'private':
            is_private = True
        elif badge_label == 'public':
            is_unlisted = is_private = False
    return self._availability(is_private, False, False, False, is_unlisted)
4594
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first non-empty info_renderer value among the items of
    data['sidebar']['playlistSidebarRenderer'], or None if there is none.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    found_renderers = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next(filter(None, found_renderers), None)
4603
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again with unavailable videos included.

    The browse id and params of the sidebar's 'show unavailable videos' menu
    item are used when that item exists; otherwise default values are sent.
    Returns None without any request if there is no primary sidebar info.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    def _navigation_items():
        menu_items = try_get(
            sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_items:
            if isinstance(menu_item, dict):
                yield menu_item.get('menuNavigationItemRenderer')

    def _shows_unavailable(nav_item_renderer):
        text = try_get(
            nav_item_renderer, lambda x: x['text']['simpleText'], str)
        return bool(text) and text.lower() == 'show unavailable videos'

    browse_endpoint = {}
    show_unavailable_item = next(filter(_shows_unavailable, _navigation_items()), None)
    if show_unavailable_item is not None:
        browse_endpoint = try_get(
            show_unavailable_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
    browse_id = browse_endpoint.get('browseId')
    params = browse_endpoint.get('params')

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
4639
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is among the values of the youtube:tab 'skip' extractor argument."""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped
4643
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its initial data, retrying on network
    errors (other than HTTP 403/429) and on incomplete initial data.

    Returns a (webpage, data) tuple; either may be None if extraction failed
    and fatal is not set.
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as err:
            cause = err.cause
            is_blocked = isinstance(cause, urllib.error.HTTPError) and cause.code in (403, 429)
            if isinstance(cause, network_exceptions) and not is_blocked:
                retry.error = err
                continue
            self._error_or_warning(err, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as err:
            self._error_or_warning(err, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        has_content = traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not has_content:
            retry.error = ExtractorError('Incomplete yt initial data received')

    return webpage, data
4671
4672 def _report_playlist_authcheck(self, ytcfg, fatal=True):
4673 """Use if failed to extract ytcfg (and data) from initial webpage"""
4674 if not ytcfg and self.is_authenticated:
4675 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
4676 if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal:
4677 raise ExtractorError(
4678 f'{msg}. If you are not downloading private content, or '
4679 'your cookies are only for the first account and channel,'
4680 ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
4681 expected=True)
4682 self.report_warning(msg, only_once=True)
4683
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Get the initial data for url, from the webpage unless it is skipped,
    falling back to the API when the webpage gave no data.

    Returns a (data, ytcfg) tuple.
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)
    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
4703
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve url through the 'navigation/resolve_url' endpoint and return the
    response of the browse/next endpoint it resolves to.

    If it resolves to neither, ExtractorError is raised when fatal is set;
    otherwise a warning is reported and None is returned.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolved endpoint, API endpoint to query), in order of preference
    for ep_key, ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)
4721
# Used by _search_results when it is called without explicit params
_SEARCH_PARAMS = None

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of the search results for query, requesting further
    pages for as long as a continuation is found.

    @param params: value of the 'params' field of the search request;
                   defaults to self._SEARCH_PARAMS
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    check_get_keys = tuple({keys[0] for keys in content_keys})
    if self.skip_webpage:
        ytcfg = {}
    else:
        ytcfg = self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    continuation_list = [None]
    search = None
    page_num = 0
    while True:
        page_num += 1
        # The continuation of the previous page (if any) is merged into the request
        data.update(continuation_list[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
        if not continuation_list[0]:
            break
4756
4757
4758 class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
4759 IE_DESC = 'YouTube Tabs'
4760 _VALID_URL = r'''(?x:
4761 https?://
4762 (?:\w+\.)?
4763 (?:
4764 youtube(?:kids)?\.com|
4765 %(invidious)s
4766 )/
4767 (?:
4768 (?P<channel_type>channel|c|user|browse)/|
4769 (?P<not_channel>
4770 feed/|hashtag/|
4771 (?:playlist|watch)\?.*?\blist=
4772 )|
4773 (?!(?:%(reserved_names)s)\b) # Direct URLs
4774 )
4775 (?P<id>[^/?\#&]+)
4776 )''' % {
4777 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
4778 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
4779 }
4780 IE_NAME = 'youtube:tab'
4781
4782 _TESTS = [{
4783 'note': 'playlists, multipage',
4784 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
4785 'playlist_mincount': 94,
4786 'info_dict': {
4787 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
4788 'title': 'Igor Kleiner - Playlists',
4789 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
4790 'uploader': 'Igor Kleiner',
4791 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4792 'channel': 'Igor Kleiner',
4793 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4794 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4795 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4796 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4797 'channel_follower_count': int
4798 },
4799 }, {
4800 'note': 'playlists, multipage, different order',
4801 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
4802 'playlist_mincount': 94,
4803 'info_dict': {
4804 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
4805 'title': 'Igor Kleiner - Playlists',
4806 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
4807 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4808 'uploader': 'Igor Kleiner',
4809 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4810 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4811 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4812 'channel': 'Igor Kleiner',
4813 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4814 'channel_follower_count': int
4815 },
4816 }, {
4817 'note': 'playlists, series',
4818 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
4819 'playlist_mincount': 5,
4820 'info_dict': {
4821 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4822 'title': '3Blue1Brown - Playlists',
4823 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4824 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4825 'uploader': '3Blue1Brown',
4826 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4827 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4828 'channel': '3Blue1Brown',
4829 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
4830 'tags': ['Mathematics'],
4831 'channel_follower_count': int
4832 },
4833 }, {
4834 'note': 'playlists, singlepage',
4835 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
4836 'playlist_mincount': 4,
4837 'info_dict': {
4838 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4839 'title': 'ThirstForScience - Playlists',
4840 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
4841 'uploader': 'ThirstForScience',
4842 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4843 'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4844 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4845 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4846 'tags': 'count:13',
4847 'channel': 'ThirstForScience',
4848 'channel_follower_count': int
4849 }
4850 }, {
4851 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
4852 'only_matching': True,
4853 }, {
4854 'note': 'basic, single video playlist',
4855 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4856 'info_dict': {
4857 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4858 'uploader': 'Sergey M.',
4859 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4860 'title': 'youtube-dl public playlist',
4861 'description': '',
4862 'tags': [],
4863 'view_count': int,
4864 'modified_date': '20201130',
4865 'channel': 'Sergey M.',
4866 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4867 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4868 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4869 },
4870 'playlist_count': 1,
4871 }, {
4872 'note': 'empty playlist',
4873 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4874 'info_dict': {
4875 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4876 'uploader': 'Sergey M.',
4877 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4878 'title': 'youtube-dl empty playlist',
4879 'tags': [],
4880 'channel': 'Sergey M.',
4881 'description': '',
4882 'modified_date': '20160902',
4883 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4884 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4885 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4886 },
4887 'playlist_count': 0,
4888 }, {
4889 'note': 'Home tab',
4890 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
4891 'info_dict': {
4892 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4893 'title': 'lex will - Home',
4894 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4895 'uploader': 'lex will',
4896 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4897 'channel': 'lex will',
4898 'tags': ['bible', 'history', 'prophesy'],
4899 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4900 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4901 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4902 'channel_follower_count': int
4903 },
4904 'playlist_mincount': 2,
4905 }, {
4906 'note': 'Videos tab',
4907 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
4908 'info_dict': {
4909 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4910 'title': 'lex will - Videos',
4911 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4912 'uploader': 'lex will',
4913 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4914 'tags': ['bible', 'history', 'prophesy'],
4915 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4916 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4917 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4918 'channel': 'lex will',
4919 'channel_follower_count': int
4920 },
4921 'playlist_mincount': 975,
4922 }, {
4923 'note': 'Videos tab, sorted by popular',
4924 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
4925 'info_dict': {
4926 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4927 'title': 'lex will - Videos',
4928 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4929 'uploader': 'lex will',
4930 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4931 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4932 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4933 'channel': 'lex will',
4934 'tags': ['bible', 'history', 'prophesy'],
4935 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4936 'channel_follower_count': int
4937 },
4938 'playlist_mincount': 199,
4939 }, {
4940 'note': 'Playlists tab',
4941 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
4942 'info_dict': {
4943 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4944 'title': 'lex will - Playlists',
4945 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4946 'uploader': 'lex will',
4947 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4948 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4949 'channel': 'lex will',
4950 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4951 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4952 'tags': ['bible', 'history', 'prophesy'],
4953 'channel_follower_count': int
4954 },
4955 'playlist_mincount': 17,
4956 }, {
4957 'note': 'Community tab',
4958 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
4959 'info_dict': {
4960 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4961 'title': 'lex will - Community',
4962 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4963 'uploader': 'lex will',
4964 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4965 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4966 'channel': 'lex will',
4967 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4968 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4969 'tags': ['bible', 'history', 'prophesy'],
4970 'channel_follower_count': int
4971 },
4972 'playlist_mincount': 18,
4973 }, {
4974 'note': 'Channels tab',
4975 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
4976 'info_dict': {
4977 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4978 'title': 'lex will - Channels',
4979 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4980 'uploader': 'lex will',
4981 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4982 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4983 'channel': 'lex will',
4984 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4985 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4986 'tags': ['bible', 'history', 'prophesy'],
4987 'channel_follower_count': int
4988 },
4989 'playlist_mincount': 12,
4990 }, {
4991 'note': 'Search tab',
4992 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
4993 'playlist_mincount': 40,
4994 'info_dict': {
4995 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4996 'title': '3Blue1Brown - Search - linear algebra',
4997 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4998 'uploader': '3Blue1Brown',
4999 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
5000 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5001 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5002 'tags': ['Mathematics'],
5003 'channel': '3Blue1Brown',
5004 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5005 'channel_follower_count': int
5006 },
5007 }, {
5008 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5009 'only_matching': True,
5010 }, {
5011 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5012 'only_matching': True,
5013 }, {
5014 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5015 'only_matching': True,
5016 }, {
5017 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
5018 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5019 'info_dict': {
5020 'title': '29C3: Not my department',
5021 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5022 'uploader': 'Christiaan008',
5023 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5024 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
5025 'tags': [],
5026 'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
5027 'view_count': int,
5028 'modified_date': '20150605',
5029 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5030 'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
5031 'channel': 'Christiaan008',
5032 },
5033 'playlist_count': 96,
5034 }, {
5035 'note': 'Large playlist',
5036 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
5037 'info_dict': {
5038 'title': 'Uploads from Cauchemar',
5039 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
5040 'uploader': 'Cauchemar',
5041 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
5042 'channel_url': 'https://www.youtube.com/c/Cauchemar89',
5043 'tags': [],
5044 'modified_date': r're:\d{8}',
5045 'channel': 'Cauchemar',
5046 'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
5047 'view_count': int,
5048 'description': '',
5049 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
5050 },
5051 'playlist_mincount': 1123,
5052 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5053 }, {
5054 'note': 'even larger playlist, 8832 videos',
5055 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
5056 'only_matching': True,
5057 }, {
5058 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
5059 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
5060 'info_dict': {
5061 'title': 'Uploads from Interstellar Movie',
5062 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
5063 'uploader': 'Interstellar Movie',
5064 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
5065 'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
5066 'tags': [],
5067 'view_count': int,
5068 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
5069 'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
5070 'channel': 'Interstellar Movie',
5071 'description': '',
5072 'modified_date': r're:\d{8}',
5073 },
5074 'playlist_mincount': 21,
5075 }, {
5076 'note': 'Playlist with "show unavailable videos" button',
5077 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
5078 'info_dict': {
5079 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
5080 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
5081 'uploader': 'Phim Siêu Nhân Nhật Bản',
5082 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5083 'view_count': int,
5084 'channel': 'Phim Siêu Nhân Nhật Bản',
5085 'tags': [],
5086 'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5087 'description': '',
5088 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5089 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5090 'modified_date': r're:\d{8}',
5091 },
5092 'playlist_mincount': 200,
5093 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5094 }, {
5095 'note': 'Playlist with unavailable videos in page 7',
5096 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5097 'info_dict': {
5098 'title': 'Uploads from BlankTV',
5099 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
5100 'uploader': 'BlankTV',
5101 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5102 'channel': 'BlankTV',
5103 'channel_url': 'https://www.youtube.com/c/blanktv',
5104 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5105 'view_count': int,
5106 'tags': [],
5107 'uploader_url': 'https://www.youtube.com/c/blanktv',
5108 'modified_date': r're:\d{8}',
5109 'description': '',
5110 },
5111 'playlist_mincount': 1000,
5112 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5113 }, {
5114 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5115 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5116 'info_dict': {
5117 'title': 'Data Analysis with Dr Mike Pound',
5118 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5119 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5120 'uploader': 'Computerphile',
5121 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
5122 'uploader_url': 'https://www.youtube.com/user/Computerphile',
5123 'tags': [],
5124 'view_count': int,
5125 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5126 'channel_url': 'https://www.youtube.com/user/Computerphile',
5127 'channel': 'Computerphile',
5128 },
5129 'playlist_mincount': 11,
5130 }, {
5131 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5132 'only_matching': True,
5133 }, {
5134 'note': 'Playlist URL that does not actually serve a playlist',
5135 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5136 'info_dict': {
5137 'id': 'FqZTN594JQw',
5138 'ext': 'webm',
5139 'title': "Smiley's People 01 detective, Adventure Series, Action",
5140 'uploader': 'STREEM',
5141 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
5142 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
5143 'upload_date': '20150526',
5144 'license': 'Standard YouTube License',
5145 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5146 'categories': ['People & Blogs'],
5147 'tags': list,
5148 'view_count': int,
5149 'like_count': int,
5150 },
5151 'params': {
5152 'skip_download': True,
5153 },
5154 'skip': 'This video is not available.',
5155 'add_ie': [YoutubeIE.ie_key()],
5156 }, {
5157 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5158 'only_matching': True,
5159 }, {
5160 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5161 'only_matching': True,
5162 }, {
5163 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5164 'info_dict': {
5165 'id': 'Wq15eF5vCbI', # This will keep changing
5166 'ext': 'mp4',
5167 'title': str,
5168 'uploader': 'Sky News',
5169 'uploader_id': 'skynews',
5170 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
5171 'upload_date': r're:\d{8}',
5172 'description': str,
5173 'categories': ['News & Politics'],
5174 'tags': list,
5175 'like_count': int,
5176 'release_timestamp': 1642502819,
5177 'channel': 'Sky News',
5178 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5179 'age_limit': 0,
5180 'view_count': int,
5181 'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',
5182 'playable_in_embed': True,
5183 'release_date': '20220118',
5184 'availability': 'public',
5185 'live_status': 'is_live',
5186 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
5187 'channel_follower_count': int
5188 },
5189 'params': {
5190 'skip_download': True,
5191 },
5192 'expected_warnings': ['Ignoring subtitle tracks found in '],
5193 }, {
5194 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5195 'info_dict': {
5196 'id': 'a48o2S1cPoo',
5197 'ext': 'mp4',
5198 'title': 'The Young Turks - Live Main Show',
5199 'uploader': 'The Young Turks',
5200 'uploader_id': 'TheYoungTurks',
5201 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
5202 'upload_date': '20150715',
5203 'license': 'Standard YouTube License',
5204 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5205 'categories': ['News & Politics'],
5206 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5207 'like_count': int,
5208 },
5209 'params': {
5210 'skip_download': True,
5211 },
5212 'only_matching': True,
5213 }, {
5214 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5215 'only_matching': True,
5216 }, {
5217 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5218 'only_matching': True,
5219 }, {
5220 'note': 'A channel that is not live. Should raise error',
5221 'url': 'https://www.youtube.com/user/numberphile/live',
5222 'only_matching': True,
5223 }, {
5224 'url': 'https://www.youtube.com/feed/trending',
5225 'only_matching': True,
5226 }, {
5227 'url': 'https://www.youtube.com/feed/library',
5228 'only_matching': True,
5229 }, {
5230 'url': 'https://www.youtube.com/feed/history',
5231 'only_matching': True,
5232 }, {
5233 'url': 'https://www.youtube.com/feed/subscriptions',
5234 'only_matching': True,
5235 }, {
5236 'url': 'https://www.youtube.com/feed/watch_later',
5237 'only_matching': True,
5238 }, {
5239 'note': 'Recommended - redirects to home page.',
5240 'url': 'https://www.youtube.com/feed/recommended',
5241 'only_matching': True,
5242 }, {
5243 'note': 'inline playlist with not always working continuations',
5244 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5245 'only_matching': True,
5246 }, {
5247 'url': 'https://www.youtube.com/course',
5248 'only_matching': True,
5249 }, {
5250 'url': 'https://www.youtube.com/zsecurity',
5251 'only_matching': True,
5252 }, {
5253 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5254 'only_matching': True,
5255 }, {
5256 'url': 'https://www.youtube.com/TheYoungTurks/live',
5257 'only_matching': True,
5258 }, {
5259 'url': 'https://www.youtube.com/hashtag/cctv9',
5260 'info_dict': {
5261 'id': 'cctv9',
5262 'title': '#cctv9',
5263 'tags': [],
5264 },
5265 'playlist_mincount': 350,
5266 }, {
5267 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
5268 'only_matching': True,
5269 }, {
5270 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
5271 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5272 'only_matching': True
5273 }, {
5274 'note': '/browse/ should redirect to /channel/',
5275 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
5276 'only_matching': True
5277 }, {
5278 'note': 'VLPL, should redirect to playlist?list=PL...',
5279 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5280 'info_dict': {
5281 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5282 'uploader': 'NoCopyrightSounds',
5283 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
5284 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5285 'title': 'NCS : All Releases 💿',
5286 'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5287 'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5288 'modified_date': r're:\d{8}',
5289 'view_count': int,
5290 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5291 'tags': [],
5292 'channel': 'NoCopyrightSounds',
5293 },
5294 'playlist_mincount': 166,
5295 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5296 }, {
5297 'note': 'Topic, should redirect to playlist?list=UU...',
5298 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5299 'info_dict': {
5300 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5301 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5302 'title': 'Uploads from Royalty Free Music - Topic',
5303 'uploader': 'Royalty Free Music - Topic',
5304 'tags': [],
5305 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5306 'channel': 'Royalty Free Music - Topic',
5307 'view_count': int,
5308 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5309 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5310 'modified_date': r're:\d{8}',
5311 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5312 'description': '',
5313 },
5314 'expected_warnings': [
5315 'The URL does not have a videos tab',
5316 r'[Uu]navailable videos (are|will be) hidden',
5317 ],
5318 'playlist_mincount': 101,
5319 }, {
5320 'note': 'Topic without a UU playlist',
5321 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
5322 'info_dict': {
5323 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
5324 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
5325 'tags': [],
5326 },
5327 'expected_warnings': [
5328 'the playlist redirect gave error',
5329 ],
5330 'playlist_mincount': 9,
5331 }, {
5332 'note': 'Youtube music Album',
5333 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
5334 'info_dict': {
5335 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
5336 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
5337 'tags': [],
5338 'view_count': int,
5339 'description': '',
5340 'availability': 'unlisted',
5341 'modified_date': r're:\d{8}',
5342 },
5343 'playlist_count': 50,
5344 }, {
5345 'note': 'unlisted single video playlist',
5346 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5347 'info_dict': {
5348 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5349 'uploader': 'colethedj',
5350 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5351 'title': 'yt-dlp unlisted playlist test',
5352 'availability': 'unlisted',
5353 'tags': [],
5354 'modified_date': '20220418',
5355 'channel': 'colethedj',
5356 'view_count': int,
5357 'description': '',
5358 'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5359 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5360 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5361 },
5362 'playlist_count': 1,
5363 }, {
5364 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
5365 'url': 'https://www.youtube.com/feed/recommended',
5366 'info_dict': {
5367 'id': 'recommended',
5368 'title': 'recommended',
5369 'tags': [],
5370 },
5371 'playlist_mincount': 50,
5372 'params': {
5373 'skip_download': True,
5374 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5375 },
5376 }, {
5377 'note': 'API Fallback: /videos tab, sorted by oldest first',
5378 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
5379 'info_dict': {
5380 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5381 'title': 'Cody\'sLab - Videos',
5382 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
5383 'uploader': 'Cody\'sLab',
5384 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5385 'channel': 'Cody\'sLab',
5386 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5387 'tags': [],
5388 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5389 'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5390 'channel_follower_count': int
5391 },
5392 'playlist_mincount': 650,
5393 'params': {
5394 'skip_download': True,
5395 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5396 },
5397 }, {
5398 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
5399 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5400 'info_dict': {
5401 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5402 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5403 'title': 'Uploads from Royalty Free Music - Topic',
5404 'uploader': 'Royalty Free Music - Topic',
5405 'modified_date': r're:\d{8}',
5406 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5407 'description': '',
5408 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5409 'tags': [],
5410 'channel': 'Royalty Free Music - Topic',
5411 'view_count': int,
5412 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5413 },
5414 'expected_warnings': [
5415 'does not have a videos tab',
5416 r'[Uu]navailable videos (are|will be) hidden',
5417 ],
5418 'playlist_mincount': 101,
5419 'params': {
5420 'skip_download': True,
5421 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5422 },
5423 }, {
5424 'note': 'non-standard redirect to regional channel',
5425 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
5426 'only_matching': True
5427 }, {
5428 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
5429 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5430 'info_dict': {
5431 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5432 'modified_date': '20220407',
5433 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5434 'tags': [],
5435 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5436 'uploader': 'pukkandan',
5437 'availability': 'unlisted',
5438 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5439 'channel': 'pukkandan',
5440 'description': 'Test for collaborative playlist',
5441 'title': 'yt-dlp test - collaborative playlist',
5442 'view_count': int,
5443 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5444 },
5445 'playlist_mincount': 2
5446 }]
5447
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL that YoutubeIE accepts is rejected here; every other URL is
    judged by the parent class's ``suitable``.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
5451
# Splits a URL into three named parts:
#   pre  - whatever _VALID_URL matches
#   tab  - an optional "/<word>" segment, only attempted when the
#          "not_channel" group of _VALID_URL did not participate in the match
#   post - the remainder of the URL
_URL_RE = re.compile(
    rf'(?P<pre>{_VALID_URL})'
    r'(?(not_channel)|(?P<tab>/\w+))?'
    r'(?P<post>.*)$')
5453
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a tab-style URL (channel tab, feed, hashtag, playlist) to a result.

    The URL is normalised first: the host is forced to www.youtube.com,
    YouTube Music ids are rewritten when ``smuggled_data`` marks the URL as a
    music URL, and a channel URL without a tab is pointed at "/videos" unless
    the 'no-youtube-channel-redirect' compat option is set. The page data is
    then fetched and dispatched on what it contains: browse tabs, a
    watch-page playlist, or a single video.

    Raises ExtractorError when the page cannot be recognised or an explicitly
    requested tab does not exist, and UserNotLive when a "/live" tab did not
    redirect to a video.
    """
    item_id = self._match_id(url)
    parsed_url = urllib.parse.urlparse(url)
    url = urllib.parse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])
    follow_redirects = 'no-youtube-channel-redirect' not in compat_opts

    def split_url(target):
        # Named groups of _URL_RE, with unmatched groups normalised to ''
        groups = self._URL_RE.match(target).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    def video_result(vid):
        # Hand a single video over to the regular video extractor
        return self.url_result(
            f'https://www.youtube.com/watch?v={vid}', ie=YoutubeIE.ie_key(), video_id=vid)

    def browse_tabs(response):
        return traverse_obj(
            response, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)

    mobj = split_url(url)
    redirect_warning = None
    pre, post = mobj['pre'], mobj['post']
    # Youtube returns incomplete data if tabname is not lower case
    tab = mobj['tab'].lower()
    is_channel = not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        if item_id.startswith('VL'):
            # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre = f'https://www.youtube.com/playlist?list={item_id}'
            tab = post = ''
            is_channel = False
        elif item_id.startswith('MP'):
            # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(
                mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, ie=YoutubeTabIE.ie_key())
        elif mobj['channel_type'] == 'browse':
            # Youtube music /browse/ should be changed to /channel/
            pre = f'https://www.youtube.com/channel/{item_id}'

    original_tab_name = tab
    if is_channel and not tab and follow_redirects:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = pre + tab + post
    mobj = split_url(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = split_url(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return video_result(video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_path = (
        'onResponseReceivedActions', ..., 'navigateAction', 'endpoint',
        'commandMetadata', 'webCommandMetadata', 'url')
    redirect_url = traverse_obj(data, redirect_path, get_all=False)
    if redirect_url and follow_redirects:
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs = browse_tabs(data)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]
        if follow_redirects:
            if requested_tab_name == 'live':
                # Live tab should have redirected to the video
                raise UserNotLive(video_id=mobj['id'])
            if requested_tab_name and requested_tab_name != selected_tab_name:
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if original_tab_name:
                    # The tab was asked for explicitly, so a substitute is not acceptable
                    raise ExtractorError(redirect_warning, expected=True)
                if item_id.startswith('UC'):
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    pl_id = f'UU{item_id[2:]}'
                    pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                    try:
                        data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                    except ExtractorError:
                        redirect_warning += ' and the playlist redirect gave error'
                    else:
                        item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                        redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                if selected_tab_name and selected_tab_name != requested_tab_name:
                    redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # data may have been replaced above, so the tabs are looked up again
    tabs = browse_tabs(data)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    if mobj['tab'] != '/live':  # live tab is expected to redirect to video
        self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
    return video_result(video_id)
5582
5583
class YoutubePlaylistIE(InfoExtractor):
    """Match playlist URLs and bare playlist IDs and hand them over to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    # Scheme, host and path are optional as a group, so a bare playlist ID matches as well
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE accepts or that carry a `v` parameter; otherwise defer to the base check."""
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): parse_qs is also imported at module level; this function-local
        # import is presumably intentional (e.g. if this method's source is reused
        # outside this module) - confirm before removing it
        from ..utils import parse_qs
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Rewrite the input as a canonical /playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Re-use the query string of the input; if it has none, build one from the ID
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
5693
5694
class YoutubeYtBeIE(InfoExtractor):
    """Turn youtu.be short links that carry a `list=` parameter into watch URLs handled by YoutubeTabIE."""

    IE_DESC = 'youtu.be'
    _VALID_URL = (
        r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%s)'
        % YoutubeBaseInfoExtractor._PLAYLIST_ID_RE)
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent watch URL (video + playlist) and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
5744
5745
class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map `embed/live_stream?channel=...` URLs onto the channel's /live URL."""

    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel ID taken from the query string."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
5759
5760
class YoutubeYtUserIE(InfoExtractor):
    """Resolve the `ytuser:<name>` shorthand to the user's /videos page."""

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /user/<name>/videos URL."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
5775
5776
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the `:ytfav` keyword (and its spelling variants) to the `LL` playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed `list=LL` playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
5794
5795
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Expose the notification menu as a playlist of video / community-post URLs."""

    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield one entry per usable notification found in `response`.

        `continuation_list` is a one-element list used as an out-parameter:
        its slot is reset to None and then set to the last
        `continuationItemRenderer` seen among the items, if any.
        """
        menu_items = traverse_obj(
            response,
            # first page of the menu
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            # continuation pages
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list) or []
        continuation_list[0] = None
        for menu_item in menu_items:
            entry = self._extract_notification_renderer(menu_item.get('notificationRenderer'))
            if entry:
                yield entry
            next_page = menu_item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Convert a single notification renderer into a `url` result dict.

        Returns None when the notification points neither at a video nor at
        a community post.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)

        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)

        # The sent-time text is only parsed when the extractor argument is set
        upload_date = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()):
            upload_date = strftime_or_none(
                self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Lazily page through the notification menu, yielding entries until no continuation is left."""
        continuation_list = [None]
        response = None
        page = 0
        while True:
            page += 1
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            # Visitor data is taken from the previous page's response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                return

    def _real_extract(self, url):
        """Return the notifications as a lazily evaluated playlist."""
        display_id = 'notifications'
        ytcfg = {} if self.skip_webpage else self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)
5885
5886
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor for the `ytsearch` key."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]
5900
5901
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor for the `ytsearchdate` key."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]
5915
5916
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle /results and /search URLs, forwarding the `sp` parameter to the search."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the search results as a playlist whose id and title are the query."""
        qs = parse_qs(url)
        # `search_query` takes precedence over `q`
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        search_params = qs.get('sp', (None,))[0]
        return self.playlist_result(self._search_results(query, search_params), query, query)
5956
5957
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle music.youtube.com search URLs; a section is chosen via `sp` or the URL fragment."""

    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        },
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'},
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'},
    }]

    # Section name (as written in the URL fragment) -> value of the `sp` parameter
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Run the search with the `web_music` client and title the playlist `<query> - <section>`."""
        qs = parse_qs(url)
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        params = qs.get('sp', (None,))[0]

        if params:
            # Explicit `sp`: show its section name if known, else the raw value
            section = params
            for name, value in self._SECTIONS.items():
                if value == params:
                    section = name
                    break
        else:
            # No `sp`: treat the text after the first '#' as a section name
            url_pieces = url.split('#')
            fragment = url_pieces[1] if len(url_pieces) > 1 else ''
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None

        title = join_nonempty(query, section, delim=' - ')
        results = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(results, title, title)
6009
6010
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.
    Each subclass has to override _FEED_NAME; it is used both in the
    extractor name and in the /feed/ URL that is delegated to YoutubeTabIE.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    @classproperty
    def IE_NAME(cls):
        return f'youtube:{cls._FEED_NAME}'

    def _real_initialize(self):
        # Borrow the login check from YoutubeBaseInfoExtractor, which is not a base of this class
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
6029
6030
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the `:ytwatchlater` keyword to the `WL` playlist."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed `list=WL` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
6043
6044
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for `recommended`; matches the bare site root as well as the `:ytrec` keyword."""

    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    # Overrides the base class, which sets this to True
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
6060
6061
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for `subscriptions`, reached through the `:ytsubs` keyword."""

    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
6073
6074
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for `history`, reached through the `:ythis` / `:ythistory` keyword."""

    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
6083
6084
class YoutubeStoriesIE(InfoExtractor):
    """Resolve the `ytstories:<channel id>` shorthand to the channel's stories playlist."""

    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    IE_NAME = 'youtube:stories'
    # The captured id is the channel ID without its leading "UC"
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the `RLTD...` playlist URL, smuggling `is_story`."""
        playlist_id = f'RLTD{self._match_id(url)}'
        # Pass the extractor key (not the class) like the other redirecting
        # extractors in this file do
        return self.url_result(
            smuggle_url(f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1', {'is_story': True}),
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
6099
6100
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch / attribution_link URLs that end without a video ID and explain the likely cause.

    Extraction always fails with an expected ExtractorError telling the user
    to quote the URL.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Only URLs that END right after one of the listed parameters (or after
    # a bare "watch?") are matched
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise: the matched URL carries no video ID to extract."""
        # The space before the final period keeps it visually apart from the example ID
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)
6148
6149
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Extract /clip/ URLs as a section (start/end time) of the underlying video."""

    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int,
        },
    }]

    def _real_extract(self, url):
        """Return a `url_transparent` result for the base video, limited to the clip's section.

        Raises ExtractorError if the page data lacks the video ID or a
        usable clip start/end time.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        # First `loopCommand` found in the engagement panels; carries the clip bounds
        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)

        # The traversal yields None when nothing matches; report that (and
        # missing/non-numeric bounds) as an extraction error instead of an
        # unrelated TypeError/KeyError/ValueError
        try:
            start_ms = int(clip_data['startTimeMs'])
            end_ms = int(clip_data['endTimeMs'])
        except (TypeError, KeyError, ValueError) as err:
            raise ExtractorError('Unable to find clip start/end time') from err

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            'section_start': start_ms / 1000,
            'section_end': end_ms / 1000,
        }
6206
6207
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose `v` value is only 1-10 characters long and report them as truncated."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)