]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/youtube.py
[jsinterp, extractor/youtube] Minor fixes
[yt-dlp.git] / yt_dlp / extractor / youtube.py
1 import base64
2 import calendar
3 import copy
4 import datetime
5 import hashlib
6 import itertools
7 import json
8 import math
9 import os.path
10 import random
11 import re
12 import sys
13 import threading
14 import time
15 import traceback
16 import urllib.error
17 import urllib.parse
18
19 from .common import InfoExtractor, SearchInfoExtractor
20 from .openload import PhantomJSwrapper
21 from ..compat import functools
22 from ..jsinterp import JSInterpreter
23 from ..utils import (
24 NO_DEFAULT,
25 ExtractorError,
26 UserNotLive,
27 bug_reports_message,
28 classproperty,
29 clean_html,
30 datetime_from_str,
31 dict_get,
32 float_or_none,
33 format_field,
34 get_first,
35 int_or_none,
36 is_html,
37 join_nonempty,
38 js_to_json,
39 mimetype2ext,
40 network_exceptions,
41 orderedSet,
42 parse_codecs,
43 parse_count,
44 parse_duration,
45 parse_iso8601,
46 parse_qs,
47 qualities,
48 remove_start,
49 smuggle_url,
50 str_or_none,
51 str_to_int,
52 strftime_or_none,
53 traverse_obj,
54 try_get,
55 unescapeHTML,
56 unified_strdate,
57 unified_timestamp,
58 unsmuggle_url,
59 update_url_query,
60 url_or_none,
61 urljoin,
62 variadic,
63 )
64
# Per-client Innertube configuration templates, keyed by client name.
# Any clients starting with _ cannot be explicitly requested by the user.
#
# Entries that omit 'INNERTUBE_API_KEY', 'INNERTUBE_HOST' or 'REQUIRE_JS_PLAYER'
# have those keys filled in with defaults by build_innertube_clients(), which
# also sets context.client.hl and a 'priority' value on every entry.
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20220731.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20220727.01.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20220726.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '5.16.51',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '22.30.100',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    # No explicit API key here; build_innertube_clients() supplies the default one
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '5.21',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    # No explicit API key here; build_innertube_clients() supplies the default one
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '22.33.101',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85
    },
}
237
238
239 def _split_innertube_client(client_name):
240 variant, *base = client_name.rsplit('.', 1)
241 if base:
242 return variant, base[0], variant
243 base, *variant = client_name.split('_', 1)
244 return client_name, base, variant[0] if variant else None
245
246
def build_innertube_clients():
    """
    Complete every INNERTUBE_CLIENTS entry in place.

    Fills in the default API key, host, REQUIRE_JS_PLAYER flag and 'hl',
    assigns a 'priority' derived from the base client, and registers an
    additional '<client>_embedscreen' entry for each client without a variant.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
    rank = qualities(base_clients[::-1])
    fallbacks = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot because new entries are added to the dict below
    for name, cfg in tuple(INNERTUBE_CLIENTS.items()):
        for key, value in fallbacks:
            cfg.setdefault(key, value)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        _, base_client, variant = _split_innertube_client(name)
        cfg['priority'] = 10 * rank(base_client)

        if variant == 'embedded':
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            cfg['priority'] -= 2
            continue
        if variant:
            cfg['priority'] -= 3
            continue

        # Plain base client: derive its embed-screen sibling from a deep copy
        screen_cfg = copy.deepcopy(cfg)
        screen_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
        screen_cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
        screen_cfg['priority'] -= 3
        INNERTUBE_CLIENTS[f'{name}_embedscreen'] = screen_cfg


build_innertube_clients()
276
277
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Regex alternation of path segments treated as reserved
    # (presumably so they are not mistaken for channel/user names - confirm against the users of this constant)
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
        r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
        r'browse|oembed|get_video_info|iframe_api|s/player|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    # Either a known prefix followed by at least 10 ID characters, or one of the fixed IDs
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    # _NETRC_MACHINE = 'youtube'

    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Hostname regex fragments of alternative frontends (Invidious/Piped and redirectors)
    _INVIDIOUS_SITES = (
        # invidious-redirect websites
        r'(?:www\.)?redirect\.invidious\.io',
        r'(?:(?:www|dev)\.)?invidio\.us',
        # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
        r'(?:www\.)?invidious\.pussthecat\.org',
        r'(?:www\.)?invidious\.zee\.li',
        r'(?:www\.)?invidious\.ethibox\.fr',
        r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
        r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
        r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
        # youtube-dl invidious instances list
        r'(?:(?:www|no)\.)?invidiou\.sh',
        r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
        r'(?:www\.)?invidious\.kabi\.tk',
        r'(?:www\.)?invidious\.mastodon\.host',
        r'(?:www\.)?invidious\.zapashcanon\.fr',
        r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
        r'(?:www\.)?invidious\.tinfoil-hat\.net',
        r'(?:www\.)?invidious\.himiko\.cloud',
        r'(?:www\.)?invidious\.reallyancient\.tech',
        r'(?:www\.)?invidious\.tube',
        r'(?:www\.)?invidiou\.site',
        r'(?:www\.)?invidious\.site',
        r'(?:www\.)?invidious\.xyz',
        r'(?:www\.)?invidious\.nixnet\.xyz',
        r'(?:www\.)?invidious\.048596\.xyz',
        r'(?:www\.)?invidious\.drycat\.fr',
        r'(?:www\.)?inv\.skyn3t\.in',
        r'(?:www\.)?tube\.poal\.co',
        r'(?:www\.)?tube\.connect\.cafe',
        r'(?:www\.)?vid\.wxzm\.sx',
        r'(?:www\.)?vid\.mint\.lgbt',
        r'(?:www\.)?vid\.puffyan\.us',
        r'(?:www\.)?yewtu\.be',
        r'(?:www\.)?yt\.elukerio\.org',
        r'(?:www\.)?yt\.lelux\.fi',
        r'(?:www\.)?invidious\.ggc-project\.de',
        r'(?:www\.)?yt\.maisputain\.ovh',
        r'(?:www\.)?ytprivate\.com',
        r'(?:www\.)?invidious\.13ad\.de',
        r'(?:www\.)?invidious\.toot\.koeln',
        r'(?:www\.)?invidious\.fdn\.fr',
        r'(?:www\.)?watch\.nettohikari\.com',
        r'(?:www\.)?invidious\.namazso\.eu',
        r'(?:www\.)?invidious\.silkky\.cloud',
        r'(?:www\.)?invidious\.exonip\.de',
        r'(?:www\.)?invidious\.riverside\.rocks',
        r'(?:www\.)?invidious\.blamefran\.net',
        r'(?:www\.)?invidious\.moomoo\.de',
        r'(?:www\.)?ytb\.trom\.tf',
        r'(?:www\.)?yt\.cyberhost\.uk',
        r'(?:www\.)?kgg2m7yk5aybusll\.onion',
        r'(?:www\.)?qklhadlycap4cnod\.onion',
        r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
        r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
        r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
        r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
        r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
        r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
        r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
        r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
        r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
        r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
        # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
        r'(?:www\.)?piped\.kavin\.rocks',
        r'(?:www\.)?piped\.silkky\.cloud',
        r'(?:www\.)?piped\.tokhmi\.xyz',
        r'(?:www\.)?piped\.moomoo\.me',
        r'(?:www\.)?il\.ax',
        r'(?:www\.)?piped\.syncpundit\.com',
        r'(?:www\.)?piped\.mha\.fi',
        r'(?:www\.)?piped\.mint\.lgbt',
        r'(?:www\.)?piped\.privacy\.com\.de',
    )
369
def _initialize_consent(self):
    """
    Set an accepting CONSENT cookie for .youtube.com when one is needed.

    Nothing is done if a '__Secure-3PSID' cookie exists or the current
    CONSENT cookie already contains 'YES'. The numeric id is taken from a
    'PENDING+<id>' CONSENT value when present, otherwise chosen at random.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return

    consent_cookie = jar.get('CONSENT')
    pending_id = None
    if consent_cookie:
        if 'YES' in consent_cookie.value:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', consent_cookie.value, 'consent', default=None)

    pending_id = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % pending_id)
384
def _initialize_pref(self):
    """
    Rewrite the PREF cookie so that it carries hl=en and tz=UTC.

    Values already present in the user's PREF cookie are kept; if that
    cookie cannot be parsed a warning is reported and it is discarded.
    """
    existing = self._get_cookies('https://www.youtube.com/').get('PREF')
    settings = {}
    if existing:
        try:
            settings = dict(urllib.parse.parse_qsl(existing.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

    settings['hl'] = 'en'
    settings['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(settings))
396
397 def _real_initialize(self):
398 self._initialize_pref()
399 self._initialize_consent()
400 self._check_login_required()
401
402 def _check_login_required(self):
403 if self._LOGIN_REQUIRED and not self._cookies_passed:
404 self.raise_login_required('Login details are needed to download this content', method='cookies')
405
# Matches the assignment prefix of the initial data object, written either as
# `ytInitialData =` or as `window["ytInitialData"] =`
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
# Matches the assignment prefix `ytInitialPlayerResponse =`
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
408
def _get_default_ytcfg(self, client='web'):
    """Return an independent deep copy of the INNERTUBE_CLIENTS entry for `client`."""
    template = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(template)
411
def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' configured for `client` in INNERTUBE_CLIENTS."""
    client_config = INNERTUBE_CLIENTS[client]
    return client_config['INNERTUBE_HOST']
414
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get() on `ytcfg`, falling back to the default config of
    `default_client` when the first lookup yields a falsy result.
    """
    found = try_get(ytcfg, getter, expected_type)
    if found:
        return found
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
419
def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name from `ytcfg`, or from the default client config."""
    getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_NAME'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)
424
def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from `ytcfg`, or from the default client config."""
    getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)
429
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """
    Pick the API hostname to use.

    Order of precedence: the 'innertube_host' extractor argument, then
    `req_api_hostname`, then the host configured for `default_client`
    ('web' when no client is given).
    """
    configured = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured:
        return configured
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
433
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return 'INNERTUBE_API_KEY' from `ytcfg`, or from the default client config."""
    def key_getter(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, key_getter, str, default_client)
436
def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the 'INNERTUBE_CONTEXT' dict of `ytcfg` (or of the default client
    config) with language and timezone forced on its 'client' sub-dict.

    The 'client' dict is updated in place.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_section = traverse_obj(context, 'client', expected_type=dict, default={})
    client_section.update(hl='en', timeZone='UTC', utcOffsetMinutes=0)
    return context
444
# Cache for the SAPISID cookie value used by _generate_sapisidhash_header():
# None = not looked up yet, False = looked up but no usable cookie was found
_SAPISID = None
446
447 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
448 time_now = round(time.time())
449 if self._SAPISID is None:
450 yt_cookies = self._get_cookies('https://www.youtube.com')
451 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
452 # See: https://github.com/yt-dlp/yt-dlp/issues/393
453 sapisid_cookie = dict_get(
454 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
455 if sapisid_cookie and sapisid_cookie.value:
456 self._SAPISID = sapisid_cookie.value
457 self.write_debug('Extracted SAPISID cookie')
458 # SAPISID cookie is required if not already present
459 if not yt_cookies.get('SAPISID'):
460 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
461 self._set_cookie(
462 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
463 else:
464 self._SAPISID = False
465 if not self._SAPISID:
466 return None
467 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
468 sapisidhash = hashlib.sha1(
469 f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
470 return f'SAPISIDHASH {time_now}_{sapisidhash}'
471
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST `query` (merged with the request context) as JSON to the
    /youtubei/v1/<ep> endpoint and return the parsed JSON response.

    `context` defaults to the context of `default_client`. The API key is
    taken from the 'innertube_key' extractor argument, then `api_key`, then
    the default client config. Caller-supplied `headers` override the
    generated ones.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    key_override = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = key_override or api_key or self._extract_api_key(default_client=default_client)

    endpoint_url = f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}'
    return self._download_json(
        endpoint_url, video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})
489
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search `webpage` for the JSON object following _YT_INITIAL_DATA_RE and return it parsed."""
    return self._search_json(
        self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id,
        fatal=fatal)
492
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first 'SESSION_INDEX' among the given ytcfgs that converts
    to an int, or None if there is none.
    """
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)
503
504 # Deprecated?
505 def _extract_identity_token(self, ytcfg=None, webpage=None):
506 if ytcfg:
507 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
508 if token:
509 return token
510 if webpage:
511 return self._search_regex(
512 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
513 'identity token', default=None, fatal=False)
514
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated:
            return delegated

        parts = (try_get(source, datasync_getters, str) or '').split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) < 2 or not parts[1]:
            continue
        return parts[0]
533
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = [(
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )]
    return get_first(args, visitor_data_paths, expected_type=str)
543
@functools.cached_property
def is_authenticated(self):
    """Whether a SAPISIDHASH header can be generated; computed once and cached."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
547
def extract_ytcfg(self, video_id, webpage):
    """
    Parse the argument of the first `ytcfg.set({...});` call in `webpage`.

    Returns an empty dict when there is no webpage, no such call, or the
    JSON cannot be parsed.
    """
    if not webpage:
        return {}
    raw_config = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    return self._parse_json(raw_config, video_id, fatal=False) or {}
555
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an API request.

    Explicit keyword arguments take precedence over values extracted from
    `ytcfg`. Headers whose value is None are left out of the result.
    Authorization and X-Origin are added when a SAPISIDHASH can be generated.
    """
    origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))

    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    client_version = self._extract_client_version(ytcfg, default_client)
    identity = identity_token or self._extract_identity_token(ytcfg)
    page_id = account_syncid or self._extract_account_syncid(ytcfg)
    visitor_id = visitor_data or self._extract_visitor_data(ytcfg)
    user_agent = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)

    headers = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': client_version,
        'Origin': origin,
        'X-Youtube-Identity-Token': identity,
        'X-Goog-PageId': page_id,
        'X-Goog-Visitor-Id': visitor_id,
        'User-Agent': user_agent,
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # Fall back to index 0 when only an account syncid is known
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin

    return {name: value for name, value in headers.items() if value is not None}
581
582 def _download_ytcfg(self, client, video_id):
583 url = {
584 'web': 'https://www.youtube.com',
585 'web_music': 'https://music.youtube.com',
586 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
587 }.get(client)
588 if not url:
589 return {}
590 webpage = self._download_webpage(
591 url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')
592 return self.extract_ytcfg(video_id, webpage) or {}
593
594 @staticmethod
595 def _build_api_continuation_query(continuation, ctp=None):
596 query = {
597 'continuation': continuation
598 }
599 # TODO: Inconsistency with clickTrackingParams.
600 # Currently we have a fixed ctp contained within context (from ytcfg)
601 # and a ctp in root query for continuation.
602 if ctp:
603 query['clickTracking'] = {'clickTrackingParams': ctp}
604 return query
605
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the renderer's 'nextContinuationData'
    or 'reloadContinuationData'. Returns None if no continuation token is found.
    """
    getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    continuation_data = try_get(renderer, getters, dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))
618
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.

    Returns None when `continuation_ep` is not a dict or has no
    continuationCommand token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))
628
@classmethod
def _extract_continuation(cls, renderer):
    """
    Find a continuation query for `renderer`.

    The renderer's own next/reload continuation data is tried first; failing
    that, each entry of its 'contents' and 'items' lists is checked for a
    continuationItemRenderer. Returns None when nothing is found.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in try_get(renderer, lambda x: x[key], list) or []]
    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )

    for entry in entries:
        if not isinstance(entry, dict):
            continue
        endpoint = try_get(entry, endpoint_getters, dict)
        query = cls._extract_continuation_ep_data(endpoint)
        if query:
            return query
649
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) pairs for the entries of data['alerts'].

    Entries that are not dicts, alerts without a 'type' and alerts without
    any text are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Guard the inner value the same way as the outer entry: a
            # non-dict value would otherwise raise AttributeError on .get()
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
662
663 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
664 errors = []
665 warnings = []
666 for alert_type, alert_message in alerts:
667 if alert_type.lower() == 'error' and fatal:
668 errors.append([alert_type, alert_message])
669 else:
670 warnings.append([alert_type, alert_message])
671
672 for alert_type, alert_message in (warnings + errors[:-1]):
673 self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
674 if errors:
675 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
676
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts contained in `data` and pass them to _report_alerts()."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
679
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadata badge labels found in renderer['badges']."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}
687
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found at any of the given paths in `data`.

    A text object is read from its 'simpleText' or, failing that, by joining
    the 'text' of its 'runs' (at most `max_runs` of them when given). Without
    any path, `data` itself is treated as the text object. Returns None if
    nothing is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                # A non-branching path yields a single object, not a list of results
                candidates = [candidates]

        for candidate in candidates:
            simple = try_get(candidate, lambda x: x['simpleText'], str)
            if simple:
                return simple

            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                runs = candidate

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
709
def _get_count(self, data, *path_list):
    """
    Parse a count from the text found at the given paths.

    parse_count() is tried first; if it yields None, the leading run of
    digits and commas (after removing whitespace) is converted instead.
    """
    count_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(count_text)
    if parsed is not None:
        return parsed

    compact_text = re.sub(r'\s', '', count_text)
    leading_digits = self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None)
    return str_to_int(leading_digits)
717
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @returns list of dicts with 'url', 'height' and 'width'
    """
    results = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for entry in entries:
            url = url_or_none(entry.get('url'))
            if not url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in url:
                url = url.split('?')[0]
            height = int_or_none(entry.get('height'))
            width = int_or_none(entry.get('width'))
            results.append({'url': url, 'height': height, 'width': width})
    return results
741
@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

    Returns None when the text holds no relative time or it cannot be converted.
    """
    match = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago',
        relative_time_text)
    if not match:
        return None

    keyword = match.group('start')
    if keyword:
        return datetime_from_str(keyword)

    amount, unit = match.group('time'), match.group('unit')
    try:
        return datetime_from_str('now-%s%s' % (amount, unit))
    except ValueError:
        return None
757
def _extract_time_text(self, renderer, *path_list):
    """
    Read the time text at the given paths and convert it to a timestamp.

    Relative times ('3 days ago') are tried first, then absolute dates. A
    warning is reported once if a non-empty text cannot be parsed.

    @returns (timestamp, time_text)
    """
    time_text = self._get_text(renderer, *path_list) or ''
    relative = self.extract_relative_time(time_text)

    if isinstance(relative, datetime.datetime):
        timestamp = calendar.timegm(relative.timetuple())
    else:
        timestamp = unified_timestamp(time_text) or unified_timestamp(
            self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                time_text.lower(), 'time text', default=None))

    if time_text and timestamp is None:
        self.report_warning(f"Cannot parse localized time text '{time_text}'" + bug_reports_message(), only_once=True)
    return timestamp, time_text
776
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` under the retry manager and return its response.

    An attempt is retried on network errors that are not HTTP errors, on
    HTTP errors other than 403 and 429, on alerts containing 'unknown
    error', and when traversing the response with `check_get_keys` yields
    nothing. Any other failure is passed to self._error_or_warning() with
    the caller's `fatal` flag.
    """
    for retry in self.RetryManager():
        try:
            # Always fatal here so that failures surface as ExtractorError and can be classified below
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            if not isinstance(e.cause, network_exceptions):
                return self._error_or_warning(e, fatal=fatal)
            elif not isinstance(e.cause, urllib.error.HTTPError):
                retry.error = e
                continue

            # HTTP error: peek at the body to tell an HTML page from a JSON error payload
            first_bytes = e.cause.read(512)
            if not is_html(first_bytes):
                yt_error = try_get(
                    self._parse_json(
                        self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if e.cause.code not in (403, 429):
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            retry.error = ExtractorError('Incomplete data received', expected=True)
            continue

        return response
828
@staticmethod
def is_music_url(url):
    """Return True if `url` starts with http(s)://music.youtube.com/."""
    return bool(re.match(r'https?://music\.youtube\.com/', url))
832
def _extract_video(self, renderer):
    """
    Convert a video renderer into a 'url'-type result dict for YoutubeIE.

    The result URL points to /shorts/ when the overlay style or the
    navigation URL marks the video as a short, and to /watch otherwise.
    'upload_date' is only filled in when the 'approximate_date' extractor
    argument is set.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    length_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(length_text)
    if duration is None:
        # Fall back to the duration spelled out in the title's accessibility label
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    navigation_path = traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)
    navigation_url = urljoin('https://www.youtube.com/', navigation_path) or ''
    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': (strftime_or_none(timestamp, '%Y%m%d')
                        if self._configuration_arg('approximate_date', ie_key='youtubetab')
                        else None),
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
    }
887
888
889 class YoutubeIE(YoutubeBaseInfoExtractor):
890 IE_DESC = 'YouTube'
891 _VALID_URL = r"""(?x)^
892 (
893 (?:https?://|//) # http(s):// or protocol-independent URL
894 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
895 (?:www\.)?deturl\.com/www\.youtube\.com|
896 (?:www\.)?pwnyoutube\.com|
897 (?:www\.)?hooktube\.com|
898 (?:www\.)?yourepeat\.com|
899 tube\.majestyc\.net|
900 %(invidious)s|
901 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
902 (?:.*?\#/)? # handle anchor (#/) redirect urls
903 (?: # the various things that can precede the ID:
904 (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
905 |(?: # or the v= param in all its forms
906 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
907 (?:\?|\#!?) # the params delimiter ? or # or #!
908 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
909 v=
910 )
911 ))
912 |(?:
913 youtu\.be| # just youtu.be/xxxx
914 vid\.plus| # or vid.plus/xxxx
915 zwearz\.com/watch| # or zwearz.com/watch/xxxx
916 %(invidious)s
917 )/
918 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
919 )
920 )? # all until now is optional -> you can pass the naked ID
921 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
922 (?(1).+)? # if we found the ID, everything can follow
923 (?:\#|$)""" % {
924 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
925 }
# Verbose regex matching a quoted youtube.com / youtube-nocookie.com
# "embed/", "v/" or "p/" URL (captured in the named group `url`) that is
# introduced by one of:
#   - an <iframe>/<embed> `src=` or <object> `data=` attribute
#   - a `data-video-url=` attribute
#   - a swfobject `embedSWF(` or `new SWFObject(` call
# The opening quote is captured in group 1 and must be repeated to close the URL.
# NOTE: the embedSWF alternative used to read `embedSWF\(?:\s*`, which demanded a
# literal ":" after an optional "(" and so never matched a real
# `swfobject.embedSWF("url", ...)` call.
_EMBED_REGEX = [r'''(?x)
    (?:
        <iframe[^>]+?src=|
        data-video-url=|
        <embed[^>]+?src=|
        embedSWF\(\s*|
        <object[^>]+data=|
        new\s+SWFObject\(
    )
    (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
    \1''']
939 _PLAYER_INFO_RE = (
940 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
941 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
942 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
943 )
944 _formats = {
945 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
946 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
947 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
948 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
949 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
950 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
951 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
952 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
953 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
954 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
955 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
956 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
957 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
958 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
959 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
960 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
961 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
962 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
963
964
965 # 3D videos
966 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
967 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
968 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
969 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
970 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
971 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
972 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
973
974 # Apple HTTP Live Streaming
975 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
976 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
977 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
978 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
979 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
980 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
981 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
982 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
983
984 # DASH mp4 video
985 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
986 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
987 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
988 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
989 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
990 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
991 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
992 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
993 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
994 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
995 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
996 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
997
998 # Dash mp4 audio
999 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1000 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1001 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1002 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1003 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1004 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1005 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
1006
1007 # Dash webm
1008 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1009 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1010 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1011 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1012 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1013 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1014 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1015 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1016 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1017 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1018 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1019 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1020 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1021 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1022 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1023 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1024 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1025 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1026 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1027 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1028 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1029 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1030
1031 # Dash webm audio
1032 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1033 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1034
1035 # Dash webm audio with opus inside
1036 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1037 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1038 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1039
1040 # RTMP (unnamed)
1041 '_rtmp': {'protocol': 'rtmp'},
1042
1043 # av01 video only formats sometimes served with "unknown" codecs
1044 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1045 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1046 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1047 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1048 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1049 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1050 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1051 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1052 }
1053 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
1054
1055 _GEO_BYPASS = False
1056
1057 IE_NAME = 'youtube'
1058 _TESTS = [
1059 {
1060 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1061 'info_dict': {
1062 'id': 'BaW_jenozKc',
1063 'ext': 'mp4',
1064 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1065 'uploader': 'Philipp Hagemeister',
1066 'uploader_id': 'phihag',
1067 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1068 'channel': 'Philipp Hagemeister',
1069 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1070 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1071 'upload_date': '20121002',
1072 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1073 'categories': ['Science & Technology'],
1074 'tags': ['youtube-dl'],
1075 'duration': 10,
1076 'view_count': int,
1077 'like_count': int,
1078 'availability': 'public',
1079 'playable_in_embed': True,
1080 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1081 'live_status': 'not_live',
1082 'age_limit': 0,
1083 'start_time': 1,
1084 'end_time': 9,
1085 'comment_count': int,
1086 'channel_follower_count': int
1087 }
1088 },
1089 {
1090 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1091 'note': 'Embed-only video (#1746)',
1092 'info_dict': {
1093 'id': 'yZIXLfi8CZQ',
1094 'ext': 'mp4',
1095 'upload_date': '20120608',
1096 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1097 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1098 'uploader': 'SET India',
1099 'uploader_id': 'setindia',
1100 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1101 'age_limit': 18,
1102 },
1103 'skip': 'Private video',
1104 },
1105 {
1106 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1107 'note': 'Use the first video ID in the URL',
1108 'info_dict': {
1109 'id': 'BaW_jenozKc',
1110 'ext': 'mp4',
1111 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1112 'uploader': 'Philipp Hagemeister',
1113 'uploader_id': 'phihag',
1114 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1115 'channel': 'Philipp Hagemeister',
1116 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1117 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1118 'upload_date': '20121002',
1119 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1120 'categories': ['Science & Technology'],
1121 'tags': ['youtube-dl'],
1122 'duration': 10,
1123 'view_count': int,
1124 'like_count': int,
1125 'availability': 'public',
1126 'playable_in_embed': True,
1127 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1128 'live_status': 'not_live',
1129 'age_limit': 0,
1130 'comment_count': int,
1131 'channel_follower_count': int
1132 },
1133 'params': {
1134 'skip_download': True,
1135 },
1136 },
1137 {
1138 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1139 'note': '256k DASH audio (format 141) via DASH manifest',
1140 'info_dict': {
1141 'id': 'a9LDPn-MO4I',
1142 'ext': 'm4a',
1143 'upload_date': '20121002',
1144 'uploader_id': '8KVIDEO',
1145 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1146 'description': '',
1147 'uploader': '8KVIDEO',
1148 'title': 'UHDTV TEST 8K VIDEO.mp4'
1149 },
1150 'params': {
1151 'youtube_include_dash_manifest': True,
1152 'format': '141',
1153 },
1154 'skip': 'format 141 not served anymore',
1155 },
1156 # DASH manifest with encrypted signature
1157 {
1158 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1159 'info_dict': {
1160 'id': 'IB3lcPjvWLA',
1161 'ext': 'm4a',
1162 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1163 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1164 'duration': 244,
1165 'uploader': 'AfrojackVEVO',
1166 'uploader_id': 'AfrojackVEVO',
1167 'upload_date': '20131011',
1168 'abr': 129.495,
1169 'like_count': int,
1170 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1171 'playable_in_embed': True,
1172 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1173 'view_count': int,
1174 'track': 'The Spark',
1175 'live_status': 'not_live',
1176 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1177 'channel': 'Afrojack',
1178 'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
1179 'tags': 'count:19',
1180 'availability': 'public',
1181 'categories': ['Music'],
1182 'age_limit': 0,
1183 'alt_title': 'The Spark',
1184 'channel_follower_count': int
1185 },
1186 'params': {
1187 'youtube_include_dash_manifest': True,
1188 'format': '141/bestaudio[ext=m4a]',
1189 },
1190 },
1191 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1192 {
1193 'note': 'Embed allowed age-gate video',
1194 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1195 'info_dict': {
1196 'id': 'HtVdAasjOgU',
1197 'ext': 'mp4',
1198 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1199 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1200 'duration': 142,
1201 'uploader': 'The Witcher',
1202 'uploader_id': 'WitcherGame',
1203 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1204 'upload_date': '20140605',
1205 'age_limit': 18,
1206 'categories': ['Gaming'],
1207 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1208 'availability': 'needs_auth',
1209 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1210 'like_count': int,
1211 'channel': 'The Witcher',
1212 'live_status': 'not_live',
1213 'tags': 'count:17',
1214 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1215 'playable_in_embed': True,
1216 'view_count': int,
1217 'channel_follower_count': int
1218 },
1219 },
1220 {
1221 'note': 'Age-gate video with embed allowed in public site',
1222 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1223 'info_dict': {
1224 'id': 'HsUATh_Nc2U',
1225 'ext': 'mp4',
1226 'title': 'Godzilla 2 (Official Video)',
1227 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1228 'upload_date': '20200408',
1229 'uploader_id': 'FlyingKitty900',
1230 'uploader': 'FlyingKitty',
1231 'age_limit': 18,
1232 'availability': 'needs_auth',
1233 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1234 'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
1235 'channel': 'FlyingKitty',
1236 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1237 'view_count': int,
1238 'categories': ['Entertainment'],
1239 'live_status': 'not_live',
1240 'tags': ['Flyingkitty', 'godzilla 2'],
1241 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1242 'like_count': int,
1243 'duration': 177,
1244 'playable_in_embed': True,
1245 'channel_follower_count': int
1246 },
1247 },
1248 {
1249 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1250 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1251 'info_dict': {
1252 'id': 'Tq92D6wQ1mg',
1253 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1254 'ext': 'mp4',
1255 'upload_date': '20191228',
1256 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1257 'uploader': 'Projekt Melody',
1258 'description': 'md5:17eccca93a786d51bc67646756894066',
1259 'age_limit': 18,
1260 'like_count': int,
1261 'availability': 'needs_auth',
1262 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1263 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1264 'view_count': int,
1265 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1266 'channel': 'Projekt Melody',
1267 'live_status': 'not_live',
1268 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1269 'playable_in_embed': True,
1270 'categories': ['Entertainment'],
1271 'duration': 106,
1272 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1273 'comment_count': int,
1274 'channel_follower_count': int
1275 },
1276 },
1277 {
1278 'note': 'Non-Agegated non-embeddable video',
1279 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1280 'info_dict': {
1281 'id': 'MeJVWBSsPAY',
1282 'ext': 'mp4',
1283 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1284 'uploader': 'Herr Lurik',
1285 'uploader_id': 'st3in234',
1286 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1287 'upload_date': '20130730',
1288 'track': 'Such mich find mich',
1289 'age_limit': 0,
1290 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1291 'like_count': int,
1292 'playable_in_embed': False,
1293 'creator': 'OOMPH!',
1294 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1295 'view_count': int,
1296 'alt_title': 'Such mich find mich',
1297 'duration': 210,
1298 'channel': 'Herr Lurik',
1299 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1300 'categories': ['Music'],
1301 'availability': 'public',
1302 'uploader_url': 'http://www.youtube.com/user/st3in234',
1303 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1304 'live_status': 'not_live',
1305 'artist': 'OOMPH!',
1306 'channel_follower_count': int
1307 },
1308 },
1309 {
1310 'note': 'Non-bypassable age-gated video',
1311 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1312 'only_matching': True,
1313 },
1314 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1315 # YouTube Red ad is not captured for creator
1316 {
1317 'url': '__2ABJjxzNo',
1318 'info_dict': {
1319 'id': '__2ABJjxzNo',
1320 'ext': 'mp4',
1321 'duration': 266,
1322 'upload_date': '20100430',
1323 'uploader_id': 'deadmau5',
1324 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1325 'creator': 'deadmau5',
1326 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1327 'uploader': 'deadmau5',
1328 'title': 'Deadmau5 - Some Chords (HD)',
1329 'alt_title': 'Some Chords',
1330 'availability': 'public',
1331 'tags': 'count:14',
1332 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1333 'view_count': int,
1334 'live_status': 'not_live',
1335 'channel': 'deadmau5',
1336 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1337 'like_count': int,
1338 'track': 'Some Chords',
1339 'artist': 'deadmau5',
1340 'playable_in_embed': True,
1341 'age_limit': 0,
1342 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1343 'categories': ['Music'],
1344 'album': 'Some Chords',
1345 'channel_follower_count': int
1346 },
1347 'expected_warnings': [
1348 'DASH manifest missing',
1349 ]
1350 },
1351 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1352 {
1353 'url': 'lqQg6PlCWgI',
1354 'info_dict': {
1355 'id': 'lqQg6PlCWgI',
1356 'ext': 'mp4',
1357 'duration': 6085,
1358 'upload_date': '20150827',
1359 'uploader_id': 'olympic',
1360 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1361 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
1362 'uploader': 'Olympics',
1363 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1364 'like_count': int,
1365 'release_timestamp': 1343767800,
1366 'playable_in_embed': True,
1367 'categories': ['Sports'],
1368 'release_date': '20120731',
1369 'channel': 'Olympics',
1370 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1371 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1372 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1373 'age_limit': 0,
1374 'availability': 'public',
1375 'live_status': 'was_live',
1376 'view_count': int,
1377 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
1378 'channel_follower_count': int
1379 },
1380 'params': {
1381 'skip_download': 'requires avconv',
1382 }
1383 },
1384 # Non-square pixels
1385 {
1386 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1387 'info_dict': {
1388 'id': '_b-2C3KPAM0',
1389 'ext': 'mp4',
1390 'stretched_ratio': 16 / 9.,
1391 'duration': 85,
1392 'upload_date': '20110310',
1393 'uploader_id': 'AllenMeow',
1394 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1395 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1396 'uploader': '孫ᄋᄅ',
1397 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1398 'playable_in_embed': True,
1399 'channel': '孫ᄋᄅ',
1400 'age_limit': 0,
1401 'tags': 'count:11',
1402 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1403 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1404 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1405 'view_count': int,
1406 'categories': ['People & Blogs'],
1407 'like_count': int,
1408 'live_status': 'not_live',
1409 'availability': 'unlisted',
1410 'comment_count': int,
1411 'channel_follower_count': int
1412 },
1413 },
1414 # url_encoded_fmt_stream_map is empty string
1415 {
1416 'url': 'qEJwOuvDf7I',
1417 'info_dict': {
1418 'id': 'qEJwOuvDf7I',
1419 'ext': 'webm',
1420 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1421 'description': '',
1422 'upload_date': '20150404',
1423 'uploader_id': 'spbelect',
1424 'uploader': 'Наблюдатели Петербурга',
1425 },
1426 'params': {
1427 'skip_download': 'requires avconv',
1428 },
1429 'skip': 'This live event has ended.',
1430 },
1431 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1432 {
1433 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1434 'info_dict': {
1435 'id': 'FIl7x6_3R5Y',
1436 'ext': 'webm',
1437 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1438 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1439 'duration': 220,
1440 'upload_date': '20150625',
1441 'uploader_id': 'dorappi2000',
1442 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1443 'uploader': 'dorappi2000',
1444 'formats': 'mincount:31',
1445 },
1446 'skip': 'not actual anymore',
1447 },
1448 # DASH manifest with segment_list
1449 {
1450 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1451 'md5': '8ce563a1d667b599d21064e982ab9e31',
1452 'info_dict': {
1453 'id': 'CsmdDsKjzN8',
1454 'ext': 'mp4',
1455 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1456 'uploader': 'Airtek',
1457 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1458 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1459 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1460 },
1461 'params': {
1462 'youtube_include_dash_manifest': True,
1463 'format': '135', # bestvideo
1464 },
1465 'skip': 'This live event has ended.',
1466 },
1467 {
1468 # Multifeed videos (multiple cameras), URL is for Main Camera
1469 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1470 'info_dict': {
1471 'id': 'jvGDaLqkpTg',
1472 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1473 'description': 'md5:e03b909557865076822aa169218d6a5d',
1474 },
1475 'playlist': [{
1476 'info_dict': {
1477 'id': 'jvGDaLqkpTg',
1478 'ext': 'mp4',
1479 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1480 'description': 'md5:e03b909557865076822aa169218d6a5d',
1481 'duration': 10643,
1482 'upload_date': '20161111',
1483 'uploader': 'Team PGP',
1484 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1485 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1486 },
1487 }, {
1488 'info_dict': {
1489 'id': '3AKt1R1aDnw',
1490 'ext': 'mp4',
1491 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1492 'description': 'md5:e03b909557865076822aa169218d6a5d',
1493 'duration': 10991,
1494 'upload_date': '20161111',
1495 'uploader': 'Team PGP',
1496 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1497 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1498 },
1499 }, {
1500 'info_dict': {
1501 'id': 'RtAMM00gpVc',
1502 'ext': 'mp4',
1503 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1504 'description': 'md5:e03b909557865076822aa169218d6a5d',
1505 'duration': 10995,
1506 'upload_date': '20161111',
1507 'uploader': 'Team PGP',
1508 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1509 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1510 },
1511 }, {
1512 'info_dict': {
1513 'id': '6N2fdlP3C5U',
1514 'ext': 'mp4',
1515 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1516 'description': 'md5:e03b909557865076822aa169218d6a5d',
1517 'duration': 10990,
1518 'upload_date': '20161111',
1519 'uploader': 'Team PGP',
1520 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1521 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1522 },
1523 }],
1524 'params': {
1525 'skip_download': True,
1526 },
1527 'skip': 'Not multifeed anymore',
1528 },
1529 {
1530 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1531 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1532 'info_dict': {
1533 'id': 'gVfLd0zydlo',
1534 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1535 },
1536 'playlist_count': 2,
1537 'skip': 'Not multifeed anymore',
1538 },
1539 {
1540 'url': 'https://vid.plus/FlRa-iH7PGw',
1541 'only_matching': True,
1542 },
1543 {
1544 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1545 'only_matching': True,
1546 },
1547 {
1548 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1549 # Also tests cut-off URL expansion in video description (see
1550 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1551 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1552 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1553 'info_dict': {
1554 'id': 'lsguqyKfVQg',
1555 'ext': 'mp4',
1556 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1557 'alt_title': 'Dark Walk',
1558 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1559 'duration': 133,
1560 'upload_date': '20151119',
1561 'uploader_id': 'IronSoulElf',
1562 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1563 'uploader': 'IronSoulElf',
1564 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1565 'track': 'Dark Walk',
1566 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1567 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1568 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1569 'categories': ['Film & Animation'],
1570 'view_count': int,
1571 'live_status': 'not_live',
1572 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1573 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1574 'tags': 'count:13',
1575 'availability': 'public',
1576 'channel': 'IronSoulElf',
1577 'playable_in_embed': True,
1578 'like_count': int,
1579 'age_limit': 0,
1580 'channel_follower_count': int
1581 },
1582 'params': {
1583 'skip_download': True,
1584 },
1585 },
1586 {
1587 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1588 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1589 'only_matching': True,
1590 },
1591 {
1592 # Video with yt:stretch=17:0
1593 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1594 'info_dict': {
1595 'id': 'Q39EVAstoRM',
1596 'ext': 'mp4',
1597 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1598 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1599 'upload_date': '20151107',
1600 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1601 'uploader': 'CH GAMER DROID',
1602 },
1603 'params': {
1604 'skip_download': True,
1605 },
1606 'skip': 'This video does not exist.',
1607 },
1608 {
1609 # Video with incomplete 'yt:stretch=16:'
1610 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1611 'only_matching': True,
1612 },
1613 {
1614 # Video licensed under Creative Commons
1615 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1616 'info_dict': {
1617 'id': 'M4gD1WSo5mA',
1618 'ext': 'mp4',
1619 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1620 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1621 'duration': 721,
1622 'upload_date': '20150128',
1623 'uploader_id': 'BerkmanCenter',
1624 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1625 'uploader': 'The Berkman Klein Center for Internet & Society',
1626 'license': 'Creative Commons Attribution license (reuse allowed)',
1627 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1628 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1629 'like_count': int,
1630 'age_limit': 0,
1631 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1632 'channel': 'The Berkman Klein Center for Internet & Society',
1633 'availability': 'public',
1634 'view_count': int,
1635 'categories': ['Education'],
1636 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1637 'live_status': 'not_live',
1638 'playable_in_embed': True,
1639 'comment_count': int,
1640 'channel_follower_count': int
1641 },
1642 'params': {
1643 'skip_download': True,
1644 },
1645 },
1646 {
1647 # Channel-like uploader_url
1648 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1649 'info_dict': {
1650 'id': 'eQcmzGIKrzg',
1651 'ext': 'mp4',
1652 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1653 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1654 'duration': 4060,
1655 'upload_date': '20151120',
1656 'uploader': 'Bernie Sanders',
1657 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1658 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1659 'license': 'Creative Commons Attribution license (reuse allowed)',
1660 'playable_in_embed': True,
1661 'tags': 'count:12',
1662 'like_count': int,
1663 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1664 'age_limit': 0,
1665 'availability': 'public',
1666 'categories': ['News & Politics'],
1667 'channel': 'Bernie Sanders',
1668 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1669 'view_count': int,
1670 'live_status': 'not_live',
1671 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1672 'comment_count': int,
1673 'channel_follower_count': int
1674 },
1675 'params': {
1676 'skip_download': True,
1677 },
1678 },
1679 {
1680 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1681 'only_matching': True,
1682 },
1683 {
1684 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1685 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1686 'only_matching': True,
1687 },
1688 {
1689 # Rental video preview
1690 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1691 'info_dict': {
1692 'id': 'uGpuVWrhIzE',
1693 'ext': 'mp4',
1694 'title': 'Piku - Trailer',
1695 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1696 'upload_date': '20150811',
1697 'uploader': 'FlixMatrix',
1698 'uploader_id': 'FlixMatrixKaravan',
1699 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1700 'license': 'Standard YouTube License',
1701 },
1702 'params': {
1703 'skip_download': True,
1704 },
1705 'skip': 'This video is not available.',
1706 },
1707 {
1708 # YouTube Red video with episode data
1709 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1710 'info_dict': {
1711 'id': 'iqKdEhx-dD4',
1712 'ext': 'mp4',
1713 'title': 'Isolation - Mind Field (Ep 1)',
1714 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1715 'duration': 2085,
1716 'upload_date': '20170118',
1717 'uploader': 'Vsauce',
1718 'uploader_id': 'Vsauce',
1719 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1720 'series': 'Mind Field',
1721 'season_number': 1,
1722 'episode_number': 1,
1723 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1724 'tags': 'count:12',
1725 'view_count': int,
1726 'availability': 'public',
1727 'age_limit': 0,
1728 'channel': 'Vsauce',
1729 'episode': 'Episode 1',
1730 'categories': ['Entertainment'],
1731 'season': 'Season 1',
1732 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1733 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1734 'like_count': int,
1735 'playable_in_embed': True,
1736 'live_status': 'not_live',
1737 'channel_follower_count': int
1738 },
1739 'params': {
1740 'skip_download': True,
1741 },
1742 'expected_warnings': [
1743 'Skipping DASH manifest',
1744 ],
1745 },
1746 {
1747 # The following content has been identified by the YouTube community
1748 # as inappropriate or offensive to some audiences.
1749 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1750 'info_dict': {
1751 'id': '6SJNVb0GnPI',
1752 'ext': 'mp4',
1753 'title': 'Race Differences in Intelligence',
1754 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1755 'duration': 965,
1756 'upload_date': '20140124',
1757 'uploader': 'New Century Foundation',
1758 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1759 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1760 },
1761 'params': {
1762 'skip_download': True,
1763 },
1764 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1765 },
1766 {
1767 # itag 212
1768 'url': '1t24XAntNCY',
1769 'only_matching': True,
1770 },
1771 {
1772 # geo restricted to JP
1773 'url': 'sJL6WA-aGkQ',
1774 'only_matching': True,
1775 },
1776 {
1777 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1778 'only_matching': True,
1779 },
1780 {
1781 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1782 'only_matching': True,
1783 },
1784 {
1785 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1786 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1787 'only_matching': True,
1788 },
1789 {
1790 # DRM protected
1791 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1792 'only_matching': True,
1793 },
1794 {
1795 # Video with unsupported adaptive stream type formats
1796 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1797 'info_dict': {
1798 'id': 'Z4Vy8R84T1U',
1799 'ext': 'mp4',
1800 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1801 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1802 'duration': 433,
1803 'upload_date': '20130923',
1804 'uploader': 'Amelia Putri Harwita',
1805 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1806 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1807 'formats': 'maxcount:10',
1808 },
1809 'params': {
1810 'skip_download': True,
1811 'youtube_include_dash_manifest': False,
1812 },
1813 'skip': 'not actual anymore',
1814 },
1815 {
1816 # Youtube Music Auto-generated description
1817 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1818 'info_dict': {
1819 'id': 'MgNrAu2pzNs',
1820 'ext': 'mp4',
1821 'title': 'Voyeur Girl',
1822 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1823 'upload_date': '20190312',
1824 'uploader': 'Stephen - Topic',
1825 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1826 'artist': 'Stephen',
1827 'track': 'Voyeur Girl',
1828 'album': 'it\'s too much love to know my dear',
1829 'release_date': '20190313',
1830 'release_year': 2019,
1831 'alt_title': 'Voyeur Girl',
1832 'view_count': int,
1833 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1834 'playable_in_embed': True,
1835 'like_count': int,
1836 'categories': ['Music'],
1837 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1838 'channel': 'Stephen',
1839 'availability': 'public',
1840 'creator': 'Stephen',
1841 'duration': 169,
1842 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
1843 'age_limit': 0,
1844 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1845 'tags': 'count:11',
1846 'live_status': 'not_live',
1847 'channel_follower_count': int
1848 },
1849 'params': {
1850 'skip_download': True,
1851 },
1852 },
1853 {
1854 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1855 'only_matching': True,
1856 },
1857 {
1858 # invalid -> valid video id redirection
1859 'url': 'DJztXj2GPfl',
1860 'info_dict': {
1861 'id': 'DJztXj2GPfk',
1862 'ext': 'mp4',
1863 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1864 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1865 'upload_date': '20090125',
1866 'uploader': 'Prochorowka',
1867 'uploader_id': 'Prochorowka',
1868 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1869 'artist': 'Panjabi MC',
1870 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1871 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1872 },
1873 'params': {
1874 'skip_download': True,
1875 },
1876 'skip': 'Video unavailable',
1877 },
1878 {
1879 # empty description results in an empty string
1880 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1881 'info_dict': {
1882 'id': 'x41yOUIvK2k',
1883 'ext': 'mp4',
1884 'title': 'IMG 3456',
1885 'description': '',
1886 'upload_date': '20170613',
1887 'uploader_id': 'ElevageOrVert',
1888 'uploader': 'ElevageOrVert',
1889 'view_count': int,
1890 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
1891 'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
1892 'like_count': int,
1893 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
1894 'tags': [],
1895 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
1896 'availability': 'public',
1897 'age_limit': 0,
1898 'categories': ['Pets & Animals'],
1899 'duration': 7,
1900 'playable_in_embed': True,
1901 'live_status': 'not_live',
1902 'channel': 'ElevageOrVert',
1903 'channel_follower_count': int
1904 },
1905 'params': {
1906 'skip_download': True,
1907 },
1908 },
1909 {
1910 # with '};' inside yt initial data (see [1])
1911 # see [2] for an example with '};' inside ytInitialPlayerResponse
1912 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1913 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1914 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1915 'info_dict': {
1916 'id': 'CHqg6qOn4no',
1917 'ext': 'mp4',
1918 'title': 'Part 77 Sort a list of simple types in c#',
1919 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1920 'upload_date': '20130831',
1921 'uploader_id': 'kudvenkat',
1922 'uploader': 'kudvenkat',
1923 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
1924 'like_count': int,
1925 'uploader_url': 'http://www.youtube.com/user/kudvenkat',
1926 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
1927 'live_status': 'not_live',
1928 'categories': ['Education'],
1929 'availability': 'public',
1930 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
1931 'tags': 'count:12',
1932 'playable_in_embed': True,
1933 'age_limit': 0,
1934 'view_count': int,
1935 'duration': 522,
1936 'channel': 'kudvenkat',
1937 'comment_count': int,
1938 'channel_follower_count': int
1939 },
1940 'params': {
1941 'skip_download': True,
1942 },
1943 },
1944 {
1945 # another example of '};' in ytInitialData
1946 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1947 'only_matching': True,
1948 },
1949 {
1950 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1951 'only_matching': True,
1952 },
1953 {
1954 # https://github.com/ytdl-org/youtube-dl/pull/28094
1955 'url': 'OtqTfy26tG0',
1956 'info_dict': {
1957 'id': 'OtqTfy26tG0',
1958 'ext': 'mp4',
1959 'title': 'Burn Out',
1960 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1961 'upload_date': '20141120',
1962 'uploader': 'The Cinematic Orchestra - Topic',
1963 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1964 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1965 'artist': 'The Cinematic Orchestra',
1966 'track': 'Burn Out',
1967 'album': 'Every Day',
1968 'like_count': int,
1969 'live_status': 'not_live',
1970 'alt_title': 'Burn Out',
1971 'duration': 614,
1972 'age_limit': 0,
1973 'view_count': int,
1974 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1975 'creator': 'The Cinematic Orchestra',
1976 'channel': 'The Cinematic Orchestra',
1977 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
1978 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1979 'availability': 'public',
1980 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
1981 'categories': ['Music'],
1982 'playable_in_embed': True,
1983 'channel_follower_count': int
1984 },
1985 'params': {
1986 'skip_download': True,
1987 },
1988 },
1989 {
1990 # controversial video, only works with bpctr when authenticated with cookies
1991 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1992 'only_matching': True,
1993 },
1994 {
1995 # controversial video, requires bpctr/contentCheckOk
1996 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1997 'info_dict': {
1998 'id': 'SZJvDhaSDnc',
1999 'ext': 'mp4',
2000 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2001 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
2002 'uploader': 'CBS Mornings',
2003 'uploader_id': 'CBSThisMorning',
2004 'upload_date': '20140716',
2005 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2006 'duration': 170,
2007 'categories': ['News & Politics'],
2008 'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
2009 'view_count': int,
2010 'channel': 'CBS Mornings',
2011 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2012 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2013 'age_limit': 18,
2014 'availability': 'needs_auth',
2015 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2016 'like_count': int,
2017 'live_status': 'not_live',
2018 'playable_in_embed': True,
2019 'channel_follower_count': int
2020 }
2021 },
2022 {
2023 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2024 'url': 'cBvYw8_A0vQ',
2025 'info_dict': {
2026 'id': 'cBvYw8_A0vQ',
2027 'ext': 'mp4',
2028 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2029 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2030 'upload_date': '20201120',
2031 'uploader': 'Walk around Japan',
2032 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2033 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2034 'duration': 1456,
2035 'categories': ['Travel & Events'],
2036 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2037 'view_count': int,
2038 'channel': 'Walk around Japan',
2039 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2040 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2041 'age_limit': 0,
2042 'availability': 'public',
2043 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2044 'live_status': 'not_live',
2045 'playable_in_embed': True,
2046 'channel_follower_count': int
2047 },
2048 'params': {
2049 'skip_download': True,
2050 },
2051 }, {
2052 # Has multiple audio streams
2053 'url': 'WaOKSUlf4TM',
2054 'only_matching': True
2055 }, {
2056 # Requires Premium: has format 141 when requested using YTM url
2057 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2058 'only_matching': True
2059 }, {
2060 # multiple subtitles with same lang_code
2061 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2062 'only_matching': True,
2063 }, {
2064 # Force use android client fallback
2065 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2066 'info_dict': {
2067 'id': 'YOelRv7fMxY',
2068 'title': 'DIGGING A SECRET TUNNEL Part 1',
2069 'ext': '3gp',
2070 'upload_date': '20210624',
2071 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2072 'uploader': 'colinfurze',
2073 'uploader_id': 'colinfurze',
2074 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
2075 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2076 'duration': 596,
2077 'categories': ['Entertainment'],
2078 'uploader_url': 'http://www.youtube.com/user/colinfurze',
2079 'view_count': int,
2080 'channel': 'colinfurze',
2081 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2082 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2083 'age_limit': 0,
2084 'availability': 'public',
2085 'like_count': int,
2086 'live_status': 'not_live',
2087 'playable_in_embed': True,
2088 'channel_follower_count': int
2089 },
2090 'params': {
2091 'format': '17', # 3gp format available on android
2092 'extractor_args': {'youtube': {'player_client': ['android']}},
2093 },
2094 },
2095 {
2096 # Skip download of additional client configs (remix client config in this case)
2097 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2098 'only_matching': True,
2099 'params': {
2100 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2101 },
2102 }, {
2103 # shorts
2104 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2105 'only_matching': True,
2106 }, {
2107 'note': 'Storyboards',
2108 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2109 'info_dict': {
2110 'id': '5KLPxDtMqe8',
2111 'ext': 'mhtml',
2112 'format_id': 'sb0',
2113 'title': 'Your Brain is Plastic',
2114 'uploader_id': 'scishow',
2115 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2116 'upload_date': '20140324',
2117 'uploader': 'SciShow',
2118 'like_count': int,
2119 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2120 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2121 'view_count': int,
2122 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2123 'playable_in_embed': True,
2124 'tags': 'count:12',
2125 'uploader_url': 'http://www.youtube.com/user/scishow',
2126 'availability': 'public',
2127 'channel': 'SciShow',
2128 'live_status': 'not_live',
2129 'duration': 248,
2130 'categories': ['Education'],
2131 'age_limit': 0,
2132 'channel_follower_count': int
2133 }, 'params': {'format': 'mhtml', 'skip_download': True}
2134 }, {
2135 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2136 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2137 'info_dict': {
2138 'id': '2NUZ8W2llS4',
2139 'ext': 'mp4',
2140 'title': 'The NP that test your phone performance 🙂',
2141 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2142 'uploader': 'Leon Nguyen',
2143 'uploader_id': 'VNSXIII',
2144 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2145 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2146 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2147 'duration': 21,
2148 'view_count': int,
2149 'age_limit': 0,
2150 'categories': ['Gaming'],
2151 'tags': 'count:23',
2152 'playable_in_embed': True,
2153 'live_status': 'not_live',
2154 'upload_date': '20220103',
2155 'like_count': int,
2156 'availability': 'public',
2157 'channel': 'Leon Nguyen',
2158 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2159 'comment_count': int,
2160 'channel_follower_count': int
2161 }
2162 }, {
2163 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2164 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2165 'info_dict': {
2166 'id': 'mzZzzBU6lrM',
2167 'ext': 'mp4',
2168 'title': 'I Met GeorgeNotFound In Real Life...',
2169 'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
2170 'uploader': 'Quackity',
2171 'uploader_id': 'QuackityHQ',
2172 'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
2173 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2174 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2175 'duration': 955,
2176 'view_count': int,
2177 'age_limit': 0,
2178 'categories': ['Entertainment'],
2179 'tags': 'count:26',
2180 'playable_in_embed': True,
2181 'live_status': 'not_live',
2182 'release_timestamp': 1641172509,
2183 'release_date': '20220103',
2184 'upload_date': '20220103',
2185 'like_count': int,
2186 'availability': 'public',
2187 'channel': 'Quackity',
2188 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2189 'channel_follower_count': int
2190 }
2191 },
2192 { # continuous livestream. Microformat upload date should be preferred.
2193 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2194 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2195 'info_dict': {
2196 'id': 'kgx4WGK0oNU',
2197 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2198 'ext': 'mp4',
2199 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2200 'availability': 'public',
2201 'age_limit': 0,
2202 'release_timestamp': 1637975704,
2203 'upload_date': '20210619',
2204 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2205 'live_status': 'is_live',
2206 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
2207 'uploader': '阿鲍Abao',
2208 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2209 'channel': 'Abao in Tokyo',
2210 'channel_follower_count': int,
2211 'release_date': '20211127',
2212 'tags': 'count:39',
2213 'categories': ['People & Blogs'],
2214 'like_count': int,
2215 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2216 'view_count': int,
2217 'playable_in_embed': True,
2218 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
2219 },
2220 'params': {'skip_download': True}
2221 }, {
2222 # Story. Requires specific player params to work.
2223 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
2224 'info_dict': {
2225 'id': 'vv8qTUWmulI',
2226 'ext': 'mp4',
2227 'availability': 'unlisted',
2228 'view_count': int,
2229 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
2230 'upload_date': '20220526',
2231 'categories': ['Education'],
2232 'title': 'Story',
2233 'channel': 'IT\'S HISTORY',
2234 'description': '',
2235 'uploader_id': 'BlastfromthePast',
2236 'duration': 12,
2237 'uploader': 'IT\'S HISTORY',
2238 'playable_in_embed': True,
2239 'age_limit': 0,
2240 'live_status': 'not_live',
2241 'tags': [],
2242 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
2243 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
2244 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
2245 },
2246 'skip': 'stories get removed after some period of time',
2247 }, {
2248 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2249 'info_dict': {
2250 'id': 'tjjjtzRLHvA',
2251 'ext': 'mp4',
2252 'title': 'ハッシュタグ無し };if window.ytcsi',
2253 'upload_date': '20220323',
2254 'like_count': int,
2255 'availability': 'unlisted',
2256 'channel': 'nao20010128nao',
2257 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
2258 'age_limit': 0,
2259 'uploader': 'nao20010128nao',
2260 'uploader_id': 'nao20010128nao',
2261 'categories': ['Music'],
2262 'view_count': int,
2263 'description': '',
2264 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2265 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2266 'live_status': 'not_live',
2267 'playable_in_embed': True,
2268 'channel_follower_count': int,
2269 'duration': 6,
2270 'tags': [],
2271 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
2272 }
2273 }, {
2274 'note': '6 channel audio',
2275 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2276 'only_matching': True,
2277 }
2278 ]
2279
# Test cases whose 'url' is a third-party web page that embeds a YouTube video,
# while 'info_dict' describes the embedded YouTube video itself.
# NOTE(review): how this list is consumed is not visible in this part of the
# file -- presumably by the shared extractor test harness; confirm.
_WEBPAGE_TESTS = [
    # YouTube <object> embed
    {
        'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
        # NOTE(review): this value is identical to the hash in the 'description'
        # entry below, which looks like a copy-paste slip -- confirm.
        'md5': '873c81d308b979f0e23ee7e620b312a3',
        'info_dict': {
            'id': 'msN87y-iEx0',
            'ext': 'mp4',
            'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
            'upload_date': '20080526',
            'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
            'uploader': 'Christopher Sykes',
            'uploader_id': 'ChristopherJSykes',
            'age_limit': 0,
            'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
            'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
            # A bare type (int) is used where a fixed value cannot be pinned.
            'like_count': int,
            'comment_count': int,
            'channel': 'Christopher Sykes',
            'live_status': 'not_live',
            'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
            'availability': 'public',
            'duration': 195,
            'view_count': int,
            'categories': ['Science & Technology'],
            'channel_follower_count': int,
            'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
        },
        'params': {
            'skip_download': True,
        }
    },
]
2315
@classmethod
def suitable(cls, url):
    """Report whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected outright; every other URL is judged by the parent class.
    """
    # NOTE(review): the function-scope import is kept exactly as in the
    # original even though the module also imports parse_qs at the top --
    # presumably deliberate (keeps the method self-contained); confirm
    # before hoisting it.
    from ..utils import parse_qs

    first_list_value = parse_qs(url).get('list', [None])[0]
    return False if first_list_value else super().suitable(url)
2324
def __init__(self, *args, **kwargs):
    """Initialise the parent class, then create two empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # _code_cache is filled by _load_player (player id -> downloaded player code).
    # How _player_cache is used is not visible in this part of the file.
    self._player_cache = {}
    self._code_cache = {}
2329
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
    """Rewire the "from start" formats of a live video to use generated DASH fragments.

    Only entries of *formats* whose 'is_from_start' value is truthy are touched.
    Each of them is marked live, switched to the
    'http_dash_segments_generator' protocol and given a 'fragments' callable:
    a partial of ``_live_dash_fragments`` bound to the format id,
    *live_start_time* and a manifest-refresh callback.

    The format dicts are modified in place; nothing is returned.
    """
    refresh_lock = threading.Lock()
    is_live, start_time = True, time.time()
    formats = [fmt for fmt in formats if fmt.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        # Re-download the player responses and rebuild the format list, but
        # only once more than `delay` seconds have passed since the last fetch.
        nonlocal formats, start_time, is_live
        if time.time() > start_time + delay:
            _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
            video_details = traverse_obj(
                prs, (..., 'videoDetails'), expected_type=dict, default=[])
            microformats = traverse_obj(
                prs, (..., 'microformat', 'playerMicroformatRenderer'),
                expected_type=dict, default=[])
            _, is_live, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
            start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        # Only the refetch itself is serialised; the lookup below runs unlocked.
        with refresh_lock:
            refetch_manifest(format_id, delay)

        match = next((fmt for fmt in formats if fmt['format_id'] == format_id), None)
        if match:
            return match['manifest_url'], match['manifest_stream_number'], is_live

        if is_live:
            self.report_warning(
                f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
        else:
            self.to_screen(f'{video_id}: Video is no longer live')
        return None

    for fmt in formats:
        fmt['is_live'] = True
        fmt['protocol'] = 'http_dash_segments_generator'
        fmt['fragments'] = functools.partial(
            self._live_dash_fragments, fmt['format_id'], live_start_time, mpd_feed)
2373
def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
    """Generate fragment descriptors for a live DASH stream until it ends.

    @param format_id        id of the format being downloaded; passed to mpd_feed
    @param live_start_time  start time of the stream (same clock as time.time()),
                            or a falsy value when unknown
    @param mpd_feed         callable (format_id, delay) returning
                            (manifest_url, manifest_stream_number, is_live) or None
    @param ctx              mapping; 'start' is read and 'last_error' is popped
    @returns                generator of {'url': ..., 'fragment_count': ...} dicts

    The loop polls roughly every FETCH_SPAN seconds. The newest sequence number
    is taken from the "X-Head-Seqnum" header of the last yielded segment when
    there is one, and from the manifest otherwise. Failed polls add to
    `no_fragment_score`; the generator gives up once it exceeds 30.
    """
    # FETCH_SPAN: seconds between polls. MAX_DURATION: 432000 s == 120 hours.
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # The closure below hands `last_seq` back on its failure paths. It must be
    # bound before the first manifest poll, otherwise an empty manifest on that
    # very first poll raises NameError instead of being retried.
    last_seq = 0

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        """
        @returns (should_continue, last_seq)
        """
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        # Ask for a quick manifest refresh when requested or after an HTTP 403
        expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        try:
            fmts, _ = self._extract_mpd_formats_and_subtitles(
                mpd_url, None, note=False, errnote=False, fatal=False)
        except ExtractorError:
            fmts = None
        if not fmts:
            no_fragment_score += 2
            return False, last_seq
        # NOTE(review): next() has no default here, so a manifest that lacks
        # `stream_number` raises StopIteration -- confirm whether that is intended.
        fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    while is_live:
        fetch_time = time.time()
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            # Never start further back than MAX_DURATION worth of fragments
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # Used purely as control flow: caught right below
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                    'fragment_count': last_seq,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2475
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in *ytcfgs*, or None.

    Each config is searched for 'PLAYER_JS_URL' first and then for a 'jsUrl'
    under 'WEB_PLAYER_CONTEXT_CONFIGS'; only a single string result is taken.
    The result is resolved against https://www.youtube.com.
    The *webpage* argument is accepted but not used here.
    """
    relative_url = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', relative_url) if relative_url else None
2483
def _download_player_url(self, video_id, fatal=False):
    """Build the player JS URL from the version named in the iframe API script.

    Downloads https://www.youtube.com/iframe_api and looks for an 8-digit hex
    player version in it. Returns the base.js URL for that version, or None
    when the download or the search produced nothing. *fatal* is forwarded to
    both the download and the regex search.
    """
    iframe_api_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_api_js:
        return None

    # The slashes around the version may be backslash-escaped in the script
    player_version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_api_js, 'player version', fatal=fatal)
    if not player_version:
        return None
    return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2493
def _signature_cache_id(self, example_sig):
    """ Return a string representation of a signature """
    # Only the lengths of the dot-separated parts matter, e.g. 'abc.de' -> '3.2'
    part_lengths = [len(part) for part in example_sig.split('.')]
    return '.'.join(map(str, part_lengths))
2497
@classmethod
def _extract_player_info(cls, player_url):
    """Return the player ID contained in player_url.

    The ID is the `id` group of the first pattern in `cls._PLAYER_INFO_RE`
    that matches. Raises ExtractorError if no pattern matches.
    """
    attempts = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    id_match = next(filter(None, attempts), None)
    if id_match is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_match.group('id')
2507
def _load_player(self, video_id, player_url, fatal=True):
    """Return the player JS code for player_url, downloading it only if not yet cached.

    Downloaded code is kept in `self._code_cache`, keyed by player ID. Returns
    None when nothing is cached and the download produced nothing.
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache.get(player_id)

    downloaded = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    if downloaded:
        self._code_cache[player_id] = downloaded
    return self._code_cache.get(player_id)
2518
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that rearranges an encrypted signature into the decrypted one.

    The rearrangement is a list of source indices. It is read from the
    'youtube-sigfuncs' cache section when available; otherwise it is derived by
    running the player's signature function on a probe string and then stored.
    """
    player_id = self._extract_player_info(player_url)

    # Cache key: player ID plus the lengths of the signature's dot-separated parts
    func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
    assert os.path.basename(func_id) == func_id

    self.write_debug(f'Extracting signature function {func_id}')
    cache_spec = self.cache.load('youtube-sigfuncs', func_id)

    if not cache_spec:
        code = self._load_player(video_id, player_url)
        if code:
            sig_func = self._parse_sig_js(code)
            # The probe is chr(0), chr(1), ..., so each output character's
            # ordinal is the input position it was taken from
            probe = ''.join(chr(position) for position in range(len(example_sig)))
            cache_spec = [ord(char) for char in sig_func(probe)]
            self.cache.store('youtube-sigfuncs', func_id, cache_spec)

    def apply_spec(s):
        return ''.join(s[i] for i in cache_spec)

    return apply_spec
2538
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the extracted signature function.

    Does nothing unless the `youtube_print_sig_code` param is set.

    `func` is run on a probe string made of chr(0), chr(1), ..., so the ordinal
    of each output character is the input index it was taken from. That index
    list is rendered as a sum of `s[...]` index and slice expressions.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        """Yield `s[...]` expressions covering idxs; runs of +1/-1 steps become slices."""
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            # A descending run that reaches index 0 has no valid explicit stop
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return f's[{starts}{ends}{steps}]'

        if not idxs:
            return

        step = None
        # Quell pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        # Pre-bind i: with a single index the loop below never runs, yet the
        # code after the loop still has to emit that one index
        i = idxs[0]
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    # An empty index list renders as an empty string literal
    expr_code = ' + '.join(gen_sig_code(cache_spec)) or "''"
    signature_id_tuple = '(%s)' % (
        ', '.join(str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
2580
def _parse_sig_js(self, jscode):
    """Return a callable that runs the player's signature function on a string.

    The function name is found by trying the patterns below against jscode;
    the function is then extracted with JSInterpreter.
    """
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(')
    funcname = self._search_regex(
        sig_name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    js_function = interpreter.extract_function(funcname)

    def run_signature(s):
        # The extracted function takes its arguments as a list
        return js_function([s])

    return run_signature
2604
def _cached(self, func, *cache_id):
    """Wrap func so that its outcome is memoized in `self._player_cache` under cache_id.

    The first call stores either the return value or the exception raised
    (non-ExtractorError exceptions are wrapped in ExtractorError). Later calls
    return or re-raise the stored outcome without calling func again, whatever
    arguments they are given.
    """
    def memoized(*args, **kwargs):
        if cache_id not in self._player_cache:
            try:
                self._player_cache[cache_id] = func(*args, **kwargs)
            except ExtractorError as err:
                self._player_cache[cache_id] = err
            except Exception as err:
                self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=err)

        outcome = self._player_cache[cache_id]
        if not isinstance(outcome, Exception):
            return outcome
        raise outcome

    return memoized
2620
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    # The extracted function is cached per player URL and signature shape
    cache_key = ('sig', player_url, self._signature_cache_id(s))
    cached_extractor = self._cached(self._extract_signature_function, *cache_key)
    sig_func = cached_extractor(video_id, player_url, s)
    self._print_sig_code(sig_func, s)
    return sig_func(s)
2628
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        build_nsig_func = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        decrypted = build_nsig_func(jsi, func_code)(s)
    except JSInterpreter.Exception as native_error:
        # The native interpreter failed; fall back to PhantomJS. If the
        # wrapper cannot be created, re-raise the native error instead
        try:
            phantom = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            raise native_error
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(native_error)

        arg_names, func_body = func_code
        script = f'console.log(function({", ".join(arg_names)}) {{ {func_body} }}({s!r}));'
        decrypted = phantom.execute(
            script, video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {decrypted}')
    return decrypted
2659
def _extract_n_function_name(self, jscode):
    """Return the name of the player's n-parameter function.

    When the call site indexes into an array (`name[idx](...)`), the array
    literal assigned to `name` is parsed and the element at idx is returned.
    """
    call_site_re = r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)'
    funcname, idx = self._search_regex(
        call_site_re, jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    if idx:
        array_literal = self._search_regex(
            rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
            f'Initial JS player n function list ({funcname}.{idx})')
        return json.loads(js_to_json(array_literal))[int(idx)]
    return funcname
2670
def _extract_n_function_code(self, video_id, player_url):
    """Return (jsi, player_id, func_code) for the player's n-parameter function.

    func_code comes from the 'youtube-nsig' cache section when present;
    otherwise it is extracted from the player JS and stored there. Note that
    on a cache hit the JSInterpreter is built from the cached value rather
    than from the player JS.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.08.19.2')
    jscode = func_code if func_code else self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if not func_code:
        func_code = jsi.extract_function_code(self._extract_n_function_name(jscode))
        self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
2683
def _extract_n_function_from_code(self, jsi, func_code):
    """Return a callable that runs the n-parameter function built from func_code.

    The callable raises JSInterpreter.Exception if interpretation fails for any
    reason or if the result starts with 'enhanced_except_'.
    """
    js_func = jsi.extract_function_from_code(*func_code)

    def extract_nsig(s):
        try:
            result = js_func([s])
        except JSInterpreter.Exception:
            raise
        except Exception as err:
            # Normalize every other failure to the interpreter's exception type
            raise JSInterpreter.Exception(traceback.format_exc(), cause=err)

        if not result.startswith('enhanced_except_'):
            return result
        raise JSInterpreter.Exception('Signature function returned an exception')

    return extract_nsig
2700
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The value is taken from ytcfg['STS'] when available, otherwise searched
    for in the player JS. Without a player_url this raises ExtractorError if
    fatal is set, else warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
2724
def _mark_watched(self, video_id, player_responses):
    """Request the playback and watchtime tracking URLs so the video is marked as watched.

    Stops with a warning as soon as one of the tracking URLs is missing from
    the player responses. The tracking requests themselves are non-fatal.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')

    # The second key (is_full == 1) is the watchtime URL
    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))

        # more consistent results setting it to right before the end
        reported_length = (qs.get('len') or ['1.5'])[0]
        video_length = [str(float(reported_length) - 1)]

        extra_params = {
            'ver': ['2'],
            'cpn': [cpn],
            'cmt': video_length,
            'el': 'detailpage',  # otherwise defaults to "shorts"
        }
        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            extra_params['st'] = video_length
            extra_params['et'] = video_length
        qs.update(extra_params)

        new_query = urllib.parse.urlencode(qs, True)
        url = urllib.parse.urlunparse(parsed_url._replace(query=new_query))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
2765
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield url_results for YouTube videos referenced by a third-party webpage.

    An Invidious-style alternate link short-circuits everything else by
    raising StopExtraction after yielding its result.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate_link = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate_link:
        yield cls.url_result(alternate_link.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    for lazy_id in lazy_ids:
        yield cls.url_result(unescapeHTML(lazy_id), cls, lazy_id)

    # Wordpress "YouTube Video Importer" plugin
    importer_embeds = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Each match is (q1, q2, video id); only the last group is needed
    for *_, embed_id in importer_embeds:
        yield cls.url_result(embed_id, cls, embed_id)
2789
@classmethod
def extract_id(cls, url):
    """Return the video ID for url; raise ExtractorError if `get_temp_id` finds none."""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
2796
def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the chaptered player bar renderer in data."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_seconds(chapter):
        # timeRangeStartMillis is scaled down by 1000
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def title_text(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_seconds,
        chapter_title=title_text,
        duration=duration)
2811
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the chapters of the first engagement panel marker list that yields any, else []."""
    marker_lists = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in marker_lists:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
2824
def _extract_chapters_from_description(self, description, duration):
    """Extract chapters from description lines that start with a timestamp.

    Each matching line gives a (timestamp, title) pair. The pairs are passed
    on with strict=False.
    """
    timestamped_lines = re.findall(
        r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or '')
    return self._extract_chapters(
        timestamped_lines,
        chapter_time=lambda line: parse_duration(line[0]),
        chapter_title=lambda line: line[1],
        duration=duration, strict=False)
2830
def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
    """Build a list of {'start_time', 'title'} chapters from raw chapter items.

    chapter_time and chapter_title are called on each item. Returns None when
    duration is falsy. With strict=False the chapters are first sorted by
    start time. A chapter is dropped with a warning if its start time is
    missing, earlier than the previously kept chapter, or after duration.
    """
    if not duration:
        return None

    parsed = []
    for raw_chapter in chapter_list or []:
        start_time = chapter_time(raw_chapter)
        parsed.append({
            'start_time': start_time,
            'title': chapter_title(raw_chapter),
        })
    if not strict:
        parsed = sorted(parsed, key=lambda c: c['start_time'] or 0)

    kept = []
    previous_start = 0
    for idx, chapter in enumerate(parsed):
        start_time = chapter['start_time']
        if start_time is None:
            self.report_warning(f'Incomplete chapter {idx}')
        elif previous_start <= start_time <= duration:
            kept.append(chapter)
            previous_start = start_time
        else:
            self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')
    return kept
2850
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer into a comment dict; return None if it has no commentId.

    `parent` is the parent comment's ID for replies; it is reported as 'root'
    when falsy.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return None

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(
        comment_renderer, lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)

    vote_getters = (
        lambda x: x['voteCount']['simpleText'],
        lambda x: x['likeCount'],
    )
    like_count = parse_count(try_get(comment_renderer, vote_getters, str)) or 0
    author_thumbnail = try_get(
        comment_renderer, lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    action_buttons = try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}
    is_favorited = 'creatorHeart' in action_buttons

    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': like_count,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
2885
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Yield comment dicts for a comment section or, when `parent` is set, a reply thread.

    Pages are fetched from the 'next' endpoint by following continuations,
    starting from the one found in root_continuation_data. Replies are
    extracted by calling this method recursively with `parent` set to the
    comment ID and the same `tracker` dict, so counts are shared across calls.

    The 'max_comments' extractor argument supplies up to four limits
    (max_comments, max_parents, max_replies, max_replies_per_thread); any
    missing or non-integer value becomes sys.maxsize.
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        # Returns the continuation for the requested sort order, or None if no item provides one
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        # Yields each comment followed by its replies. A bare `yield` (None) signals
        # to the caller's loop that the parent comment limit has been reached
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Cap the replies by both the per-thread limit and what is left of the overall reply limit
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Pad the argument list so that all four limits can always be unpacked
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        # Stays None, which ends the paging loop, unless a section below provides the next continuation
        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section of a top-level request is only used for its header
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    # extract_thread yielded None: the parent comment limit was reached
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    # Surface YouTube's own message only for a top-level call that produced no comments
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
3031
@staticmethod
def _generate_comment_continuation(video_id):
    """
    Generates initial comment section continuation token from given video id
    """
    # The raw token embeds the video id twice between fixed control bytes
    raw_token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
    encoded = base64.b64encode(raw_token.encode('utf-8'))
    return encoded.decode('ascii')
3039
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""
    def _real_comment_extract(contents):
        # Find the item section that holds the comments; None if there is none
        renderer = None
        for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if item.get('sectionIdentifier') == 'comment-item-section':
                renderer = item
                break
        yield from self._comment_entries(renderer, ytcfg, video_id)

    # Only the first value of the max_comments extractor argument caps the total here
    comment_limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_real_comment_extract(contents), 0, comment_limit)
3050
@staticmethod
def _get_checkok_params():
    """Return a new dict with the contentCheckOk and racyCheckOk flags set to True."""
    return dict(contentCheckOk=True, racyCheckOk=True)
3054
@classmethod
def _generate_player_context(cls, sts=None):
    """Return the playback context part of a player query, merged with the check-ok params.

    `signatureTimestamp` is included only when sts is not None.
    """
    content_playback_context = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        content_playback_context['signatureTimestamp'] = sts

    player_context = {
        'playbackContext': {
            'contentPlaybackContext': content_playback_context
        },
    }
    player_context.update(cls._get_checkok_params())
    return player_context
3068
@staticmethod
def _is_agegated(player_response):
    """Return True if the playability status of player_response indicates an age gate."""
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    for expected in AGE_GATE_REASONS:
        for reason in reasons:
            if expected in reason:
                return True
    return False
3080
@staticmethod
def _is_unplayable(player_response):
    """Return True if the playability status of player_response is 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
3084
# Sent as the player query's `params` for story videos and for the android client
# (see _extract_player_response)
_STORY_PLAYER_PARAMS = '8AEB'
3086
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """Request the player API response for video_id as the given client.

    Returns the response, or None if it is falsy. The request is made with
    fatal=True.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    # The signature timestamp can only be looked up when a player URL is known
    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id}
    needs_story_params = smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android'
    if needs_story_params:
        yt_query['params'] = self._STORY_PLAYER_PARAMS
    yt_query.update(self._generate_player_context(sts))

    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client_label)
    return response or None
3108
def _get_requested_clients(self, url, smuggled_data):
    """Return the ordered, de-duplicated list of innertube clients to query.

    Clients come from the 'player_client' extractor argument, where 'default'
    and 'all' expand to the default and to every allowed client. Unsupported
    names are skipped with a warning. For music URLs, the existing `_music`
    variants of the chosen clients are added as well.
    """
    default = ['android', 'web']
    # Clients whose name starts with "_" cannot be requested; highest priority first
    allowed_clients = sorted(
        (name for name in INNERTUBE_CLIENTS.keys() if not name.startswith('_')),
        key=lambda name: INNERTUBE_CLIENTS[name]['priority'], reverse=True)

    requested_clients = []
    for requested in self._configuration_arg('player_client'):
        if requested in allowed_clients:
            requested_clients.append(requested)
        elif requested == 'default':
            requested_clients.extend(default)
        elif requested == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {requested}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # NB: the generator reads the same list that extend() is appending to
        requested_clients.extend(
            f'{name}_music' for name in requested_clients if f'{name}_music' in INNERTUBE_CLIENTS)

    return orderedSet(requested_clients)
3132
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """Collect player responses for video_id from each of the given clients.

    Returns a tuple (prs, player_url): the list of usable player responses and
    the player URL last in effect (None if none was found or JS was skipped).
    Further clients may be queued while processing, depending on the
    playability status of the responses. Errors from individual clients are
    reported as warnings; the last one is raised only if no response at all
    was collected.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed so that clients.pop() below takes them in the order they were given
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    clients.append(client_name)
                    all_clients.add(actual_client)
                    return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        # Once a player URL has been found it is reused for the remaining clients
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The iframe API fallback is attempted at most once per call
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            # Warn about the previous error now; the newest one is held back until the end
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not len(prs):
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
3214
3215 def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, is_live, duration):
3216 itags, stream_ids = {}, []
3217 itag_qualities, res_qualities = {}, {0: None}
3218 q = qualities([
3219 # Normally tiny is the smallest video-only formats. But
3220 # audio-only formats with unknown quality may get tagged as tiny
3221 'tiny',
3222 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
3223 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
3224 ])
3225 streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
3226
3227 for fmt in streaming_formats:
3228 if fmt.get('targetDurationSec'):
3229 continue
3230
3231 itag = str_or_none(fmt.get('itag'))
3232 audio_track = fmt.get('audioTrack') or {}
3233 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
3234 if stream_id in stream_ids:
3235 continue
3236
3237 quality = fmt.get('quality')
3238 height = int_or_none(fmt.get('height'))
3239 if quality == 'tiny' or not quality:
3240 quality = fmt.get('audioQuality', '').lower() or quality
3241 # The 3gp format (17) in android client has a quality of "small",
3242 # but is actually worse than other formats
3243 if itag == '17':
3244 quality = 'tiny'
3245 if quality:
3246 if itag:
3247 itag_qualities[itag] = quality
3248 if height:
3249 res_qualities[height] = quality
3250 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
3251 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
3252 # number of fragment that would subsequently requested with (`&sq=N`)
3253 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
3254 continue
3255
3256 fmt_url = fmt.get('url')
3257 if not fmt_url:
3258 sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
3259 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
3260 encrypted_sig = try_get(sc, lambda x: x['s'][0])
3261 if not all((sc, fmt_url, player_url, encrypted_sig)):
3262 continue
3263 try:
3264 fmt_url += '&%s=%s' % (
3265 traverse_obj(sc, ('sp', -1)) or 'signature',
3266 self._decrypt_signature(encrypted_sig, video_id, player_url)
3267 )
3268 except ExtractorError as e:
3269 self.report_warning('Signature extraction failed: Some formats may be missing',
3270 video_id=video_id, only_once=True)
3271 self.write_debug(e, only_once=True)
3272 continue
3273
3274 query = parse_qs(fmt_url)
3275 throttled = False
3276 if query.get('n'):
3277 try:
3278 decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
3279 fmt_url = update_url_query(fmt_url, {
3280 'n': decrypt_nsig(query['n'][0], video_id, player_url)
3281 })
3282 except ExtractorError as e:
3283 phantomjs_hint = ''
3284 if isinstance(e, JSInterpreter.Exception):
3285 phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
3286 f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
3287 self.report_warning(
3288 f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
3289 f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
3290 self.write_debug(e, only_once=True)
3291 throttled = True
3292
3293 if itag:
3294 itags[itag] = 'https'
3295 stream_ids.append(stream_id)
3296
3297 tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
3298 language_preference = (
3299 10 if audio_track.get('audioIsDefault') and 10
3300 else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
3301 else -1)
3302 # Some formats may have much smaller duration than others (possibly damaged during encoding)
3303 # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
3304 # Make sure to avoid false positives with small duration differences.
3305 # E.g. __2ABJjxzNo, ySuUZEjARPY
3306 is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
3307 if is_damaged:
3308 self.report_warning(
3309 f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
3310 dct = {
3311 'asr': int_or_none(fmt.get('audioSampleRate')),
3312 'filesize': int_or_none(fmt.get('contentLength')),
3313 'format_id': itag,
3314 'format_note': join_nonempty(
3315 '%s%s' % (audio_track.get('displayName') or '',
3316 ' (default)' if language_preference > 0 else ''),
3317 fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
3318 try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
3319 try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
3320 throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
3321 # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
3322 'source_preference': -10 if throttled else -5 if itag == '22' else -1,
3323 'fps': int_or_none(fmt.get('fps')) or None,
3324 'audio_channels': fmt.get('audioChannels'),
3325 'height': height,
3326 'quality': q(quality),
3327 'has_drm': bool(fmt.get('drmFamilies')),
3328 'tbr': tbr,
3329 'url': fmt_url,
3330 'width': int_or_none(fmt.get('width')),
3331 'language': join_nonempty(audio_track.get('id', '').split('.')[0],
3332 'desc' if language_preference < -1 else ''),
3333 'language_preference': language_preference,
3334 # Strictly de-prioritize damaged and 3gp formats
3335 'preference': -10 if is_damaged else -2 if itag == '17' else None,
3336 }
3337 mime_mobj = re.match(
3338 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
3339 if mime_mobj:
3340 dct['ext'] = mimetype2ext(mime_mobj.group(1))
3341 dct.update(parse_codecs(mime_mobj.group(2)))
3342 no_audio = dct.get('acodec') == 'none'
3343 no_video = dct.get('vcodec') == 'none'
3344 if no_audio:
3345 dct['vbr'] = tbr
3346 if no_video:
3347 dct['abr'] = tbr
3348 if no_audio or no_video:
3349 dct['downloader_options'] = {
3350 # Youtube throttles chunks >~10M
3351 'http_chunk_size': 10485760,
3352 }
3353 if dct.get('ext'):
3354 dct['container'] = dct['ext'] + '_dash'
3355 yield dct
3356
3357 live_from_start = is_live and self.get_param('live_from_start')
3358 skip_manifests = self._configuration_arg('skip')
3359 if not self.get_param('youtube_include_hls_manifest', True):
3360 skip_manifests.append('hls')
3361 if not self.get_param('youtube_include_dash_manifest', True):
3362 skip_manifests.append('dash')
3363 get_dash = 'dash' not in skip_manifests and (
3364 not is_live or live_from_start or self._configuration_arg('include_live_dash'))
3365 get_hls = not live_from_start and 'hls' not in skip_manifests
3366
3367 def process_manifest_format(f, proto, itag):
3368 if itag in itags:
3369 if itags[itag] == proto or f'{itag}-{proto}' in itags:
3370 return False
3371 itag = f'{itag}-{proto}'
3372 if itag:
3373 f['format_id'] = itag
3374 itags[itag] = proto
3375
3376 f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
3377 if f['quality'] == -1 and f.get('height'):
3378 f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
3379 return True
3380
3381 subtitles = {}
3382 for sd in streaming_data:
3383 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
3384 if hls_manifest_url:
3385 fmts, subs = self._extract_m3u8_formats_and_subtitles(hls_manifest_url, video_id, 'mp4', fatal=False, live=is_live)
3386 subtitles = self._merge_subtitles(subs, subtitles)
3387 for f in fmts:
3388 if process_manifest_format(f, 'hls', self._search_regex(
3389 r'/itag/(\d+)', f['url'], 'itag', default=None)):
3390 yield f
3391
3392 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
3393 if dash_manifest_url:
3394 formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
3395 subtitles = self._merge_subtitles(subs, subtitles) # Prioritize HLS subs over DASH
3396 for f in formats:
3397 if process_manifest_format(f, 'dash', f['format_id']):
3398 f['filesize'] = int_or_none(self._search_regex(
3399 r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
3400 if live_from_start:
3401 f['is_from_start'] = True
3402
3403 yield f
3404 yield subtitles
3405
def _extract_storyboard(self, player_responses, duration):
    """Yield one mhtml "storyboard" format dict per level of the storyboard spec.

    The spec string is read from the first player response that provides
    ``storyboards.playerStoryboardSpecRenderer.spec``. It is a ``|``-separated
    list whose first entry is the base URL (joined onto https://i.ytimg.com/)
    and whose remaining entries are ``#``-separated level descriptions with
    exactly 8 fields: the first five are integers (width, height, frame count,
    columns, rows) and the last two fill ``$N`` and the ``sigh`` query value.

    Nothing is yielded when no usable base URL is found; malformed levels are
    reported with a warning and skipped.
    """
    raw_spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='')
    # Reverse so that the leading base-URL entry can be popped off the end
    levels = raw_spec.split('|')[::-1]
    base_url = url_or_none(urljoin('https://i.ytimg.com/', levels.pop() or None))
    if not base_url:
        return

    highest_level = len(levels) - 1
    for idx, level_spec in enumerate(levels):
        fields = level_spec.split('#')
        counts = [int_or_none(field) for field in fields[:5]]
        if len(fields) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {idx}: {"#".join(fields)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        name_pattern, sigh = fields[6:]

        # Levels are iterated in reverse, hence the `highest_level - idx` numbering
        level_url = base_url.replace('$L', str(highest_level - idx))
        level_url = level_url.replace('$N', name_pattern) + f'&sigh={sigh}'

        # Each fragment is one sheet of `cols * rows` frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count

        fragments = []
        for sheet in range(math.ceil(fragment_count)):
            fragments.append({
                'url': level_url.replace('$M', str(sheet)),
                # The last sheet may cover less than a full fragment duration
                'duration': min(fragment_duration, duration - (sheet * fragment_duration)),
            })

        yield {
            'format_id': f'sb{idx}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': level_url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': fragments,
        }
3443
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Fetch the watch page (unless skipped) and the player responses.

    The webpage download is skipped when ``webpage`` is listed in the
    ``player_skip`` configuration argument; in that case ``webpage`` is None.
    The ytcfg comes from the webpage when available, otherwise the default
    ytcfg is used.

    Returns a tuple ``(webpage, master_ytcfg, player_responses, player_url)``.
    """
    if 'webpage' in self._configuration_arg('player_skip'):
        webpage = None
    else:
        page_query = {'bpctr': '9999999999', 'has_verified': '1'}
        if smuggled_data.get('is_story'):
            page_query['pp'] = self._STORY_PLAYER_PARAMS
        # Non-fatal: extraction continues without a webpage if this fails
        webpage = self._download_webpage(
            webpage_url, video_id, fatal=False, query=page_query)

    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url
3460
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Collect live details, streaming data, formats and manifest subtitles.

    ``is_live`` is taken from the video details' ``isLive``; when that is
    None, it falls back to ``isLiveNow`` from the live broadcast details.

    Returns a tuple
    ``(live_broadcast_details, is_live, streaming_data, formats, subtitles)``.
    """
    broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    live_now = get_first(video_details, 'isLive')
    if live_now is None:
        live_now = get_first(broadcast_details, 'isLiveNow')

    stream_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    # The generator yields every format first and the subtitles dict last
    *format_list, manifest_subs = self._extract_formats_and_subtitles(
        stream_data, video_id, player_url, live_now, duration)

    return broadcast_details, live_now, stream_data, format_list, manifest_subs
3471
def _real_extract(self, url):
    """Extract the info dict for a single YouTube video URL.

    Downloads the watch page and player responses, then assembles formats,
    thumbnails, subtitles / automatic captions, live status, music metadata,
    engagement counts, chapters and availability.

    Returns a url result when the player response points to a trailer video,
    a playlist result for multifeed videos (unless ``noplaylist`` is set or
    ``force_singlefeed`` was smuggled), and the video info dict otherwise.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id

    webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Without a webpage there are no <meta> tags to search
    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])
    video_title = (
        get_first(video_details, 'title')
        or self._get_text(microformats, (..., 'title'))
        or search_meta(['og:title', 'twitter:title', 'title']))
    video_description = get_first(video_details, 'shortDescription')

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = urllib.parse.parse_qs(
                    urllib.parse.unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += ' (%s)' % feed_title
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%swatch?v=%s' % (base_url, feed_id),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    duration = int_or_none(
        get_first(video_details, 'lengthSeconds')
        or get_first(microformats, 'lengthSeconds')
        or parse_duration(search_meta('duration'))) or None

    # The duration must be forwarded: format extraction compares each format's
    # approxDurationMs against it to flag possibly damaged formats
    live_broadcast_details, is_live, streaming_data, formats, automatic_captions = \
        self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # `reason` may be missing even though a subreason is present
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break
    thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
        # in resolution, these are not the custom thumbnail. So de-prioritize them
        'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
        'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Rank by position in thumbnail_names (unknown names last); webp beats jpg at the same rank
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    if is_live is None:
        if is_upcoming or live_content is False:
            is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False
    live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_end_time and live_start_time:
        duration = live_end_time - live_start_time

    if is_live and self.get_param('live_from_start'):
        self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

    formats.extend(self._extract_storyboard(player_responses, duration))

    # source_preference is lower for throttled/potentially damaged formats
    self._sort_formats(formats, (
        'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))

    info = {
        'id': video_id,
        'title': video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if user don't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'uploader': get_first(video_details, 'author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'is_live': is_live,
        'was_live': (False if is_live or is_upcoming or live_content is False
                     else None if is_live is None or is_upcoming is None
                     else live_content),
        'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
        'release_timestamp': live_start_time,
    }

    if get_first(video_details, 'isPostLiveDvr'):
        self.write_debug('Video is in Post-Live Manifestless mode')
        info['live_status'] = 'post_live'
        if (duration or 0) > 4 * 3600:
            self.report_warning(
                'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '
                'This is a known issue and patches are welcome')

    subtitles = {}
    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

        def process_language(container, base_url, lang_code, sub_name, query):
            # Adds one entry per subtitle format for `lang_code` to `container`
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
                    'name': sub_name,
                })

        # NB: Constructing the full subtitle dictionary is slow
        get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
            self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
        for lang_code, caption_track in captions.items():
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            # Only parse the URL once it is known to exist
            orig_lang = parse_qs(base_url).get('lang', [None])[-1]
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                orig_trans_code = trans_code
                if caption_track.get('kind') != 'asr':
                    if not get_translated_subs:
                        continue
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, None, ' from %s')
                # Add an "-orig" label to the original language so that it can be distinguished.
                # The subs are returned without "-orig" as well for compatibility
                if lang_code == f'a-{orig_trans_code}':
                    process_language(
                        automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
                # Setting tlang=lang returns damaged subtitles.
                process_language(automatic_captions, base_url, trans_code, trans_name,
                                 {} if orig_lang == orig_trans_code else {'tlang': trans_code})

    info['automatic_captions'] = automatic_captions
    info['subtitles'] = subtitles

    # Start/end times may be given in either the URL fragment or the query; the fragment wins
    parsed_url = urllib.parse.urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = urllib.parse.parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(v[0])

    # Youtube Music Auto-generated description
    if video_description:
        # NB: the pattern is in verbose mode, so literal spaces must be escaped
        mobj = re.search(
            r'''(?xs)
                (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
                (?P<album>[^\n]+)
                (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
                (?:.+?Released\ on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
                (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
                .+\nAuto-generated\ by\ YouTube\.\s*$
            ''', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
    if not initial_data:
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query,
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    info['comment_count'] = traverse_obj(initial_data, (
        'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
        'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'
    ), (
        'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
        'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'
    ), expected_type=int_or_none, get_all=False)

    try:  # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
    except (KeyError, IndexError, TypeError):
        pass
    else:
        info.setdefault('subtitles', {})['live_chat'] = [{
            # url is needed to set cookies
            'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]

    # Defaults so that the code below never reads an unbound name
    # when the initial data could not be obtained (the request is non-fatal)
    contents, vpir = [], None
    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or self._extract_chapters_from_description(video_description, duration)
            or None)

        contents = traverse_obj(
            initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
            expected_type=list, default=[])

        vpir = get_first(contents, 'videoPrimaryInfoRenderer')
        if vpir:
            stl = vpir.get('superTitleLink')
            if stl:
                stl = self._get_text(stl)
                if try_get(
                        vpir,
                        lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                    info['location'] = stl
                else:
                    # The text lookup may come back empty; search an empty string then
                    mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl or '')
                    if mobj:
                        info.update({
                            'series': mobj.group(1),
                            'season_number': int(mobj.group(2)),
                            'episode_number': int(mobj.group(3)),
                        })
            for tlb in (try_get(
                    vpir,
                    lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                    list) or []):
                tbr = tlb.get('toggleButtonRenderer') or {}
                # Two known label layouts; the first regex that matches sets the count
                for getter, regex in [(
                        lambda x: x['defaultText']['accessibility']['accessibilityData'],
                        r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                            lambda x: x['accessibility'],
                            lambda x: x['accessibilityData']['accessibilityData'],
                        ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                    label = (try_get(tbr, getter, dict) or {}).get('label')
                    if label:
                        mobj = re.match(regex, label)
                        if mobj:
                            info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                            break
            sbr_tooltip = try_get(
                vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
            if sbr_tooltip:
                like_count, dislike_count = sbr_tooltip.split(' / ')
                info.update({
                    'like_count': str_to_int(like_count),
                    'dislike_count': str_to_int(dislike_count),
                })
        vsir = get_first(contents, 'videoSecondaryInfoRenderer')
        if vsir:
            vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
            info.update({
                'channel': self._get_text(vor, 'title'),
                'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

            rows = try_get(
                vsir,
                lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                list) or []
            # A divider line between rows means several songs are listed;
            # the Album/Artist/Song rows are then not applied to the video
            multiple_songs = any(
                try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True
                for row in rows)
            for row in rows:
                mrr = row.get('metadataRowRenderer') or {}
                mrr_title = mrr.get('title')
                if not mrr_title:
                    continue
                mrr_title = self._get_text(mrr, 'title')
                mrr_contents_text = self._get_text(mrr, ('contents', 0))
                if mrr_title == 'License':
                    info['license'] = mrr_contents_text
                elif not multiple_songs:
                    if mrr_title == 'Album':
                        info['album'] = mrr_contents_text
                    elif mrr_title == 'Artist':
                        info['artist'] = mrr_contents_text
                    elif mrr_title == 'Song':
                        info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }

    # The upload date for scheduled, live and past live streams / premieres in microformats
    # may be different from the stream date. Although not in UTC, we will prefer it in this case.
    # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
    upload_date = (
        unified_strdate(get_first(microformats, 'uploadDate'))
        or unified_strdate(search_meta('uploadDate')))
    if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):
        upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date
    info['upload_date'] = upload_date

    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = get_first(video_details, 'isPrivate', expected_type=bool)
    is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
3969
3970
3971 class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
3972
@staticmethod
def passthrough_smuggled_data(func):
    """Decorate an extract method taking ``(self, url, smuggled_data)``.

    The returned wrapper takes ``(self, url)``. It unsmuggles the URL, sets
    ``is_music_url`` in the smuggled data when ``self.is_music_url(url)`` is
    true, calls ``func`` and, if there is smuggled data and the result has
    entries, re-smuggles that data into the URL of every entry (lazily).
    """
    def _resmuggle(entries, data):
        # TODO: Convert URL to music.youtube instead.
        # Do we need to passthrough any other smuggled_data?
        for item in entries:
            item['url'] = smuggle_url(item['url'], data)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        plain_url, data = unsmuggle_url(url, {})
        if self.is_music_url(plain_url):
            data['is_music_url'] = True
        result = func(self, plain_url, data)
        if data and result.get('entries'):
            result['entries'] = _resmuggle(result['entries'], data)
        return result

    return wrapper
3992
def _extract_channel_id(self, webpage):
    """Return the channel id found in the webpage's meta tags.

    The ``channelId`` meta tag is preferred. Otherwise the channel id is
    parsed out of the first available channel URL meta tag (og:url, app
    links, twitter cards); both of those lookups are made without a default.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier must be inside the group; outside it, only the
    # last character of the id would be captured
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
4005
@staticmethod
def _extract_basic_item_renderer(item):
    """Return the first dict value of `item` stored under a recognised renderer key.

    A key is recognised when it is one of the known basic renderers or when
    it has the form ``grid...Renderer``. Non-dict values are ignored.
    Returns None when nothing matches.
    """
    # Modified from _extract_grid_item_renderer
    basic_renderer_keys = (
        'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
    )
    return next(
        (candidate for name, candidate in item.items()
         if isinstance(candidate, dict) and (
             name in basic_renderer_keys
             or (name.startswith('grid') and name.endswith('Renderer')))),
        None)
4019
def _grid_entries(self, grid_renderer):
    """Yield an entry for each recognised item in ``grid_renderer['items']``.

    Per item, the first applicable case wins: playlist id, video id,
    channel id, and finally a generic navigation endpoint URL that one of
    the tab / playlist / video extractors declares suitable. Items that are
    not dicts, or that contain no recognised renderer, are skipped.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        playlist_id = renderer.get('playlistId')
        if playlist_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif renderer.get('videoId'):
            # video
            yield self._extract_video(renderer)
        elif renderer.get('channelId'):
            # channel
            channel_id = renderer.get('channelId')
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support
            endpoint_url = urljoin('https://www.youtube.com/', try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                str))
            if not endpoint_url:
                continue
            # Only the first extractor that accepts the URL is used
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(endpoint_url)),
                None)
            if matching_ie is not None:
                yield self.url_result(
                    endpoint_url, ie=matching_ie.ie_key(),
                    video_id=matching_ie._match_id(endpoint_url), video_title=title)
4059
def _music_reponsive_list_entry(self, renderer):
    """Turn a music list item renderer into a music.youtube.com URL result.

    Checked in order: the item's own video, a watch endpoint carrying a
    playlist (with or without a video), then a browse endpoint. Returns None
    when none of them is present.
    """
    item_video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if item_video_id:
        return self.url_result(f'https://music.youtube.com/watch?v={item_video_id}',
                               ie=YoutubeIE.ie_key(), video_id=item_video_id)

    watch_endpoint = ('navigationEndpoint', 'watchEndpoint')
    playlist_id = traverse_obj(renderer, (*watch_endpoint, 'playlistId'))
    if playlist_id:
        watch_video_id = traverse_obj(renderer, (*watch_endpoint, 'videoId'))
        if watch_video_id:
            playlist_url = f'https://music.youtube.com/watch?v={watch_video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if browse_id:
        return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
                               ie=YoutubeTabIE.ie_key(), video_id=browse_id)
    return None
4077
4078 def _shelf_entries_from_content(self, shelf_renderer):
4079 content = shelf_renderer.get('content')
4080 if not isinstance(content, dict):
4081 return
4082 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
4083 if renderer:
4084 # TODO: add support for nested playlists so each shelf is processed
4085 # as separate playlist
4086 # TODO: this includes only first N items
4087 yield from self._grid_entries(renderer)
4088 renderer = content.get('horizontalListRenderer')
4089 if renderer:
4090 # TODO
4091 pass
4092
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf's own endpoint, then its content entries.

    When *skip_channels* is set and the shelf URL contains ``/channels?``,
    nothing at all is yielded for that shelf.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
4108
4109 def _playlist_entries(self, video_list_renderer):
4110 for content in video_list_renderer['contents']:
4111 if not isinstance(content, dict):
4112 continue
4113 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
4114 if not isinstance(renderer, dict):
4115 continue
4116 video_id = renderer.get('videoId')
4117 if not video_id:
4118 continue
4119 yield self._extract_video(renderer)
4120
def _rich_entries(self, rich_grid_renderer):
    """Yield the video nested at ``content.videoRenderer``, if it has a ``videoId``."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
    if video_renderer and video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
4128
4129 def _video_entry(self, video_renderer):
4130 video_id = video_renderer.get('videoId')
4131 if video_id:
4132 return self._extract_video(video_renderer)
4133
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a URL result for the tile's tap target, or None if it has no URL."""
    tap_path = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    tile_url = urljoin('https://youtube.com', tap_path)
    if not tile_url:
        return None
    return self.url_result(
        tile_url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4140
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos and playlists referenced by a community post.

    Covers, in order, the attached video, the attached playlist, and every
    link in the post text that ``YoutubeIE`` accepts. A link pointing at the
    attached video is not yielded a second time.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    attached_video_id = attached_video.get('videoId')
    if attached_video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_video_id = YoutubeIE._match_id(link)
        if attached_video_id == linked_video_id:
            # Already yielded as the attachment
            continue
        yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
4176
4177 def _post_thread_continuation_entries(self, post_thread_continuation):
4178 contents = post_thread_continuation.get('contents')
4179 if not isinstance(contents, list):
4180 return
4181 for content in contents:
4182 renderer = content.get('backstagePostThreadRenderer')
4183 if isinstance(renderer, dict):
4184 yield from self._post_thread_entries(renderer)
4185 continue
4186 renderer = content.get('videoRenderer')
4187 if isinstance(renderer, dict):
4188 yield self._video_entry(renderer)
4189
4190 r''' # unused
4191 def _rich_grid_entries(self, contents):
4192 for content in contents:
4193 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4194 if video_renderer:
4195 entry = self._video_entry(video_renderer)
4196 if entry:
4197 yield entry
4198 '''
4199
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found in ``parent_renderer['contents']``.

    *continuation_list* is an output parameter: its content is replaced by a
    single element holding the continuation found while extracting (or None).
    As this is a generator, the value is only final once it is exhausted.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Maps a renderer key to a callable returning an iterable of entries
    entry_extractors = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not section:
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue

            # Only the first known renderer of each section item is processed
            for key, renderer in section_item.items():
                if key not in entry_extractors:
                    continue
                for entry in entry_extractors[key](renderer):
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(section)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)
4248
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of *tab*, requesting continuation pages as needed.

    The first batch comes from the tab's ``sectionListRenderer`` or
    ``richGridRenderer``. After that, continuation responses are requested
    until no continuation is left, a request returns nothing, or a response
    has no content this method knows how to handle.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # `continuation_list` is looked up at call time, so this always
        # writes into the list most recently bound below
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Handlers for responses carrying `continuationContents`
    continuation_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Handlers for appended continuation items, keyed by the renderer of the
    # first item: (extractor, key under which the extractor expects the items)
    item_handlers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key in continuation_handlers:
                continuation_renderer = value
                continuation_list = [None]
                yield from continuation_handlers[key](continuation_renderer)
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
        if continuation_renderer:
            continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        first_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key in first_item:
            if key in item_handlers:
                handler, items_key = item_handlers[key]
                video_items_renderer = {items_key: continuation_items}
                continuation_list = [None]
                yield from handler(video_items_renderer)
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
        if not video_items_renderer:
            # Nothing recognizable in this response
            break
4323
@staticmethod
def _extract_selected_tab(tabs, fatal=True):
    """Return the renderer of the tab whose ``selected`` flag is True.

    When no tab is selected, raise ExtractorError if *fatal*, else return None.
    """
    for tab in tabs:
        tab_renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    if fatal:
        raise ExtractorError('Unable to find selected tab')
    return None
4333
def _extract_uploader(self, data):
    """Return uploader fields taken from the playlist's secondary sidebar.

    The result may contain ``uploader``, ``uploader_id`` and ``uploader_url``;
    fields whose value is None are left out. It is empty when the sidebar
    has no video owner.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_text = owner.get('text')
    fields = {
        # Falls back to the full text unless it reads "by <name> and N other(s)"
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], str)),
    }
    return {name: value for name, value in fields.items() if value is not None}
4349
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Return the playlist result for the selected tab in *tabs*.

    Metadata is read from the channel metadata renderer when present and from
    the playlist metadata renderer otherwise. The entries are produced lazily
    by ``self._entries`` for the selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    channel_name = channel_url = channel_id = None
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _append_uncropped(thumbnails, thumbnail_id, preference):
        if not thumbnails:
            return
        uncropped_url = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    # Append the tab name(s) to the title
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        # No channel metadata: take the uploader from the sidebar instead
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror the (possibly updated) uploader fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)
4441
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of an inline playlist, page by page.

    Each page is fetched from the ``next`` endpoint, seeded with the watch
    endpoint of the last item of the current page. Iteration ends when a
    page has no videos, has nothing after the last video already yielded, or
    the response carries no playlist.
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The last item need not be a playlistPanelVideoRenderer (other
        # renderers are accepted by _playlist_entries), so fall back to an
        # empty endpoint and let the defaults below apply
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # No further playlist data; stop instead of failing on the next page
            return
4472
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Return a result for a playlist embedded in the page data.

    When the playlist has its own URL that differs from *url* and its ID is
    not a known-unviewable one, a URL result for that page is returned.
    Otherwise the playlist is extracted inline.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    playlist_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    playlist_url = urljoin(url, playlist_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    should_delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if not should_delegate:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
4495
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for dropdown_entry in dropdown_entries:
        if try_get(dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            # Only the first selected entry is considered
            label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
            if label:
                badge_labels.add(label.lower())
            break

    is_private = is_unlisted = None
    for badge_label in badge_labels:
        if badge_label == 'unlisted':
            is_unlisted = True
        elif badge_label == 'private':
            is_private = True
        elif badge_label == 'public':
            is_unlisted = is_private = False
    return self._availability(is_private, False, False, False, is_unlisted)
4527
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first truthy *info_renderer* value among the playlist sidebar items.

    Values not matching *expected_type* are ignored. Returns None when no
    sidebar item provides one.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next(filter(None, candidates), None)
4536
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns None when there is no primary playlist sidebar. When the button
    itself is missing, the request is still made with default parameters.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return None

    browse_endpoint = {}
    for menu_item in try_get(sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        label = try_get(
            nav_item, lambda x: x['text']['simpleText'], str)
        if label and label.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
4572
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the ``skip`` configuration arg of YoutubeTabIE."""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped
4576
def _extract_webpage(self, url, item_id, fatal=True):
    """Download *url* and return ``(webpage, data)``, retrying where sensible.

    A retry is requested for network errors other than HTTP 403/429 and for
    pages whose initial data is incomplete. Other extraction errors and
    reported alerts are passed to ``_error_or_warning`` and end the attempts.
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            # Network failures are retried, except HTTP 403 and 429 responses
            retryable = isinstance(cause, network_exceptions) and not (
                isinstance(cause, urllib.error.HTTPError) and cause.code in (403, 429))
            if retryable:
                retry.error = e
                continue
            self._error_or_warning(e, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
            retry.error = ExtractorError('Incomplete yt initial data received')

    return webpage, data
4604
def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """Use if failed to extract ytcfg (and data) from initial webpage"""
    if ytcfg or not self.is_authenticated:
        return
    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    authcheck_enabled = 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if authcheck_enabled and fatal:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    # Either the check is skipped or the failure is non-fatal: only warn
    self.report_warning(msg, only_once=True)
4616
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """Return ``(data, ytcfg)`` for *url*.

    Unless webpage downloading is skipped, the data comes from the webpage.
    When that yields nothing, the tab endpoint API is used instead.
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)
    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
4636
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """Resolve *url* through the API and return the response of the resolved endpoint.

    A resolved browse endpoint is queried via ``browse`` and a watch endpoint
    via ``next``. If neither is present, raise ExtractorError when *fatal*,
    otherwise warn and return None.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for ep_key, ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)
4654
# Search `params` used by _search_results when the caller passes none
_SEARCH_PARAMS = None

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """Yield the entries of a search for *query*, following continuations.

    *params* defaults to ``self._SEARCH_PARAMS`` and is sent only when truthy.
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # Distinct top-level keys, any of which marks a usable response
    check_get_keys = tuple({keys[0] for keys in content_keys})
    ytcfg = self._download_ytcfg(default_client, display_id) if not self.skip_webpage else {}
    self._report_playlist_authcheck(ytcfg, fatal=False)

    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        # Merge the continuation of the previous page (if any) into the query
        data.update(continuation_list[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        page = {'contents': list(variadic(slr_contents))}
        yield from self._extract_entries(page, continuation_list)
        if not continuation_list[0]:
            break
4689
4690
4691 class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
4692 IE_DESC = 'YouTube Tabs'
4693 _VALID_URL = r'''(?x:
4694 https?://
4695 (?:\w+\.)?
4696 (?:
4697 youtube(?:kids)?\.com|
4698 %(invidious)s
4699 )/
4700 (?:
4701 (?P<channel_type>channel|c|user|browse)/|
4702 (?P<not_channel>
4703 feed/|hashtag/|
4704 (?:playlist|watch)\?.*?\blist=
4705 )|
4706 (?!(?:%(reserved_names)s)\b) # Direct URLs
4707 )
4708 (?P<id>[^/?\#&]+)
4709 )''' % {
4710 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
4711 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
4712 }
4713 IE_NAME = 'youtube:tab'
4714
4715 _TESTS = [{
4716 'note': 'playlists, multipage',
4717 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
4718 'playlist_mincount': 94,
4719 'info_dict': {
4720 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
4721 'title': 'Igor Kleiner - Playlists',
4722 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
4723 'uploader': 'Igor Kleiner',
4724 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4725 'channel': 'Igor Kleiner',
4726 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4727 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4728 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4729 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4730 'channel_follower_count': int
4731 },
4732 }, {
4733 'note': 'playlists, multipage, different order',
4734 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
4735 'playlist_mincount': 94,
4736 'info_dict': {
4737 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
4738 'title': 'Igor Kleiner - Playlists',
4739 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
4740 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4741 'uploader': 'Igor Kleiner',
4742 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4743 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4744 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4745 'channel': 'Igor Kleiner',
4746 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4747 'channel_follower_count': int
4748 },
4749 }, {
4750 'note': 'playlists, series',
4751 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
4752 'playlist_mincount': 5,
4753 'info_dict': {
4754 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4755 'title': '3Blue1Brown - Playlists',
4756 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4757 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4758 'uploader': '3Blue1Brown',
4759 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4760 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4761 'channel': '3Blue1Brown',
4762 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
4763 'tags': ['Mathematics'],
4764 'channel_follower_count': int
4765 },
4766 }, {
4767 'note': 'playlists, singlepage',
4768 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
4769 'playlist_mincount': 4,
4770 'info_dict': {
4771 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4772 'title': 'ThirstForScience - Playlists',
4773 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
4774 'uploader': 'ThirstForScience',
4775 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4776 'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4777 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4778 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4779 'tags': 'count:13',
4780 'channel': 'ThirstForScience',
4781 'channel_follower_count': int
4782 }
4783 }, {
4784 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
4785 'only_matching': True,
4786 }, {
4787 'note': 'basic, single video playlist',
4788 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4789 'info_dict': {
4790 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4791 'uploader': 'Sergey M.',
4792 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4793 'title': 'youtube-dl public playlist',
4794 'description': '',
4795 'tags': [],
4796 'view_count': int,
4797 'modified_date': '20201130',
4798 'channel': 'Sergey M.',
4799 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4800 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4801 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4802 },
4803 'playlist_count': 1,
4804 }, {
4805 'note': 'empty playlist',
4806 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4807 'info_dict': {
4808 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4809 'uploader': 'Sergey M.',
4810 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4811 'title': 'youtube-dl empty playlist',
4812 'tags': [],
4813 'channel': 'Sergey M.',
4814 'description': '',
4815 'modified_date': '20160902',
4816 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4817 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4818 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4819 },
4820 'playlist_count': 0,
4821 }, {
4822 'note': 'Home tab',
4823 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
4824 'info_dict': {
4825 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4826 'title': 'lex will - Home',
4827 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4828 'uploader': 'lex will',
4829 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4830 'channel': 'lex will',
4831 'tags': ['bible', 'history', 'prophesy'],
4832 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4833 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4834 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4835 'channel_follower_count': int
4836 },
4837 'playlist_mincount': 2,
4838 }, {
4839 'note': 'Videos tab',
4840 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
4841 'info_dict': {
4842 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4843 'title': 'lex will - Videos',
4844 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4845 'uploader': 'lex will',
4846 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4847 'tags': ['bible', 'history', 'prophesy'],
4848 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4849 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4850 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4851 'channel': 'lex will',
4852 'channel_follower_count': int
4853 },
4854 'playlist_mincount': 975,
4855 }, {
4856 'note': 'Videos tab, sorted by popular',
4857 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
4858 'info_dict': {
4859 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4860 'title': 'lex will - Videos',
4861 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4862 'uploader': 'lex will',
4863 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4864 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4865 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4866 'channel': 'lex will',
4867 'tags': ['bible', 'history', 'prophesy'],
4868 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4869 'channel_follower_count': int
4870 },
4871 'playlist_mincount': 199,
4872 }, {
4873 'note': 'Playlists tab',
4874 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
4875 'info_dict': {
4876 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4877 'title': 'lex will - Playlists',
4878 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4879 'uploader': 'lex will',
4880 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4881 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4882 'channel': 'lex will',
4883 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4884 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4885 'tags': ['bible', 'history', 'prophesy'],
4886 'channel_follower_count': int
4887 },
4888 'playlist_mincount': 17,
4889 }, {
4890 'note': 'Community tab',
4891 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
4892 'info_dict': {
4893 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4894 'title': 'lex will - Community',
4895 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4896 'uploader': 'lex will',
4897 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4898 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4899 'channel': 'lex will',
4900 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4901 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4902 'tags': ['bible', 'history', 'prophesy'],
4903 'channel_follower_count': int
4904 },
4905 'playlist_mincount': 18,
4906 }, {
4907 'note': 'Channels tab',
4908 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
4909 'info_dict': {
4910 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4911 'title': 'lex will - Channels',
4912 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4913 'uploader': 'lex will',
4914 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4915 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4916 'channel': 'lex will',
4917 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4918 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4919 'tags': ['bible', 'history', 'prophesy'],
4920 'channel_follower_count': int
4921 },
4922 'playlist_mincount': 12,
4923 }, {
4924 'note': 'Search tab',
4925 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
4926 'playlist_mincount': 40,
4927 'info_dict': {
4928 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4929 'title': '3Blue1Brown - Search - linear algebra',
4930 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4931 'uploader': '3Blue1Brown',
4932 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4933 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4934 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4935 'tags': ['Mathematics'],
4936 'channel': '3Blue1Brown',
4937 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
4938 'channel_follower_count': int
4939 },
4940 }, {
4941 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4942 'only_matching': True,
4943 }, {
4944 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4945 'only_matching': True,
4946 }, {
4947 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4948 'only_matching': True,
4949 }, {
4950 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
4951 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4952 'info_dict': {
4953 'title': '29C3: Not my department',
4954 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4955 'uploader': 'Christiaan008',
4956 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4957 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
4958 'tags': [],
4959 'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
4960 'view_count': int,
4961 'modified_date': '20150605',
4962 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4963 'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
4964 'channel': 'Christiaan008',
4965 },
4966 'playlist_count': 96,
4967 }, {
4968 'note': 'Large playlist',
4969 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
4970 'info_dict': {
4971 'title': 'Uploads from Cauchemar',
4972 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
4973 'uploader': 'Cauchemar',
4974 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
4975 'channel_url': 'https://www.youtube.com/c/Cauchemar89',
4976 'tags': [],
4977 'modified_date': r're:\d{8}',
4978 'channel': 'Cauchemar',
4979 'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
4980 'view_count': int,
4981 'description': '',
4982 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
4983 },
4984 'playlist_mincount': 1123,
4985 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
4986 }, {
4987 'note': 'even larger playlist, 8832 videos',
4988 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
4989 'only_matching': True,
4990 }, {
4991 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
4992 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
4993 'info_dict': {
4994 'title': 'Uploads from Interstellar Movie',
4995 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
4996 'uploader': 'Interstellar Movie',
4997 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4998 'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
4999 'tags': [],
5000 'view_count': int,
5001 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
5002 'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
5003 'channel': 'Interstellar Movie',
5004 'description': '',
5005 'modified_date': r're:\d{8}',
5006 },
5007 'playlist_mincount': 21,
5008 }, {
5009 'note': 'Playlist with "show unavailable videos" button',
5010 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
5011 'info_dict': {
5012 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
5013 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
5014 'uploader': 'Phim Siêu Nhân Nhật Bản',
5015 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5016 'view_count': int,
5017 'channel': 'Phim Siêu Nhân Nhật Bản',
5018 'tags': [],
5019 'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5020 'description': '',
5021 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5022 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5023 'modified_date': r're:\d{8}',
5024 },
5025 'playlist_mincount': 200,
5026 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5027 }, {
5028 'note': 'Playlist with unavailable videos in page 7',
5029 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5030 'info_dict': {
5031 'title': 'Uploads from BlankTV',
5032 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
5033 'uploader': 'BlankTV',
5034 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5035 'channel': 'BlankTV',
5036 'channel_url': 'https://www.youtube.com/c/blanktv',
5037 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5038 'view_count': int,
5039 'tags': [],
5040 'uploader_url': 'https://www.youtube.com/c/blanktv',
5041 'modified_date': r're:\d{8}',
5042 'description': '',
5043 },
5044 'playlist_mincount': 1000,
5045 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5046 }, {
5047 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5048 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5049 'info_dict': {
5050 'title': 'Data Analysis with Dr Mike Pound',
5051 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5052 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5053 'uploader': 'Computerphile',
5054 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
5055 'uploader_url': 'https://www.youtube.com/user/Computerphile',
5056 'tags': [],
5057 'view_count': int,
5058 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5059 'channel_url': 'https://www.youtube.com/user/Computerphile',
5060 'channel': 'Computerphile',
5061 },
5062 'playlist_mincount': 11,
5063 }, {
5064 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5065 'only_matching': True,
5066 }, {
5067 'note': 'Playlist URL that does not actually serve a playlist',
5068 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5069 'info_dict': {
5070 'id': 'FqZTN594JQw',
5071 'ext': 'webm',
5072 'title': "Smiley's People 01 detective, Adventure Series, Action",
5073 'uploader': 'STREEM',
5074 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
5075 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
5076 'upload_date': '20150526',
5077 'license': 'Standard YouTube License',
5078 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5079 'categories': ['People & Blogs'],
5080 'tags': list,
5081 'view_count': int,
5082 'like_count': int,
5083 },
5084 'params': {
5085 'skip_download': True,
5086 },
5087 'skip': 'This video is not available.',
5088 'add_ie': [YoutubeIE.ie_key()],
5089 }, {
5090 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5091 'only_matching': True,
5092 }, {
5093 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5094 'only_matching': True,
5095 }, {
5096 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5097 'info_dict': {
5098 'id': 'Wq15eF5vCbI', # This will keep changing
5099 'ext': 'mp4',
5100 'title': str,
5101 'uploader': 'Sky News',
5102 'uploader_id': 'skynews',
5103 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
5104 'upload_date': r're:\d{8}',
5105 'description': str,
5106 'categories': ['News & Politics'],
5107 'tags': list,
5108 'like_count': int,
5109 'release_timestamp': 1642502819,
5110 'channel': 'Sky News',
5111 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5112 'age_limit': 0,
5113 'view_count': int,
5114 'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',
5115 'playable_in_embed': True,
5116 'release_date': '20220118',
5117 'availability': 'public',
5118 'live_status': 'is_live',
5119 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
5120 'channel_follower_count': int
5121 },
5122 'params': {
5123 'skip_download': True,
5124 },
5125 'expected_warnings': ['Ignoring subtitle tracks found in '],
5126 }, {
5127 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5128 'info_dict': {
5129 'id': 'a48o2S1cPoo',
5130 'ext': 'mp4',
5131 'title': 'The Young Turks - Live Main Show',
5132 'uploader': 'The Young Turks',
5133 'uploader_id': 'TheYoungTurks',
5134 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
5135 'upload_date': '20150715',
5136 'license': 'Standard YouTube License',
5137 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5138 'categories': ['News & Politics'],
5139 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5140 'like_count': int,
5141 },
5142 'params': {
5143 'skip_download': True,
5144 },
5145 'only_matching': True,
5146 }, {
5147 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5148 'only_matching': True,
5149 }, {
5150 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5151 'only_matching': True,
5152 }, {
5153 'note': 'A channel that is not live. Should raise error',
5154 'url': 'https://www.youtube.com/user/numberphile/live',
5155 'only_matching': True,
5156 }, {
5157 'url': 'https://www.youtube.com/feed/trending',
5158 'only_matching': True,
5159 }, {
5160 'url': 'https://www.youtube.com/feed/library',
5161 'only_matching': True,
5162 }, {
5163 'url': 'https://www.youtube.com/feed/history',
5164 'only_matching': True,
5165 }, {
5166 'url': 'https://www.youtube.com/feed/subscriptions',
5167 'only_matching': True,
5168 }, {
5169 'url': 'https://www.youtube.com/feed/watch_later',
5170 'only_matching': True,
5171 }, {
5172 'note': 'Recommended - redirects to home page.',
5173 'url': 'https://www.youtube.com/feed/recommended',
5174 'only_matching': True,
5175 }, {
5176 'note': 'inline playlist with not always working continuations',
5177 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5178 'only_matching': True,
5179 }, {
5180 'url': 'https://www.youtube.com/course',
5181 'only_matching': True,
5182 }, {
5183 'url': 'https://www.youtube.com/zsecurity',
5184 'only_matching': True,
5185 }, {
5186 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5187 'only_matching': True,
5188 }, {
5189 'url': 'https://www.youtube.com/TheYoungTurks/live',
5190 'only_matching': True,
5191 }, {
5192 'url': 'https://www.youtube.com/hashtag/cctv9',
5193 'info_dict': {
5194 'id': 'cctv9',
5195 'title': '#cctv9',
5196 'tags': [],
5197 },
5198 'playlist_mincount': 350,
5199 }, {
5200 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
5201 'only_matching': True,
5202 }, {
5203 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
5204 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5205 'only_matching': True
5206 }, {
5207 'note': '/browse/ should redirect to /channel/',
5208 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
5209 'only_matching': True
5210 }, {
5211 'note': 'VLPL, should redirect to playlist?list=PL...',
5212 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5213 'info_dict': {
5214 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5215 'uploader': 'NoCopyrightSounds',
5216 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
5217 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5218 'title': 'NCS : All Releases 💿',
5219 'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5220 'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5221 'modified_date': r're:\d{8}',
5222 'view_count': int,
5223 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5224 'tags': [],
5225 'channel': 'NoCopyrightSounds',
5226 },
5227 'playlist_mincount': 166,
5228 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5229 }, {
5230 'note': 'Topic, should redirect to playlist?list=UU...',
5231 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5232 'info_dict': {
5233 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5234 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5235 'title': 'Uploads from Royalty Free Music - Topic',
5236 'uploader': 'Royalty Free Music - Topic',
5237 'tags': [],
5238 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5239 'channel': 'Royalty Free Music - Topic',
5240 'view_count': int,
5241 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5242 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5243 'modified_date': r're:\d{8}',
5244 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5245 'description': '',
5246 },
5247 'expected_warnings': [
5248 'The URL does not have a videos tab',
5249 r'[Uu]navailable videos (are|will be) hidden',
5250 ],
5251 'playlist_mincount': 101,
5252 }, {
5253 'note': 'Topic without a UU playlist',
5254 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
5255 'info_dict': {
5256 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
5257 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
5258 'tags': [],
5259 },
5260 'expected_warnings': [
5261 'the playlist redirect gave error',
5262 ],
5263 'playlist_mincount': 9,
5264 }, {
5265 'note': 'Youtube music Album',
5266 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
5267 'info_dict': {
5268 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
5269 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
5270 'tags': [],
5271 'view_count': int,
5272 'description': '',
5273 'availability': 'unlisted',
5274 'modified_date': r're:\d{8}',
5275 },
5276 'playlist_count': 50,
5277 }, {
5278 'note': 'unlisted single video playlist',
5279 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5280 'info_dict': {
5281 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5282 'uploader': 'colethedj',
5283 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5284 'title': 'yt-dlp unlisted playlist test',
5285 'availability': 'unlisted',
5286 'tags': [],
5287 'modified_date': '20220418',
5288 'channel': 'colethedj',
5289 'view_count': int,
5290 'description': '',
5291 'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5292 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5293 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5294 },
5295 'playlist_count': 1,
5296 }, {
5297 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
5298 'url': 'https://www.youtube.com/feed/recommended',
5299 'info_dict': {
5300 'id': 'recommended',
5301 'title': 'recommended',
5302 'tags': [],
5303 },
5304 'playlist_mincount': 50,
5305 'params': {
5306 'skip_download': True,
5307 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5308 },
5309 }, {
5310 'note': 'API Fallback: /videos tab, sorted by oldest first',
5311 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
5312 'info_dict': {
5313 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5314 'title': 'Cody\'sLab - Videos',
5315 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
5316 'uploader': 'Cody\'sLab',
5317 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5318 'channel': 'Cody\'sLab',
5319 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5320 'tags': [],
5321 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5322 'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5323 'channel_follower_count': int
5324 },
5325 'playlist_mincount': 650,
5326 'params': {
5327 'skip_download': True,
5328 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5329 },
5330 }, {
5331 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
5332 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5333 'info_dict': {
5334 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5335 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5336 'title': 'Uploads from Royalty Free Music - Topic',
5337 'uploader': 'Royalty Free Music - Topic',
5338 'modified_date': r're:\d{8}',
5339 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5340 'description': '',
5341 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5342 'tags': [],
5343 'channel': 'Royalty Free Music - Topic',
5344 'view_count': int,
5345 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5346 },
5347 'expected_warnings': [
5348 'does not have a videos tab',
5349 r'[Uu]navailable videos (are|will be) hidden',
5350 ],
5351 'playlist_mincount': 101,
5352 'params': {
5353 'skip_download': True,
5354 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5355 },
5356 }, {
5357 'note': 'non-standard redirect to regional channel',
5358 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
5359 'only_matching': True
5360 }, {
5361 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
5362 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5363 'info_dict': {
5364 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5365 'modified_date': '20220407',
5366 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5367 'tags': [],
5368 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5369 'uploader': 'pukkandan',
5370 'availability': 'unlisted',
5371 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5372 'channel': 'pukkandan',
5373 'description': 'Test for collaborative playlist',
5374 'title': 'yt-dlp test - collaborative playlist',
5375 'view_count': int,
5376 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5377 },
5378 'playlist_mincount': 2
5379 }]
5380
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    Any URL that ``YoutubeIE.suitable`` accepts is rejected here; every
    other URL is decided by the parent class's ``suitable``.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
5384
# Splits a URL into three named parts:
#   pre  - the portion matched by _VALID_URL
#   tab  - an optional "/word" path segment directly after it
#   post - everything that remains
# The conditional group `(?(not_channel)|...)` only attempts to match `tab`
# when the `not_channel` group did not take part in the match
# (NOTE(review): `not_channel` is presumably defined inside _VALID_URL, which
# is not visible here - confirm).
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')
5386
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Extract a channel tab, playlist or (as a fallback) a single video.

    The URL is first rewritten onto ``www.youtube.com`` and split with
    ``_URL_RE``. Channel URLs without a tab are pointed at ``/videos``
    unless the ``no-youtube-channel-redirect`` compat option is set. The
    page data is then fetched and dispatched on its shape: a tabbed browse
    page, a watch-page playlist, or a bare video ID.

    Parameters:
        url: the URL to extract.
        smuggled_data: mapping from which only ``is_music_url`` is read
            here (presumably supplied by the ``passthrough_smuggled_data``
            decorator - confirm against its definition).

    Returns the result of ``url_result``, ``_extract_from_tabs`` or
    ``_extract_from_playlist``.

    Raises:
        ExtractorError: if an album cannot be resolved to a playlist, a
            requested tab does not exist, or the page is not recognised.
        UserNotLive: if ``/live`` was requested but a tabbed page came back.
    """
    item_id = self._match_id(url)
    # Force the host to www.youtube.com, keeping every other URL component
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Named groups of _URL_RE, with unmatched groups (None) replaced by ''
        mobj = self._URL_RE.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj, redirect_warning = get_mobj(url), None
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
            elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                mdata = self._extract_tab_endpoint(
                    f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
                murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                    get_all=False, expected_type=str)
                if not murl:
                    raise ExtractorError('Failed to resolve album to playlist')
                return self.url_result(murl, ie=YoutubeTabIE.ie_key())
            elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                pre = f'https://www.youtube.com/channel/{item_id}'

    # Remember the tab the user actually asked for before any rewrite below
    original_tab_name = tab
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from the (possibly rewritten) parts and re-parse it
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = (qs.get(key, [None])[0] for key in ('v', 'list'))

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                   ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Carry the requested tab and trailing part over to the redirect target
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        # The tab titled "home" corresponds to the "/featured" URL segment
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]  # drop the leading '/'
        if 'no-youtube-channel-redirect' not in compat_opts:
            if requested_tab_name == 'live':  # Live tab should have redirected to the video
                raise UserNotLive(video_id=mobj['id'])
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                # Only fall back when the tab was added by this method;
                # a tab the user asked for explicitly is an error (else branch)
                if not original_tab_name:
                    if item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        pl_id = f'UU{item_id[2:]}'
                        pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                        try:
                            data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                        except ExtractorError:
                            redirect_warning += ' and the playlist redirect gave error'
                        else:
                            item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                            redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                    if selected_tab_name and selected_tab_name != requested_tab_name:
                        redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'
                else:
                    raise ExtractorError(redirect_warning, expected=True)

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)
    # `data` may have been replaced above, so look the tabs up again
    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Last resort: a video ID from the page data, else the one from the URL
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
5515
5516
5517 class YoutubePlaylistIE(InfoExtractor):
IE_DESC = 'YouTube playlists'
# Verbose-mode ((?x)) pattern. Every prefix part is optional: the scheme, a
# single subdomain, and a youtube.com / youtubekids.com / Invidious host
# followed by any path and query up to `list=`. The playlist ID itself is
# captured as `id`, so a bare playlist ID with no URL around it also matches.
_VALID_URL = r'''(?x)(?:
(?:https?://)?
(?:\w+\.)?
(?:
(?:
youtube(?:kids)?\.com|
%(invidious)s
)
/.*?\?.*?\blist=
)?
(?P<id>%(playlist_id)s)
)''' % {
    'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
    # The Invidious host patterns are joined into a single alternation
    'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
}
IE_NAME = 'youtube:playlist'
# Test fixtures: bare playlist IDs, /embed/ URLs carrying a `list` parameter,
# and match-only entries.
_TESTS = [{
    'note': 'issue #673',
    'url': 'PLBB231211A4F62143',
    'info_dict': {
        'title': '[OLD]Team Fortress 2 (Class-based LP)',
        'id': 'PLBB231211A4F62143',
        'uploader': 'Wickman',
        'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        'view_count': int,
        'uploader_url': 'https://www.youtube.com/user/Wickydoo',
        'modified_date': r're:\d{8}',
        'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        'channel': 'Wickman',
        'tags': [],
        'channel_url': 'https://www.youtube.com/user/Wickydoo',
    },
    'playlist_mincount': 29,
}, {
    'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
    'info_dict': {
        'title': 'YDL_safe_search',
        'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
    },
    'playlist_count': 2,
    'skip': 'This playlist is private',
}, {
    'note': 'embedded',
    'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
    'playlist_count': 4,
    'info_dict': {
        'title': 'JODA15',
        'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'uploader': 'milan',
        'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        'description': '',
        'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        'tags': [],
        'modified_date': '20140919',
        'view_count': int,
        'channel': 'milan',
        'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
    },
    'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
}, {
    # /embed/<video id> URL that also carries a `list` parameter
    'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
    'playlist_mincount': 455,
    'info_dict': {
        'title': '2018 Chinese New Singles (11/6 updated)',
        'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'uploader': 'LBK',
        'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        'channel': 'LBK',
        'view_count': int,
        'channel_url': 'https://www.youtube.com/c/愛低音的國王',
        'tags': [],
        'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
        'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        'modified_date': r're:\d{8}',
    },
    'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
}, {
    'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
    'only_matching': True,
}, {
    # music album playlist
    'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
    'only_matching': True,
}]
5606
5607 @classmethod
5608 def suitable(cls, url):
5609 if YoutubeTabIE.suitable(url):
5610 return False
5611 from ..utils import parse_qs
5612 qs = parse_qs(url)
5613 if qs.get('v', [None])[0]:
5614 return False
5615 return super().suitable(url)
5616
5617 def _real_extract(self, url):
5618 playlist_id = self._match_id(url)
5619 is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
5620 url = update_url_query(
5621 'https://www.youtube.com/playlist',
5622 parse_qs(url) or {'list': playlist_id})
5623 if is_music_url:
5624 url = smuggle_url(url, {'is_music_url': True})
5625 return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
5626
5627
class YoutubeYtBeIE(InfoExtractor):
    """Rewrite youtu.be short links that carry a `list=` parameter into watch URLs.

    The resulting https://www.youtube.com/watch URL is delegated to YoutubeTabIE.
    """
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url result for the equivalent watch URL (video + playlist)."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        # The result is identified by the playlist, not by the single video
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
5677
5678
class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map `embed/live_stream?channel=...` URLs to the channel's `/live` page."""
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel ID taken from the query string."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
5692
5693
class YoutubeYtUserIE(InfoExtractor):
    """Expand the `ytuser:<name>` shorthand into the user's `/videos` page."""
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with everything after the prefix as user ID."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
5708
5709
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the `:ytfav` keyword family to the `LL` playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the keyword itself carries no further data."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
5727
5728
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Expose the notification menu of the account as a playlist of url results."""
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield one entry per usable notification found in `response`.

        `continuation_list` is a one-element list used as an out-parameter:
        slot 0 is reset to None and then set to the last
        `continuationItemRenderer` seen, if any.
        """
        menu_items = traverse_obj(
            response,
            # first page of the menu
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            # continuation pages
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list) or []
        continuation_list[0] = None
        for menu_item in menu_items:
            entry = self._extract_notification_renderer(menu_item.get('notificationRenderer'))
            if entry:
                yield entry
            next_page = menu_item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Turn a single notification renderer into a url result dict.

        Returns None when the notification points to neither a video nor a
        community post.
        """
        channel_id = None
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)

        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title_pattern = rf'{re.escape(channel or "")}[^:]+: (.+)'
        title = self._search_regex(title_pattern, notification_title, 'video title', default=None)

        # The date is only derived when the `approximate_date` extractor argument is set
        upload_date = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()):
            upload_date = strftime_or_none(
                self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Page through the notification menu, yielding entries until no continuation is left."""
        continuation_list = [None]
        response = None
        for page_num in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data is taken from the previous page's response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page_num}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return the notifications as a lazily evaluated playlist."""
        display_id = 'notifications'
        ytcfg = {} if self.skip_webpage else self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)
5818
5819
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor behind the `ytsearch` key; only sets class-level configuration."""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]
5833
5834
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor behind the `ytsearchdate` key; only sets class-level configuration."""
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]
5848
5849
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle youtube.com `/results` and `/search` URLs, honouring the `sp` filter parameter."""
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the search results as a playlist whose id and title are the query."""
        qs = parse_qs(url)
        # `search_query` takes precedence over `q`
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        search_params = qs.get('sp', (None,))[0]
        return self.playlist_result(self._search_results(query, search_params), query, query)
5889
5890
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle music.youtube.com search URLs.

    A result section can be selected either through the `sp` query parameter
    or through a URL fragment naming one of the keys of _SECTIONS.
    """
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        },
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'},
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'},
    }]

    # Section name (as written in the URL fragment) -> value of the `sp` parameter
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return the search results as a playlist titled `<query>[ - <section>]`."""
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]
        if params:
            # Reverse lookup of the section name; unknown tokens are used verbatim as the name
            section = next(
                (name for name, token in self._SECTIONS.items() if token == params), params)
        else:
            # Text between the first and second '#', or '' when the URL has no fragment
            fragment = (url.split('#') + [''])[1]
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None
        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)
5942
5943
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base of the feed extractors

    Each subclass overrides _FEED_NAME; both IE_NAME and the feed URL
    handed to YoutubeTabIE are derived from it.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    @classproperty
    def IE_NAME(cls):
        return f'youtube:{cls._FEED_NAME}'

    def _real_initialize(self):
        # This class does not inherit from YoutubeBaseInfoExtractor, so its check is called unbound
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
5962
5963
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the `:ytwatchlater` keyword to the `WL` playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the keyword itself carries no further data."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
5976
5977
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `recommended` feed; also matches the bare youtube.com homepage."""
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    # Overrides the base class default of True
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
5993
5994
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `subscriptions` feed, reached through the `:ytsubs` keyword family."""
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
6006
6007
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `history` feed, reached through `:ythis` / `:ythistory`."""
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
6016
6017
class YoutubeStoriesIE(InfoExtractor):
    """Expand the `ytstories:<channel id>` shorthand into the channel's stories playlist.

    The playlist ID is the channel ID with its leading `UC` replaced by `RLTD`.
    """
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    IE_NAME = 'youtube:stories'
    # The captured `id` group deliberately excludes the `UC` prefix
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE, smuggling `is_story` along with the playlist URL."""
        playlist_id = f'RLTD{self._match_id(url)}'
        return self.url_result(
            smuggle_url(f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1', {'is_story': True}),
            # Pass the key string, like every other redirecting extractor in this file
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
6032
6033
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that lack a video ID and explain the likely cause.

    Such URLs typically result from an unquoted `&` on the command line; this
    extractor never extracts anything and always raises an expected error.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError describing how to quote the URL."""
        # The example commands name this program (yt-dlp), not its predecessor
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
6081
6082
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Resolve a `/clip/<id>` URL to its base video plus the clip's start/end section."""
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """Return a url_transparent result for the base video, limited to the clip section.

        Raises ExtractorError when either the base video ID or the clip's
        start/end times cannot be found in the page data.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)

        # clip_data is None when the path above is absent; fail with a clear
        # message instead of a TypeError/KeyError from subscripting it
        start_ms = int_or_none(traverse_obj(clip_data, 'startTimeMs'))
        end_ms = int_or_none(traverse_obj(clip_data, 'endTimeMs'))
        if start_ms is None or end_ms is None:
            raise ExtractorError('Unable to find clip start and end times')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            # The page reports milliseconds; sections are expressed in seconds
            'section_start': start_ms / 1000,
            'section_end': end_ms / 1000,
        }
6139
6140
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose `v` value is 1-10 characters long and report them as truncated."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)