]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/youtube.py
4a5d6805e9dbf700f6ad1af91c617099b239d2cc
[yt-dlp.git] / yt_dlp / extractor / youtube.py
1 import base64
2 import calendar
3 import copy
4 import datetime
5 import hashlib
6 import itertools
7 import json
8 import math
9 import os.path
10 import random
11 import re
12 import sys
13 import threading
14 import time
15 import traceback
16 import urllib.error
17 import urllib.parse
18
19 from .common import InfoExtractor, SearchInfoExtractor
20 from .openload import PhantomJSwrapper
21 from ..compat import functools
22 from ..jsinterp import JSInterpreter
23 from ..utils import (
24 NO_DEFAULT,
25 ExtractorError,
26 UserNotLive,
27 bug_reports_message,
28 classproperty,
29 clean_html,
30 datetime_from_str,
31 dict_get,
32 float_or_none,
33 format_field,
34 get_first,
35 int_or_none,
36 is_html,
37 join_nonempty,
38 js_to_json,
39 mimetype2ext,
40 network_exceptions,
41 orderedSet,
42 parse_codecs,
43 parse_count,
44 parse_duration,
45 parse_iso8601,
46 parse_qs,
47 qualities,
48 remove_start,
49 smuggle_url,
50 str_or_none,
51 str_to_int,
52 strftime_or_none,
53 traverse_obj,
54 try_get,
55 unescapeHTML,
56 unified_strdate,
57 unified_timestamp,
58 unsmuggle_url,
59 update_url_query,
60 url_or_none,
61 urljoin,
62 variadic,
63 )
64
65 # any clients starting with _ cannot be explicitly requested by the user
66 INNERTUBE_CLIENTS = {
67 'web': {
68 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
69 'INNERTUBE_CONTEXT': {
70 'client': {
71 'clientName': 'WEB',
72 'clientVersion': '2.20220801.00.00',
73 }
74 },
75 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
76 },
77 'web_embedded': {
78 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
79 'INNERTUBE_CONTEXT': {
80 'client': {
81 'clientName': 'WEB_EMBEDDED_PLAYER',
82 'clientVersion': '1.20220731.00.00',
83 },
84 },
85 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
86 },
87 'web_music': {
88 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
89 'INNERTUBE_HOST': 'music.youtube.com',
90 'INNERTUBE_CONTEXT': {
91 'client': {
92 'clientName': 'WEB_REMIX',
93 'clientVersion': '1.20220727.01.00',
94 }
95 },
96 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
97 },
98 'web_creator': {
99 'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
100 'INNERTUBE_CONTEXT': {
101 'client': {
102 'clientName': 'WEB_CREATOR',
103 'clientVersion': '1.20220726.00.00',
104 }
105 },
106 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
107 },
108 'android': {
109 'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
110 'INNERTUBE_CONTEXT': {
111 'client': {
112 'clientName': 'ANDROID',
113 'clientVersion': '17.31.35',
114 'androidSdkVersion': 30,
115 'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
116 }
117 },
118 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
119 'REQUIRE_JS_PLAYER': False
120 },
121 'android_embedded': {
122 'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
123 'INNERTUBE_CONTEXT': {
124 'client': {
125 'clientName': 'ANDROID_EMBEDDED_PLAYER',
126 'clientVersion': '17.31.35',
127 'androidSdkVersion': 30,
128 'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
129 },
130 },
131 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
132 'REQUIRE_JS_PLAYER': False
133 },
134 'android_music': {
135 'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
136 'INNERTUBE_CONTEXT': {
137 'client': {
138 'clientName': 'ANDROID_MUSIC',
139 'clientVersion': '5.16.51',
140 'androidSdkVersion': 30,
141 'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
142 }
143 },
144 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
145 'REQUIRE_JS_PLAYER': False
146 },
147 'android_creator': {
148 'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
149 'INNERTUBE_CONTEXT': {
150 'client': {
151 'clientName': 'ANDROID_CREATOR',
152 'clientVersion': '22.30.100',
153 'androidSdkVersion': 30,
154 'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
155 },
156 },
157 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
158 'REQUIRE_JS_PLAYER': False
159 },
160 # iOS clients have HLS live streams. Setting device model to get 60fps formats.
161 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
162 'ios': {
163 'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
164 'INNERTUBE_CONTEXT': {
165 'client': {
166 'clientName': 'IOS',
167 'clientVersion': '17.33.2',
168 'deviceModel': 'iPhone14,3',
169 'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
170 }
171 },
172 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
173 'REQUIRE_JS_PLAYER': False
174 },
175 'ios_embedded': {
176 'INNERTUBE_CONTEXT': {
177 'client': {
178 'clientName': 'IOS_MESSAGES_EXTENSION',
179 'clientVersion': '17.33.2',
180 'deviceModel': 'iPhone14,3',
181 'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
182 },
183 },
184 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
185 'REQUIRE_JS_PLAYER': False
186 },
187 'ios_music': {
188 'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
189 'INNERTUBE_CONTEXT': {
190 'client': {
191 'clientName': 'IOS_MUSIC',
192 'clientVersion': '5.21',
193 'deviceModel': 'iPhone14,3',
194 'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
195 },
196 },
197 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
198 'REQUIRE_JS_PLAYER': False
199 },
200 'ios_creator': {
201 'INNERTUBE_CONTEXT': {
202 'client': {
203 'clientName': 'IOS_CREATOR',
204 'clientVersion': '22.33.101',
205 'deviceModel': 'iPhone14,3',
206 'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
207 },
208 },
209 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
210 'REQUIRE_JS_PLAYER': False
211 },
212 # mweb has 'ultralow' formats
213 # See: https://github.com/yt-dlp/yt-dlp/pull/557
214 'mweb': {
215 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
216 'INNERTUBE_CONTEXT': {
217 'client': {
218 'clientName': 'MWEB',
219 'clientVersion': '2.20220801.00.00',
220 }
221 },
222 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
223 },
224 # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
225 # See: https://github.com/zerodytrash/YouTube-Internal-Clients
226 'tv_embedded': {
227 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
228 'INNERTUBE_CONTEXT': {
229 'client': {
230 'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
231 'clientVersion': '2.0',
232 },
233 },
234 'INNERTUBE_CONTEXT_CLIENT_NAME': 85
235 },
236 }
237
238
def _split_innertube_client(client_name):
    """Break a client name into a ``(name, base, variant)`` triple.

    A dotted name ``X.Y`` (split at the last dot) gives ``(X, Y, X)``.
    Any other name is split at its first underscore: ``base_variant`` gives
    ``(client_name, base, variant)`` and a name without an underscore gives
    ``(client_name, client_name, None)``.
    """
    head, dot, tail = client_name.rpartition('.')
    if dot:
        return head, tail, head
    base, underscore, variant = client_name.partition('_')
    return client_name, base, variant if underscore else None
245
246
def build_innertube_clients():
    """Complete every entry of INNERTUBE_CLIENTS in place.

    For each client already in the table this fills in a default API key,
    host, ``REQUIRE_JS_PLAYER`` flag and ``hl`` language, and stores a
    ``priority`` computed from the client's base name and variant.
    Clients without a variant additionally get a derived
    ``<client>_embedscreen`` entry added to the table.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
    # NOTE(review): presumably ranks earlier entries of base_clients higher - confirm against utils.qualities
    rank = qualities(base_clients[::-1])
    fallbacks = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: "*_embedscreen" entries are inserted during the loop
    for name, cfg in tuple(INNERTUBE_CLIENTS.items()):
        for key, value in fallbacks:
            cfg.setdefault(key, value)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        _, base_client, variant = _split_innertube_client(name)
        cfg['priority'] = 10 * rank(base_client)

        if variant == 'embedded':
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            cfg['priority'] -= 2
        elif variant:
            cfg['priority'] -= 3
        else:
            # Base client: keep it as is and register an embed-screen copy of it
            embedscreen = copy.deepcopy(cfg)
            embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            embedscreen['priority'] -= 3
            INNERTUBE_CLIENTS[f'{name}_embedscreen'] = embedscreen
273
274
275 build_innertube_clients()
276
277
278 class YoutubeBaseInfoExtractor(InfoExtractor):
279 """Provide base functions for Youtube extractors"""
280
281 _RESERVED_NAMES = (
282 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
283 r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
284 r'browse|oembed|get_video_info|iframe_api|s/player|'
285 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
286
287 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
288
289 # _NETRC_MACHINE = 'youtube'
290
291 # If True it will raise an error if no login info is provided
292 _LOGIN_REQUIRED = False
293
294 _INVIDIOUS_SITES = (
295 # invidious-redirect websites
296 r'(?:www\.)?redirect\.invidious\.io',
297 r'(?:(?:www|dev)\.)?invidio\.us',
298 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
299 r'(?:www\.)?invidious\.pussthecat\.org',
300 r'(?:www\.)?invidious\.zee\.li',
301 r'(?:www\.)?invidious\.ethibox\.fr',
302 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
303 r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
304 r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
305 # youtube-dl invidious instances list
306 r'(?:(?:www|no)\.)?invidiou\.sh',
307 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
308 r'(?:www\.)?invidious\.kabi\.tk',
309 r'(?:www\.)?invidious\.mastodon\.host',
310 r'(?:www\.)?invidious\.zapashcanon\.fr',
311 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
312 r'(?:www\.)?invidious\.tinfoil-hat\.net',
313 r'(?:www\.)?invidious\.himiko\.cloud',
314 r'(?:www\.)?invidious\.reallyancient\.tech',
315 r'(?:www\.)?invidious\.tube',
316 r'(?:www\.)?invidiou\.site',
317 r'(?:www\.)?invidious\.site',
318 r'(?:www\.)?invidious\.xyz',
319 r'(?:www\.)?invidious\.nixnet\.xyz',
320 r'(?:www\.)?invidious\.048596\.xyz',
321 r'(?:www\.)?invidious\.drycat\.fr',
322 r'(?:www\.)?inv\.skyn3t\.in',
323 r'(?:www\.)?tube\.poal\.co',
324 r'(?:www\.)?tube\.connect\.cafe',
325 r'(?:www\.)?vid\.wxzm\.sx',
326 r'(?:www\.)?vid\.mint\.lgbt',
327 r'(?:www\.)?vid\.puffyan\.us',
328 r'(?:www\.)?yewtu\.be',
329 r'(?:www\.)?yt\.elukerio\.org',
330 r'(?:www\.)?yt\.lelux\.fi',
331 r'(?:www\.)?invidious\.ggc-project\.de',
332 r'(?:www\.)?yt\.maisputain\.ovh',
333 r'(?:www\.)?ytprivate\.com',
334 r'(?:www\.)?invidious\.13ad\.de',
335 r'(?:www\.)?invidious\.toot\.koeln',
336 r'(?:www\.)?invidious\.fdn\.fr',
337 r'(?:www\.)?watch\.nettohikari\.com',
338 r'(?:www\.)?invidious\.namazso\.eu',
339 r'(?:www\.)?invidious\.silkky\.cloud',
340 r'(?:www\.)?invidious\.exonip\.de',
341 r'(?:www\.)?invidious\.riverside\.rocks',
342 r'(?:www\.)?invidious\.blamefran\.net',
343 r'(?:www\.)?invidious\.moomoo\.de',
344 r'(?:www\.)?ytb\.trom\.tf',
345 r'(?:www\.)?yt\.cyberhost\.uk',
346 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
347 r'(?:www\.)?qklhadlycap4cnod\.onion',
348 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
349 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
350 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
351 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
352 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
353 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
354 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
355 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
356 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
357 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
358 # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
359 r'(?:www\.)?piped\.kavin\.rocks',
360 r'(?:www\.)?piped\.silkky\.cloud',
361 r'(?:www\.)?piped\.tokhmi\.xyz',
362 r'(?:www\.)?piped\.moomoo\.me',
363 r'(?:www\.)?il\.ax',
364 r'(?:www\.)?piped\.syncpundit\.com',
365 r'(?:www\.)?piped\.mha\.fi',
366 r'(?:www\.)?piped\.mint\.lgbt',
367 r'(?:www\.)?piped\.privacy\.com\.de',
368 )
369
def _initialize_consent(self):
    """Make sure youtube.com has a ``CONSENT`` cookie holding a ``YES`` value.

    Nothing is changed when a ``__Secure-3PSID`` cookie exists or when the
    current ``CONSENT`` cookie already contains ``YES``. Otherwise the cookie
    is (re)written, reusing the id of a ``PENDING+<digits>`` consent value
    when there is one and a random number in 100..999 when there is not.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return

    pending_id = None
    current = jar.get('CONSENT')
    if current:
        if 'YES' in current.value:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', current.value, 'consent', default=None)

    consent_id = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
384
def _initialize_pref(self):
    """Rewrite the youtube.com ``PREF`` cookie with ``hl=en`` and ``tz=UTC``.

    Settings found in an existing ``PREF`` cookie are kept; only ``hl`` and
    ``tz`` are overridden. If the existing value cannot be parsed a warning
    is reported and the cookie is rebuilt from just those two settings.
    """
    pref_cookie = self._get_cookies('https://www.youtube.com/').get('PREF')
    settings = {}
    if pref_cookie:
        try:
            settings = dict(urllib.parse.parse_qsl(pref_cookie.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
    settings['hl'] = 'en'
    settings['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(settings))
396
def _real_initialize(self):
    """Prepare the PREF and CONSENT cookies, then run the login-requirement check."""
    self._initialize_pref()
    self._initialize_consent()
    self._check_login_required()
401
def _check_login_required(self):
    """Call ``raise_login_required`` when login is mandatory and no cookies were passed."""
    if not self._LOGIN_REQUIRED or self._cookies_passed:
        return
    self.raise_login_required('Login details are needed to download this content', method='cookies')
405
406 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
407 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
408
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the INNERTUBE_CLIENTS entry for `client`.

    Because a copy is handed out, callers may modify the result without
    touching the shared table. An unknown `client` raises KeyError.
    """
    return copy.deepcopy(INNERTUBE_CLIENTS[client])
411
def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' value stored for `client` in INNERTUBE_CLIENTS."""
    return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
414
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """Look a value up in `ytcfg`, falling back to the default client config.

    The lookup is done with ``try_get(ytcfg, getter, expected_type)``. When
    that gives a falsy result, the same lookup is repeated on the default
    ytcfg of `default_client` and its result is returned instead.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
419
def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name string from `ytcfg`, or from the default config.

    'INNERTUBE_CLIENT_NAME' is tried first, then
    INNERTUBE_CONTEXT -> client -> clientName.
    """
    getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)
424
def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version string from `ytcfg`, or from the default config.

    'INNERTUBE_CLIENT_VERSION' is tried first, then
    INNERTUBE_CONTEXT -> client -> clientVersion.
    """
    getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)
429
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """Choose the hostname to use for Innertube API requests.

    Precedence: the `innertube_host` extractor argument, then
    `req_api_hostname`, then the host configured for `default_client`
    ('web' when no client is given).
    """
    configured = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured:
        return configured
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
433
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return the 'INNERTUBE_API_KEY' string from `ytcfg`, or from the default config of `default_client`."""
    return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
436
def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the 'INNERTUBE_CONTEXT' dict to send with API requests.

    The context is looked up in `ytcfg` first and in the default config of
    `default_client` second. Its 'client' dict, when present, is updated
    in place to force English and UTC.
    NOTE(review): when the context comes from `ytcfg` this presumably
    modifies the caller's dict - confirm get_first() does not copy.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    enforced = {'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0}
    traverse_obj(context, 'client', expected_type=dict, default={}).update(enforced)
    return context
444
445 _SAPISID = None
446
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """Build the ``SAPISIDHASH`` authorization header value for `origin`.

    On first use the SAPISID value is read from the youtube.com cookies
    (``__Secure-3PAPISID`` or ``SAPISID``) and remembered on ``self._SAPISID``;
    ``False`` is stored when no such cookie exists so the lookup is not
    repeated. Returns None when no SAPISID value is available.
    """
    time_now = round(time.time())
    if self._SAPISID is None:
        yt_cookies = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        sapisid_cookie = dict_get(yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
        if not (sapisid_cookie and sapisid_cookie.value):
            self._SAPISID = False
        else:
            self._SAPISID = sapisid_cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not yt_cookies.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

    if not self._SAPISID:
        return None

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    payload = f'{time_now} {self._SAPISID} {origin}'
    digest = hashlib.sha1(payload.encode()).hexdigest()
    return f'SAPISIDHASH {time_now}_{digest}'
471
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST `query` to the Innertube endpoint `ep` and return the parsed JSON.

    The request body is the context (the given `context`, or the one
    extracted for `default_client`) merged with `query`. Headers come from
    generate_api_headers() plus a JSON content type, then any `headers`
    passed in. The API key is taken from the `innertube_key` extractor
    argument, then `api_key`, then the config of `default_client`.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    api_key = (self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
               or api_key or self._extract_api_key(default_client=default_client))
    hostname = self._select_api_hostname(api_hostname, default_client)
    return self._download_json(
        f'https://{hostname}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})
489
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Return the JSON found after a `_YT_INITIAL_DATA_RE` match in `webpage`.

    Thin wrapper around self._search_json(); `fatal` is passed through to it.
    """
    return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)
492
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first usable 'SESSION_INDEX' found in the given ytcfgs, or None.
    """
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)
503
# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the identity token from `ytcfg` ('ID_TOKEN'), else from `webpage`, else None."""
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
514
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    @returns the first channel sync id found in the arguments, or None
    """
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated:
            return delegated
        datasync_id = try_get(
            source, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                     lambda x: x['DATASYNC_ID']), str) or ''
        channel_syncid, *remainder = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if remainder and remainder[0]:
            return channel_syncid
    return None
533
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state

    Each argument is searched for 'VISITOR_DATA',
    INNERTUBE_CONTEXT -> client -> visitorData and
    responseContext -> visitorData; only string values are accepted.
    """
    return get_first(
        args, [('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))],
        expected_type=str)
543
@functools.cached_property
def is_authenticated(self):
    """True when _generate_sapisidhash_header() yields a header value, i.e. a SAPISID cookie is available."""
    return bool(self._generate_sapisidhash_header())
547
def extract_ytcfg(self, video_id, webpage):
    """Return the dict passed to ``ytcfg.set(...)`` in `webpage`.

    An empty dict is returned when there is no webpage, when no
    ``ytcfg.set`` call is found, or when parsing gives nothing.
    """
    if not webpage:
        return {}
    raw_config = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', default='{}')
    parsed = self._parse_json(raw_config, video_id, fatal=False)
    return parsed or {}
555
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an Innertube API request.

    Explicit keyword arguments win over values extracted from `ytcfg`;
    client name, version and user agent fall back to the default config of
    `default_client`. 'X-Goog-AuthUser' is added when an account sync id
    or a session index is known, and 'Authorization' / 'X-Origin' are
    added when a SAPISIDHASH can be generated. Headers whose value is None
    are left out of the returned dict.
    """
    origin = 'https://' + self._select_api_hostname(api_hostname, default_client)
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
        'User-Agent': self._ytcfg_get_safe(
            ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers.update({'Authorization': auth, 'X-Origin': origin})
    return {name: value for name, value in headers.items() if value is not None}
581
def _download_ytcfg(self, client, video_id):
    """Download the page that carries the ytcfg of `client` and return that ytcfg.

    Only 'web', 'web_music' and 'web_embedded' have such a page; for any
    other client, or when nothing can be extracted, an empty dict is returned.
    """
    config_pages = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }
    url = config_pages.get(client)
    if not url:
        return {}
    note = f'Downloading {client.replace("_", " ").strip()} client config'
    webpage = self._download_webpage(url, video_id, fatal=False, note=note)
    return self.extract_ytcfg(video_id, webpage) or {}
593
@staticmethod
def _build_api_continuation_query(continuation, ctp=None):
    """Return the API query dict for a continuation token.

    'clickTracking' is included only when click-tracking params (`ctp`) are given.
    """
    # TODO: Inconsistency with clickTrackingParams.
    # Currently we have a fixed ctp contained within context (from ytcfg)
    # and a ctp in root query for continuation.
    if not ctp:
        return {'continuation': continuation}
    return {
        'continuation': continuation,
        'clickTracking': {'clickTrackingParams': ctp},
    }
605
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from a renderer's next/reload continuation data.

    Looks at continuations[0].nextContinuationData first and at
    continuation.reloadContinuationData second. Returns None when neither
    is present or when it carries no 'continuation' token.
    """
    data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = data.get('continuation') if data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, data.get('clickTrackingParams'))
618
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    Returns None when `continuation_ep` is not a dict or has no
    continuationCommand -> token string.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))
628
@classmethod
def _extract_continuation(cls, renderer):
    """Find the continuation query for `renderer`, or return None.

    The renderer's own next/reload continuation data is preferred. Failing
    that, the entries of its 'contents' and 'items' lists are scanned, in
    that order, for a continuationItemRenderer whose endpoint (or button
    command) yields a continuation token.
    """
    found = cls._extract_next_continuation_data(renderer)
    if found:
        return found

    children = [
        child
        for key in ('contents', 'items')
        for child in try_get(renderer, lambda x: x[key], list) or []
    ]
    for child in children:
        if not isinstance(child, dict):
            continue
        endpoint = try_get(
            child, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                    lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
            dict)
        found = cls._extract_continuation_ep_data(endpoint)
        if found:
            return found
    return None
649
@classmethod
def _extract_alerts(cls, data):
    """Yield ``(alert_type, message)`` pairs for the alerts in an API response.

    @param data: response dict; its 'alerts' list is inspected
    Entries that are not dicts, alert renderers that are not dicts, and
    alerts without a 'type' or without any text are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # The renderer values come straight from the response; skip anything
            # that is not a mapping rather than failing on .get() below
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
662
def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """Report YouTube alerts as warnings and raise on the last error.

    @param alerts: iterable of (alert_type, alert_message) pairs
    @param fatal: when true, alerts of type 'error' (any case) are errors;
                  otherwise every alert is only a warning
    All warnings and all errors but the last are passed to report_warning();
    the last error is raised as ExtractorError with `expected` passed on.
    """
    errors, warnings = [], []
    for alert_type, alert_message in alerts:
        is_error = alert_type.lower() == 'error' and fatal
        (errors if is_error else warnings).append([alert_type, alert_message])

    for alert_type, alert_message in warnings + errors[:-1]:
        self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
    if not errors:
        return
    raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
676
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from `data` and hand them to _report_alerts(); extra arguments are passed through."""
    return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
679
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadataBadgeRenderer labels in renderer['badges']."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}
687
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found in `data`, or None.

    @param path_list: paths to try in order; with no path, `data` itself is used
    @param max_runs:  when set, only the first `max_runs` runs are joined
    A text object is read from its 'simpleText' string or, failing that,
    from the concatenated 'text' values of its 'runs' (a bare list is
    treated as a list of runs).
    """
    for path in path_list or [None]:
        if path is None:
            obj = [data]
        else:
            obj = traverse_obj(data, path, default=[])
            # Wrap the result unless the path contains `...` or a list/tuple key;
            # NOTE(review): presumably such paths already yield a list of matches - confirm against traverse_obj
            if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                obj = [obj]
        for item in obj:
            text = try_get(item, lambda x: x['simpleText'], str)
            if text:
                return text
            runs = try_get(item, lambda x: x['runs'], list) or []
            # No 'runs' key: a list item is itself taken as the runs
            if not runs and isinstance(item, list):
                runs = item

            # A falsy max_runs (None or 0) keeps every run
            runs = runs[:min(len(runs), max_runs or len(runs))]
            text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
            if text:
                return text
709
def _get_count(self, data, *path_list):
    """Return the count described by the text at `path_list` in `data`.

    parse_count() is tried first; if it gives None, the leading run of
    digits and commas (after removing all whitespace) is converted with
    str_to_int().
    """
    count_text = self._get_text(data, *path_list) or ''
    count = parse_count(count_text)
    if count is not None:
        return count
    leading_digits = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', count_text), 'count', default=None)
    return str_to_int(leading_digits)
717
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @returns list of {'url', 'height', 'width'} dicts; entries that are not
             dicts or that have no valid URL are skipped
    """
    thumbnails = []
    for path in path_list or [()]:
        for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[]):
            # Entries come straight from the response; skip anything that is
            # not a mapping rather than failing on .get() below
            if not isinstance(thumbnail, dict):
                continue
            thumbnail_url = url_or_none(thumbnail.get('url'))
            if not thumbnail_url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in thumbnail_url:
                thumbnail_url = thumbnail_url.split('?')[0]
            thumbnails.append({
                'url': thumbnail_url,
                'height': int_or_none(thumbnail.get('height')),
                'width': int_or_none(thumbnail.get('width')),
            })
    return thumbnails
741
@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

    Returns None when the text has no relative time or it cannot be converted.
    """
    mobj = re.search(r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
    if not mobj:
        return None

    start = mobj.group('start')
    if start:
        return datetime_from_str(start)
    try:
        return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
    except ValueError:
        return None
757
def _extract_time_text(self, renderer, *path_list):
    """@returns (timestamp, time_text)

    The text at `path_list` is first read as a relative time ("3 days ago").
    If that fails it is parsed as a date, either as a whole or from the
    date-like part picked out by a regex. A warning is reported, once,
    when non-empty text cannot be parsed at all.
    """
    text = self._get_text(renderer, *path_list) or ''
    relative = self.extract_relative_time(text)
    if isinstance(relative, datetime.datetime):
        timestamp = calendar.timegm(relative.timetuple())
    else:
        timestamp = unified_timestamp(text)
        if not timestamp:
            date_part = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_part)

    if text and timestamp is None:
        self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
    return timestamp, text
776
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """Call the Innertube API with retries and return the validated response.

    @param check_get_keys: key path(s) passed to traverse_obj(); a response
                           for which they yield nothing is treated as incomplete
    @param fatal:          forwarded to _error_or_warning() for failures that
                           are not retried
    Each pass of the loop either returns, or records the failure on
    `retry.error` and continues with the next attempt.
    NOTE(review): how many attempts are made and what happens after the
    last one is decided by self.RetryManager() - not visible here.
    """
    for retry in self.RetryManager():
        try:
            # Always fatal here so failures surface as ExtractorError and can be classified below
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            # Not a network problem: give up immediately
            if not isinstance(e.cause, network_exceptions):
                return self._error_or_warning(e, fatal=fatal)
            # Network problem without an HTTP response: retry
            elif not isinstance(e.cause, urllib.error.HTTPError):
                retry.error = e
                continue

            # HTTP error: a non-HTML body may hold a JSON error message worth showing
            first_bytes = e.cause.read(512)
            if not is_html(first_bytes):
                yt_error = try_get(
                    self._parse_json(
                        self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if e.cause.code not in (403, 429):
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            retry.error = ExtractorError('Incomplete data received', expected=True)
            continue

        return response
828
@staticmethod
def is_music_url(url):
    """Return True when `url` starts with http(s)://music.youtube.com/."""
    return bool(re.match(r'https?://music\.youtube\.com/', url))
832
def _extract_video(self, renderer):
    """Turn a video renderer dict into a url-type result handled by YoutubeIE.

    @param renderer: dict describing one video in a listing
    @returns info dict with '_type': 'url' plus the metadata that can be
             read from the renderer (title, duration, view count, ...)
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')
    duration = parse_duration(self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # Fall back to the duration spelled out at the end of the title's accessibility label
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str),
            video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''
    url = f'https://www.youtube.com/watch?v={video_id}'
    # Use the /shorts/ URL when the overlay style or the navigation URL marks the video as a short
    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        # Derived from the published-time text, so only set when the
        # 'approximate_date' extractor argument of youtubetab is given
        'upload_date': (strftime_or_none(timestamp, '%Y%m%d')
                        if self._configuration_arg('approximate_date', ie_key='youtubetab')
                        else None),
        # First matching rule wins: scheduled start, then "streamed" text, then live overlay/badge
        'live_status': ('is_upcoming' if scheduled_timestamp is not None
                        else 'was_live' if 'streamed' in time_text.lower()
                        else 'is_live' if overlay_style == 'LIVE' or 'live now' in badges
                        else None),
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
    }
887
888
889 class YoutubeIE(YoutubeBaseInfoExtractor):
890 IE_DESC = 'YouTube'
891 _VALID_URL = r"""(?x)^
892 (
893 (?:https?://|//) # http(s):// or protocol-independent URL
894 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
895 (?:www\.)?deturl\.com/www\.youtube\.com|
896 (?:www\.)?pwnyoutube\.com|
897 (?:www\.)?hooktube\.com|
898 (?:www\.)?yourepeat\.com|
899 tube\.majestyc\.net|
900 %(invidious)s|
901 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
902 (?:.*?\#/)? # handle anchor (#/) redirect urls
903 (?: # the various things that can precede the ID:
904 (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
905 |(?: # or the v= param in all its forms
906 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
907 (?:\?|\#!?) # the params delimiter ? or # or #!
908 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
909 v=
910 )
911 ))
912 |(?:
913 youtu\.be| # just youtu.be/xxxx
914 vid\.plus| # or vid.plus/xxxx
915 zwearz\.com/watch| # or zwearz.com/watch/xxxx
916 %(invidious)s
917 )/
918 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
919 )
920 )? # all until now is optional -> you can pass the naked ID
921 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
922 (?(1).+)? # if we found the ID, everything can follow
923 (?:\#|$)""" % {
924 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
925 }
926 _EMBED_REGEX = [
927 r'''(?x)
928 (?:
929 <iframe[^>]+?src=|
930 data-video-url=|
931 <embed[^>]+?src=|
932 embedSWF\(?:\s*|
933 <object[^>]+data=|
934 new\s+SWFObject\(
935 )
936 (["\'])
937 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
938 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
939 \1''',
940 # https://wordpress.org/plugins/lazy-load-for-videos/
941 r'''(?xs)
942 <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
943 \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
944 ]
945
946 _PLAYER_INFO_RE = (
947 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
948 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
949 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
950 )
951 _formats = {
952 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
953 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
954 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
955 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
956 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
957 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
958 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
959 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
960 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
961 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
962 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
963 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
964 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
965 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
966 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
967 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
968 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
969 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
970
971
972 # 3D videos
973 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
974 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
975 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
976 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
977 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
978 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
979 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
980
981 # Apple HTTP Live Streaming
982 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
983 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
984 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
985 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
986 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
987 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
988 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
989 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
990
991 # DASH mp4 video
992 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
993 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
994 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
995 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
996 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
997 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
998 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
999 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1000 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1001 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1002 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1003 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
1004
1005 # Dash mp4 audio
1006 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1007 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1008 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1009 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1010 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1011 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1012 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
1013
1014 # Dash webm
1015 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1016 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1017 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1018 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1019 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1020 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1021 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1022 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1023 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1024 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1025 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1026 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1027 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1028 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1029 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1030 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1031 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1032 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1033 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1034 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1035 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1036 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1037
1038 # Dash webm audio
1039 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1040 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1041
1042 # Dash webm audio with opus inside
1043 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1044 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1045 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1046
1047 # RTMP (unnamed)
1048 '_rtmp': {'protocol': 'rtmp'},
1049
1050 # av01 video only formats sometimes served with "unknown" codecs
1051 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1052 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1053 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1054 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1055 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1056 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1057 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1058 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1059 }
1060 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
1061
1062 _GEO_BYPASS = False
1063
1064 IE_NAME = 'youtube'
1065 _TESTS = [
1066 {
1067 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1068 'info_dict': {
1069 'id': 'BaW_jenozKc',
1070 'ext': 'mp4',
1071 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1072 'uploader': 'Philipp Hagemeister',
1073 'uploader_id': 'phihag',
1074 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1075 'channel': 'Philipp Hagemeister',
1076 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1077 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1078 'upload_date': '20121002',
1079 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1080 'categories': ['Science & Technology'],
1081 'tags': ['youtube-dl'],
1082 'duration': 10,
1083 'view_count': int,
1084 'like_count': int,
1085 'availability': 'public',
1086 'playable_in_embed': True,
1087 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1088 'live_status': 'not_live',
1089 'age_limit': 0,
1090 'start_time': 1,
1091 'end_time': 9,
1092 'comment_count': int,
1093 'channel_follower_count': int
1094 }
1095 },
1096 {
1097 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1098 'note': 'Embed-only video (#1746)',
1099 'info_dict': {
1100 'id': 'yZIXLfi8CZQ',
1101 'ext': 'mp4',
1102 'upload_date': '20120608',
1103 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1104 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1105 'uploader': 'SET India',
1106 'uploader_id': 'setindia',
1107 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1108 'age_limit': 18,
1109 },
1110 'skip': 'Private video',
1111 },
1112 {
1113 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1114 'note': 'Use the first video ID in the URL',
1115 'info_dict': {
1116 'id': 'BaW_jenozKc',
1117 'ext': 'mp4',
1118 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1119 'uploader': 'Philipp Hagemeister',
1120 'uploader_id': 'phihag',
1121 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1122 'channel': 'Philipp Hagemeister',
1123 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1124 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1125 'upload_date': '20121002',
1126 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1127 'categories': ['Science & Technology'],
1128 'tags': ['youtube-dl'],
1129 'duration': 10,
1130 'view_count': int,
1131 'like_count': int,
1132 'availability': 'public',
1133 'playable_in_embed': True,
1134 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1135 'live_status': 'not_live',
1136 'age_limit': 0,
1137 'comment_count': int,
1138 'channel_follower_count': int
1139 },
1140 'params': {
1141 'skip_download': True,
1142 },
1143 },
1144 {
1145 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1146 'note': '256k DASH audio (format 141) via DASH manifest',
1147 'info_dict': {
1148 'id': 'a9LDPn-MO4I',
1149 'ext': 'm4a',
1150 'upload_date': '20121002',
1151 'uploader_id': '8KVIDEO',
1152 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1153 'description': '',
1154 'uploader': '8KVIDEO',
1155 'title': 'UHDTV TEST 8K VIDEO.mp4'
1156 },
1157 'params': {
1158 'youtube_include_dash_manifest': True,
1159 'format': '141',
1160 },
1161 'skip': 'format 141 not served anymore',
1162 },
1163 # DASH manifest with encrypted signature
1164 {
1165 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1166 'info_dict': {
1167 'id': 'IB3lcPjvWLA',
1168 'ext': 'm4a',
1169 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1170 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1171 'duration': 244,
1172 'uploader': 'AfrojackVEVO',
1173 'uploader_id': 'AfrojackVEVO',
1174 'upload_date': '20131011',
1175 'abr': 129.495,
1176 'like_count': int,
1177 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1178 'playable_in_embed': True,
1179 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1180 'view_count': int,
1181 'track': 'The Spark',
1182 'live_status': 'not_live',
1183 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1184 'channel': 'Afrojack',
1185 'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
1186 'tags': 'count:19',
1187 'availability': 'public',
1188 'categories': ['Music'],
1189 'age_limit': 0,
1190 'alt_title': 'The Spark',
1191 'channel_follower_count': int
1192 },
1193 'params': {
1194 'youtube_include_dash_manifest': True,
1195 'format': '141/bestaudio[ext=m4a]',
1196 },
1197 },
1198 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1199 {
1200 'note': 'Embed allowed age-gate video',
1201 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1202 'info_dict': {
1203 'id': 'HtVdAasjOgU',
1204 'ext': 'mp4',
1205 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1206 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1207 'duration': 142,
1208 'uploader': 'The Witcher',
1209 'uploader_id': 'WitcherGame',
1210 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1211 'upload_date': '20140605',
1212 'age_limit': 18,
1213 'categories': ['Gaming'],
1214 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1215 'availability': 'needs_auth',
1216 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1217 'like_count': int,
1218 'channel': 'The Witcher',
1219 'live_status': 'not_live',
1220 'tags': 'count:17',
1221 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1222 'playable_in_embed': True,
1223 'view_count': int,
1224 'channel_follower_count': int
1225 },
1226 },
1227 {
1228 'note': 'Age-gate video with embed allowed in public site',
1229 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1230 'info_dict': {
1231 'id': 'HsUATh_Nc2U',
1232 'ext': 'mp4',
1233 'title': 'Godzilla 2 (Official Video)',
1234 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1235 'upload_date': '20200408',
1236 'uploader_id': 'FlyingKitty900',
1237 'uploader': 'FlyingKitty',
1238 'age_limit': 18,
1239 'availability': 'needs_auth',
1240 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1241 'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
1242 'channel': 'FlyingKitty',
1243 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1244 'view_count': int,
1245 'categories': ['Entertainment'],
1246 'live_status': 'not_live',
1247 'tags': ['Flyingkitty', 'godzilla 2'],
1248 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1249 'like_count': int,
1250 'duration': 177,
1251 'playable_in_embed': True,
1252 'channel_follower_count': int
1253 },
1254 },
1255 {
1256 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1257 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1258 'info_dict': {
1259 'id': 'Tq92D6wQ1mg',
1260 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1261 'ext': 'mp4',
1262 'upload_date': '20191228',
1263 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1264 'uploader': 'Projekt Melody',
1265 'description': 'md5:17eccca93a786d51bc67646756894066',
1266 'age_limit': 18,
1267 'like_count': int,
1268 'availability': 'needs_auth',
1269 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1270 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1271 'view_count': int,
1272 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1273 'channel': 'Projekt Melody',
1274 'live_status': 'not_live',
1275 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1276 'playable_in_embed': True,
1277 'categories': ['Entertainment'],
1278 'duration': 106,
1279 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1280 'comment_count': int,
1281 'channel_follower_count': int
1282 },
1283 },
1284 {
1285 'note': 'Non-Agegated non-embeddable video',
1286 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1287 'info_dict': {
1288 'id': 'MeJVWBSsPAY',
1289 'ext': 'mp4',
1290 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1291 'uploader': 'Herr Lurik',
1292 'uploader_id': 'st3in234',
1293 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1294 'upload_date': '20130730',
1295 'track': 'Such mich find mich',
1296 'age_limit': 0,
1297 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1298 'like_count': int,
1299 'playable_in_embed': False,
1300 'creator': 'OOMPH!',
1301 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1302 'view_count': int,
1303 'alt_title': 'Such mich find mich',
1304 'duration': 210,
1305 'channel': 'Herr Lurik',
1306 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1307 'categories': ['Music'],
1308 'availability': 'public',
1309 'uploader_url': 'http://www.youtube.com/user/st3in234',
1310 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1311 'live_status': 'not_live',
1312 'artist': 'OOMPH!',
1313 'channel_follower_count': int
1314 },
1315 },
1316 {
1317 'note': 'Non-bypassable age-gated video',
1318 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1319 'only_matching': True,
1320 },
1321 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1322 # YouTube Red ad is not captured for creator
1323 {
1324 'url': '__2ABJjxzNo',
1325 'info_dict': {
1326 'id': '__2ABJjxzNo',
1327 'ext': 'mp4',
1328 'duration': 266,
1329 'upload_date': '20100430',
1330 'uploader_id': 'deadmau5',
1331 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1332 'creator': 'deadmau5',
1333 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1334 'uploader': 'deadmau5',
1335 'title': 'Deadmau5 - Some Chords (HD)',
1336 'alt_title': 'Some Chords',
1337 'availability': 'public',
1338 'tags': 'count:14',
1339 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1340 'view_count': int,
1341 'live_status': 'not_live',
1342 'channel': 'deadmau5',
1343 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1344 'like_count': int,
1345 'track': 'Some Chords',
1346 'artist': 'deadmau5',
1347 'playable_in_embed': True,
1348 'age_limit': 0,
1349 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1350 'categories': ['Music'],
1351 'album': 'Some Chords',
1352 'channel_follower_count': int
1353 },
1354 'expected_warnings': [
1355 'DASH manifest missing',
1356 ]
1357 },
1358 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1359 {
1360 'url': 'lqQg6PlCWgI',
1361 'info_dict': {
1362 'id': 'lqQg6PlCWgI',
1363 'ext': 'mp4',
1364 'duration': 6085,
1365 'upload_date': '20150827',
1366 'uploader_id': 'olympic',
1367 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1368 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
1369 'uploader': 'Olympics',
1370 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1371 'like_count': int,
1372 'release_timestamp': 1343767800,
1373 'playable_in_embed': True,
1374 'categories': ['Sports'],
1375 'release_date': '20120731',
1376 'channel': 'Olympics',
1377 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1378 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1379 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1380 'age_limit': 0,
1381 'availability': 'public',
1382 'live_status': 'was_live',
1383 'view_count': int,
1384 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
1385 'channel_follower_count': int
1386 },
1387 'params': {
1388 'skip_download': 'requires avconv',
1389 }
1390 },
1391 # Non-square pixels
1392 {
1393 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1394 'info_dict': {
1395 'id': '_b-2C3KPAM0',
1396 'ext': 'mp4',
1397 'stretched_ratio': 16 / 9.,
1398 'duration': 85,
1399 'upload_date': '20110310',
1400 'uploader_id': 'AllenMeow',
1401 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1402 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1403 'uploader': '孫ᄋᄅ',
1404 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1405 'playable_in_embed': True,
1406 'channel': '孫ᄋᄅ',
1407 'age_limit': 0,
1408 'tags': 'count:11',
1409 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1410 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1411 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1412 'view_count': int,
1413 'categories': ['People & Blogs'],
1414 'like_count': int,
1415 'live_status': 'not_live',
1416 'availability': 'unlisted',
1417 'comment_count': int,
1418 'channel_follower_count': int
1419 },
1420 },
1421 # url_encoded_fmt_stream_map is empty string
1422 {
1423 'url': 'qEJwOuvDf7I',
1424 'info_dict': {
1425 'id': 'qEJwOuvDf7I',
1426 'ext': 'webm',
1427 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1428 'description': '',
1429 'upload_date': '20150404',
1430 'uploader_id': 'spbelect',
1431 'uploader': 'Наблюдатели Петербурга',
1432 },
1433 'params': {
1434 'skip_download': 'requires avconv',
1435 },
1436 'skip': 'This live event has ended.',
1437 },
1438 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1439 {
1440 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1441 'info_dict': {
1442 'id': 'FIl7x6_3R5Y',
1443 'ext': 'webm',
1444 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1445 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1446 'duration': 220,
1447 'upload_date': '20150625',
1448 'uploader_id': 'dorappi2000',
1449 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1450 'uploader': 'dorappi2000',
1451 'formats': 'mincount:31',
1452 },
1453 'skip': 'not actual anymore',
1454 },
1455 # DASH manifest with segment_list
1456 {
1457 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1458 'md5': '8ce563a1d667b599d21064e982ab9e31',
1459 'info_dict': {
1460 'id': 'CsmdDsKjzN8',
1461 'ext': 'mp4',
1462 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1463 'uploader': 'Airtek',
1464 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1465 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1466 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1467 },
1468 'params': {
1469 'youtube_include_dash_manifest': True,
1470 'format': '135', # bestvideo
1471 },
1472 'skip': 'This live event has ended.',
1473 },
1474 {
1475 # Multifeed videos (multiple cameras), URL is for Main Camera
1476 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1477 'info_dict': {
1478 'id': 'jvGDaLqkpTg',
1479 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1480 'description': 'md5:e03b909557865076822aa169218d6a5d',
1481 },
1482 'playlist': [{
1483 'info_dict': {
1484 'id': 'jvGDaLqkpTg',
1485 'ext': 'mp4',
1486 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1487 'description': 'md5:e03b909557865076822aa169218d6a5d',
1488 'duration': 10643,
1489 'upload_date': '20161111',
1490 'uploader': 'Team PGP',
1491 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1492 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1493 },
1494 }, {
1495 'info_dict': {
1496 'id': '3AKt1R1aDnw',
1497 'ext': 'mp4',
1498 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1499 'description': 'md5:e03b909557865076822aa169218d6a5d',
1500 'duration': 10991,
1501 'upload_date': '20161111',
1502 'uploader': 'Team PGP',
1503 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1504 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1505 },
1506 }, {
1507 'info_dict': {
1508 'id': 'RtAMM00gpVc',
1509 'ext': 'mp4',
1510 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1511 'description': 'md5:e03b909557865076822aa169218d6a5d',
1512 'duration': 10995,
1513 'upload_date': '20161111',
1514 'uploader': 'Team PGP',
1515 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1516 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1517 },
1518 }, {
1519 'info_dict': {
1520 'id': '6N2fdlP3C5U',
1521 'ext': 'mp4',
1522 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1523 'description': 'md5:e03b909557865076822aa169218d6a5d',
1524 'duration': 10990,
1525 'upload_date': '20161111',
1526 'uploader': 'Team PGP',
1527 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1528 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1529 },
1530 }],
1531 'params': {
1532 'skip_download': True,
1533 },
1534 'skip': 'Not multifeed anymore',
1535 },
1536 {
1537 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1538 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1539 'info_dict': {
1540 'id': 'gVfLd0zydlo',
1541 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1542 },
1543 'playlist_count': 2,
1544 'skip': 'Not multifeed anymore',
1545 },
1546 {
1547 'url': 'https://vid.plus/FlRa-iH7PGw',
1548 'only_matching': True,
1549 },
1550 {
1551 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1552 'only_matching': True,
1553 },
1554 {
1555 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1556 # Also tests cut-off URL expansion in video description (see
1557 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1558 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1559 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1560 'info_dict': {
1561 'id': 'lsguqyKfVQg',
1562 'ext': 'mp4',
1563 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1564 'alt_title': 'Dark Walk',
1565 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1566 'duration': 133,
1567 'upload_date': '20151119',
1568 'uploader_id': 'IronSoulElf',
1569 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1570 'uploader': 'IronSoulElf',
1571 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1572 'track': 'Dark Walk',
1573 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1574 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1575 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1576 'categories': ['Film & Animation'],
1577 'view_count': int,
1578 'live_status': 'not_live',
1579 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1580 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1581 'tags': 'count:13',
1582 'availability': 'public',
1583 'channel': 'IronSoulElf',
1584 'playable_in_embed': True,
1585 'like_count': int,
1586 'age_limit': 0,
1587 'channel_follower_count': int
1588 },
1589 'params': {
1590 'skip_download': True,
1591 },
1592 },
1593 {
1594 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1595 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1596 'only_matching': True,
1597 },
1598 {
1599 # Video with yt:stretch=17:0
1600 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1601 'info_dict': {
1602 'id': 'Q39EVAstoRM',
1603 'ext': 'mp4',
1604 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1605 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1606 'upload_date': '20151107',
1607 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1608 'uploader': 'CH GAMER DROID',
1609 },
1610 'params': {
1611 'skip_download': True,
1612 },
1613 'skip': 'This video does not exist.',
1614 },
1615 {
1616 # Video with incomplete 'yt:stretch=16:'
1617 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1618 'only_matching': True,
1619 },
1620 {
1621 # Video licensed under Creative Commons
1622 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1623 'info_dict': {
1624 'id': 'M4gD1WSo5mA',
1625 'ext': 'mp4',
1626 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1627 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1628 'duration': 721,
1629 'upload_date': '20150128',
1630 'uploader_id': 'BerkmanCenter',
1631 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1632 'uploader': 'The Berkman Klein Center for Internet & Society',
1633 'license': 'Creative Commons Attribution license (reuse allowed)',
1634 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1635 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1636 'like_count': int,
1637 'age_limit': 0,
1638 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1639 'channel': 'The Berkman Klein Center for Internet & Society',
1640 'availability': 'public',
1641 'view_count': int,
1642 'categories': ['Education'],
1643 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1644 'live_status': 'not_live',
1645 'playable_in_embed': True,
1646 'comment_count': int,
1647 'channel_follower_count': int
1648 },
1649 'params': {
1650 'skip_download': True,
1651 },
1652 },
1653 {
1654 # Channel-like uploader_url
1655 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1656 'info_dict': {
1657 'id': 'eQcmzGIKrzg',
1658 'ext': 'mp4',
1659 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1660 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1661 'duration': 4060,
1662 'upload_date': '20151120',
1663 'uploader': 'Bernie Sanders',
1664 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1665 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1666 'license': 'Creative Commons Attribution license (reuse allowed)',
1667 'playable_in_embed': True,
1668 'tags': 'count:12',
1669 'like_count': int,
1670 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1671 'age_limit': 0,
1672 'availability': 'public',
1673 'categories': ['News & Politics'],
1674 'channel': 'Bernie Sanders',
1675 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1676 'view_count': int,
1677 'live_status': 'not_live',
1678 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1679 'comment_count': int,
1680 'channel_follower_count': int
1681 },
1682 'params': {
1683 'skip_download': True,
1684 },
1685 },
1686 {
1687 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1688 'only_matching': True,
1689 },
1690 {
1691 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1692 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1693 'only_matching': True,
1694 },
1695 {
1696 # Rental video preview
1697 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1698 'info_dict': {
1699 'id': 'uGpuVWrhIzE',
1700 'ext': 'mp4',
1701 'title': 'Piku - Trailer',
1702 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1703 'upload_date': '20150811',
1704 'uploader': 'FlixMatrix',
1705 'uploader_id': 'FlixMatrixKaravan',
1706 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1707 'license': 'Standard YouTube License',
1708 },
1709 'params': {
1710 'skip_download': True,
1711 },
1712 'skip': 'This video is not available.',
1713 },
1714 {
1715 # YouTube Red video with episode data
1716 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1717 'info_dict': {
1718 'id': 'iqKdEhx-dD4',
1719 'ext': 'mp4',
1720 'title': 'Isolation - Mind Field (Ep 1)',
1721 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1722 'duration': 2085,
1723 'upload_date': '20170118',
1724 'uploader': 'Vsauce',
1725 'uploader_id': 'Vsauce',
1726 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1727 'series': 'Mind Field',
1728 'season_number': 1,
1729 'episode_number': 1,
1730 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1731 'tags': 'count:12',
1732 'view_count': int,
1733 'availability': 'public',
1734 'age_limit': 0,
1735 'channel': 'Vsauce',
1736 'episode': 'Episode 1',
1737 'categories': ['Entertainment'],
1738 'season': 'Season 1',
1739 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1740 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1741 'like_count': int,
1742 'playable_in_embed': True,
1743 'live_status': 'not_live',
1744 'channel_follower_count': int
1745 },
1746 'params': {
1747 'skip_download': True,
1748 },
1749 'expected_warnings': [
1750 'Skipping DASH manifest',
1751 ],
1752 },
1753 {
1754 # The following content has been identified by the YouTube community
1755 # as inappropriate or offensive to some audiences.
1756 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1757 'info_dict': {
1758 'id': '6SJNVb0GnPI',
1759 'ext': 'mp4',
1760 'title': 'Race Differences in Intelligence',
1761 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1762 'duration': 965,
1763 'upload_date': '20140124',
1764 'uploader': 'New Century Foundation',
1765 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1766 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1767 },
1768 'params': {
1769 'skip_download': True,
1770 },
1771 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1772 },
1773 {
1774 # itag 212
1775 'url': '1t24XAntNCY',
1776 'only_matching': True,
1777 },
1778 {
1779 # geo restricted to JP
1780 'url': 'sJL6WA-aGkQ',
1781 'only_matching': True,
1782 },
1783 {
1784 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1785 'only_matching': True,
1786 },
1787 {
1788 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1789 'only_matching': True,
1790 },
1791 {
1792 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1793 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1794 'only_matching': True,
1795 },
1796 {
1797 # DRM protected
1798 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1799 'only_matching': True,
1800 },
1801 {
1802 # Video with unsupported adaptive stream type formats
1803 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1804 'info_dict': {
1805 'id': 'Z4Vy8R84T1U',
1806 'ext': 'mp4',
1807 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1808 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1809 'duration': 433,
1810 'upload_date': '20130923',
1811 'uploader': 'Amelia Putri Harwita',
1812 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1813 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1814 'formats': 'maxcount:10',
1815 },
1816 'params': {
1817 'skip_download': True,
1818 'youtube_include_dash_manifest': False,
1819 },
1820 'skip': 'not actual anymore',
1821 },
1822 {
1823 # Youtube Music Auto-generated description
1824 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1825 'info_dict': {
1826 'id': 'MgNrAu2pzNs',
1827 'ext': 'mp4',
1828 'title': 'Voyeur Girl',
1829 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1830 'upload_date': '20190312',
1831 'uploader': 'Stephen - Topic',
1832 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1833 'artist': 'Stephen',
1834 'track': 'Voyeur Girl',
1835 'album': 'it\'s too much love to know my dear',
1836 'release_date': '20190313',
1837 'release_year': 2019,
1838 'alt_title': 'Voyeur Girl',
1839 'view_count': int,
1840 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1841 'playable_in_embed': True,
1842 'like_count': int,
1843 'categories': ['Music'],
1844 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1845 'channel': 'Stephen',
1846 'availability': 'public',
1847 'creator': 'Stephen',
1848 'duration': 169,
1849 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
1850 'age_limit': 0,
1851 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1852 'tags': 'count:11',
1853 'live_status': 'not_live',
1854 'channel_follower_count': int
1855 },
1856 'params': {
1857 'skip_download': True,
1858 },
1859 },
1860 {
1861 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1862 'only_matching': True,
1863 },
1864 {
1865 # invalid -> valid video id redirection
1866 'url': 'DJztXj2GPfl',
1867 'info_dict': {
1868 'id': 'DJztXj2GPfk',
1869 'ext': 'mp4',
1870 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1871 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1872 'upload_date': '20090125',
1873 'uploader': 'Prochorowka',
1874 'uploader_id': 'Prochorowka',
1875 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1876 'artist': 'Panjabi MC',
1877 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1878 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1879 },
1880 'params': {
1881 'skip_download': True,
1882 },
1883 'skip': 'Video unavailable',
1884 },
1885 {
1886 # empty description results in an empty string
1887 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1888 'info_dict': {
1889 'id': 'x41yOUIvK2k',
1890 'ext': 'mp4',
1891 'title': 'IMG 3456',
1892 'description': '',
1893 'upload_date': '20170613',
1894 'uploader_id': 'ElevageOrVert',
1895 'uploader': 'ElevageOrVert',
1896 'view_count': int,
1897 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
1898 'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
1899 'like_count': int,
1900 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
1901 'tags': [],
1902 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
1903 'availability': 'public',
1904 'age_limit': 0,
1905 'categories': ['Pets & Animals'],
1906 'duration': 7,
1907 'playable_in_embed': True,
1908 'live_status': 'not_live',
1909 'channel': 'ElevageOrVert',
1910 'channel_follower_count': int
1911 },
1912 'params': {
1913 'skip_download': True,
1914 },
1915 },
1916 {
1917 # with '};' inside yt initial data (see [1])
1918 # see [2] for an example with '};' inside ytInitialPlayerResponse
1919 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1920 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1921 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1922 'info_dict': {
1923 'id': 'CHqg6qOn4no',
1924 'ext': 'mp4',
1925 'title': 'Part 77 Sort a list of simple types in c#',
1926 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1927 'upload_date': '20130831',
1928 'uploader_id': 'kudvenkat',
1929 'uploader': 'kudvenkat',
1930 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
1931 'like_count': int,
1932 'uploader_url': 'http://www.youtube.com/user/kudvenkat',
1933 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
1934 'live_status': 'not_live',
1935 'categories': ['Education'],
1936 'availability': 'public',
1937 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
1938 'tags': 'count:12',
1939 'playable_in_embed': True,
1940 'age_limit': 0,
1941 'view_count': int,
1942 'duration': 522,
1943 'channel': 'kudvenkat',
1944 'comment_count': int,
1945 'channel_follower_count': int
1946 },
1947 'params': {
1948 'skip_download': True,
1949 },
1950 },
1951 {
1952 # another example of '};' in ytInitialData
1953 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1954 'only_matching': True,
1955 },
1956 {
1957 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1958 'only_matching': True,
1959 },
1960 {
1961 # https://github.com/ytdl-org/youtube-dl/pull/28094
1962 'url': 'OtqTfy26tG0',
1963 'info_dict': {
1964 'id': 'OtqTfy26tG0',
1965 'ext': 'mp4',
1966 'title': 'Burn Out',
1967 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1968 'upload_date': '20141120',
1969 'uploader': 'The Cinematic Orchestra - Topic',
1970 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1971 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1972 'artist': 'The Cinematic Orchestra',
1973 'track': 'Burn Out',
1974 'album': 'Every Day',
1975 'like_count': int,
1976 'live_status': 'not_live',
1977 'alt_title': 'Burn Out',
1978 'duration': 614,
1979 'age_limit': 0,
1980 'view_count': int,
1981 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1982 'creator': 'The Cinematic Orchestra',
1983 'channel': 'The Cinematic Orchestra',
1984 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
1985 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1986 'availability': 'public',
1987 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
1988 'categories': ['Music'],
1989 'playable_in_embed': True,
1990 'channel_follower_count': int
1991 },
1992 'params': {
1993 'skip_download': True,
1994 },
1995 },
1996 {
1997 # controversial video, only works with bpctr when authenticated with cookies
1998 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1999 'only_matching': True,
2000 },
2001 {
2002 # controversial video, requires bpctr/contentCheckOk
2003 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
2004 'info_dict': {
2005 'id': 'SZJvDhaSDnc',
2006 'ext': 'mp4',
2007 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2008 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
2009 'uploader': 'CBS Mornings',
2010 'uploader_id': 'CBSThisMorning',
2011 'upload_date': '20140716',
2012 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2013 'duration': 170,
2014 'categories': ['News & Politics'],
2015 'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
2016 'view_count': int,
2017 'channel': 'CBS Mornings',
2018 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2019 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2020 'age_limit': 18,
2021 'availability': 'needs_auth',
2022 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2023 'like_count': int,
2024 'live_status': 'not_live',
2025 'playable_in_embed': True,
2026 'channel_follower_count': int
2027 }
2028 },
2029 {
2030 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2031 'url': 'cBvYw8_A0vQ',
2032 'info_dict': {
2033 'id': 'cBvYw8_A0vQ',
2034 'ext': 'mp4',
2035 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2036 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2037 'upload_date': '20201120',
2038 'uploader': 'Walk around Japan',
2039 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2040 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2041 'duration': 1456,
2042 'categories': ['Travel & Events'],
2043 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2044 'view_count': int,
2045 'channel': 'Walk around Japan',
2046 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2047 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2048 'age_limit': 0,
2049 'availability': 'public',
2050 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2051 'live_status': 'not_live',
2052 'playable_in_embed': True,
2053 'channel_follower_count': int
2054 },
2055 'params': {
2056 'skip_download': True,
2057 },
2058 }, {
2059 # Has multiple audio streams
2060 'url': 'WaOKSUlf4TM',
2061 'only_matching': True
2062 }, {
2063 # Requires Premium: has format 141 when requested using YTM url
2064 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2065 'only_matching': True
2066 }, {
2067 # multiple subtitles with same lang_code
2068 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2069 'only_matching': True,
2070 }, {
2071 # Force use android client fallback
2072 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2073 'info_dict': {
2074 'id': 'YOelRv7fMxY',
2075 'title': 'DIGGING A SECRET TUNNEL Part 1',
2076 'ext': '3gp',
2077 'upload_date': '20210624',
2078 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2079 'uploader': 'colinfurze',
2080 'uploader_id': 'colinfurze',
2081 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
2082 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2083 'duration': 596,
2084 'categories': ['Entertainment'],
2085 'uploader_url': 'http://www.youtube.com/user/colinfurze',
2086 'view_count': int,
2087 'channel': 'colinfurze',
2088 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2089 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2090 'age_limit': 0,
2091 'availability': 'public',
2092 'like_count': int,
2093 'live_status': 'not_live',
2094 'playable_in_embed': True,
2095 'channel_follower_count': int
2096 },
2097 'params': {
2098 'format': '17', # 3gp format available on android
2099 'extractor_args': {'youtube': {'player_client': ['android']}},
2100 },
2101 },
2102 {
2103 # Skip download of additional client configs (remix client config in this case)
2104 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2105 'only_matching': True,
2106 'params': {
2107 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2108 },
2109 }, {
2110 # shorts
2111 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2112 'only_matching': True,
2113 }, {
2114 'note': 'Storyboards',
2115 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2116 'info_dict': {
2117 'id': '5KLPxDtMqe8',
2118 'ext': 'mhtml',
2119 'format_id': 'sb0',
2120 'title': 'Your Brain is Plastic',
2121 'uploader_id': 'scishow',
2122 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2123 'upload_date': '20140324',
2124 'uploader': 'SciShow',
2125 'like_count': int,
2126 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2127 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2128 'view_count': int,
2129 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2130 'playable_in_embed': True,
2131 'tags': 'count:12',
2132 'uploader_url': 'http://www.youtube.com/user/scishow',
2133 'availability': 'public',
2134 'channel': 'SciShow',
2135 'live_status': 'not_live',
2136 'duration': 248,
2137 'categories': ['Education'],
2138 'age_limit': 0,
2139 'channel_follower_count': int
2140 }, 'params': {'format': 'mhtml', 'skip_download': True}
2141 }, {
2142 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2143 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2144 'info_dict': {
2145 'id': '2NUZ8W2llS4',
2146 'ext': 'mp4',
2147 'title': 'The NP that test your phone performance 🙂',
2148 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2149 'uploader': 'Leon Nguyen',
2150 'uploader_id': 'VNSXIII',
2151 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2152 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2153 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2154 'duration': 21,
2155 'view_count': int,
2156 'age_limit': 0,
2157 'categories': ['Gaming'],
2158 'tags': 'count:23',
2159 'playable_in_embed': True,
2160 'live_status': 'not_live',
2161 'upload_date': '20220103',
2162 'like_count': int,
2163 'availability': 'public',
2164 'channel': 'Leon Nguyen',
2165 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2166 'comment_count': int,
2167 'channel_follower_count': int
2168 }
2169 }, {
2170 # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
2171 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2172 'info_dict': {
2173 'id': '2NUZ8W2llS4',
2174 'ext': 'mp4',
2175 'title': 'The NP that test your phone performance 🙂',
2176 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2177 'uploader': 'Leon Nguyen',
2178 'uploader_id': 'VNSXIII',
2179 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2180 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2181 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2182 'duration': 21,
2183 'view_count': int,
2184 'age_limit': 0,
2185 'categories': ['Gaming'],
2186 'tags': 'count:23',
2187 'playable_in_embed': True,
2188 'live_status': 'not_live',
2189 'upload_date': '20220102',
2190 'like_count': int,
2191 'availability': 'public',
2192 'channel': 'Leon Nguyen',
2193 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2194 'comment_count': int,
2195 'channel_follower_count': int
2196 },
2197 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
2198 }, {
2199 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2200 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2201 'info_dict': {
2202 'id': 'mzZzzBU6lrM',
2203 'ext': 'mp4',
2204 'title': 'I Met GeorgeNotFound In Real Life...',
2205 'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
2206 'uploader': 'Quackity',
2207 'uploader_id': 'QuackityHQ',
2208 'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
2209 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2210 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2211 'duration': 955,
2212 'view_count': int,
2213 'age_limit': 0,
2214 'categories': ['Entertainment'],
2215 'tags': 'count:26',
2216 'playable_in_embed': True,
2217 'live_status': 'not_live',
2218 'release_timestamp': 1641172509,
2219 'release_date': '20220103',
2220 'upload_date': '20220103',
2221 'like_count': int,
2222 'availability': 'public',
2223 'channel': 'Quackity',
2224 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2225 'channel_follower_count': int
2226 }
2227 },
2228 { # continuous livestream. Microformat upload date should be preferred.
2229 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2230 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2231 'info_dict': {
2232 'id': 'kgx4WGK0oNU',
2233 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2234 'ext': 'mp4',
2235 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2236 'availability': 'public',
2237 'age_limit': 0,
2238 'release_timestamp': 1637975704,
2239 'upload_date': '20210619',
2240 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2241 'live_status': 'is_live',
2242 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
2243 'uploader': '阿鲍Abao',
2244 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2245 'channel': 'Abao in Tokyo',
2246 'channel_follower_count': int,
2247 'release_date': '20211127',
2248 'tags': 'count:39',
2249 'categories': ['People & Blogs'],
2250 'like_count': int,
2251 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2252 'view_count': int,
2253 'playable_in_embed': True,
2254 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
2255 },
2256 'params': {'skip_download': True}
2257 }, {
2258 # Story. Requires specific player params to work.
2259 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
2260 'info_dict': {
2261 'id': 'vv8qTUWmulI',
2262 'ext': 'mp4',
2263 'availability': 'unlisted',
2264 'view_count': int,
2265 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
2266 'upload_date': '20220526',
2267 'categories': ['Education'],
2268 'title': 'Story',
2269 'channel': 'IT\'S HISTORY',
2270 'description': '',
2271 'uploader_id': 'BlastfromthePast',
2272 'duration': 12,
2273 'uploader': 'IT\'S HISTORY',
2274 'playable_in_embed': True,
2275 'age_limit': 0,
2276 'live_status': 'not_live',
2277 'tags': [],
2278 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
2279 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
2280 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
2281 },
2282 'skip': 'stories get removed after some period of time',
2283 }, {
2284 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2285 'info_dict': {
2286 'id': 'tjjjtzRLHvA',
2287 'ext': 'mp4',
2288 'title': 'ハッシュタグ無し };if window.ytcsi',
2289 'upload_date': '20220323',
2290 'like_count': int,
2291 'availability': 'unlisted',
2292 'channel': 'nao20010128nao',
2293 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
2294 'age_limit': 0,
2295 'uploader': 'nao20010128nao',
2296 'uploader_id': 'nao20010128nao',
2297 'categories': ['Music'],
2298 'view_count': int,
2299 'description': '',
2300 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2301 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2302 'live_status': 'not_live',
2303 'playable_in_embed': True,
2304 'channel_follower_count': int,
2305 'duration': 6,
2306 'tags': [],
2307 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
2308 }
2309 }, {
2310 'note': '6 channel audio',
2311 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2312 'only_matching': True,
2313 }
2314 ]
2315
# Test cases for YouTube videos embedded in third-party web pages
_WEBPAGE_TESTS = [
    {
        # YouTube <object> embed
        'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
        # NOTE(review): identical to the description digest in info_dict below - confirm this is intended
        'md5': '873c81d308b979f0e23ee7e620b312a3',
        'info_dict': {
            'id': 'msN87y-iEx0',
            'ext': 'mp4',
            'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
            'upload_date': '20080526',
            'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
            'uploader': 'Christopher Sykes',
            'uploader_id': 'ChristopherJSykes',
            'age_limit': 0,
            'tags': [
                'feynman', 'mirror', 'science', 'physics',
                'imagination', 'fun', 'cool', 'puzzle',
            ],
            'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
            # Counters are only checked for their type
            'like_count': int,
            'comment_count': int,
            'channel': 'Christopher Sykes',
            'live_status': 'not_live',
            'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
            'availability': 'public',
            'duration': 195,
            'view_count': int,
            'categories': ['Science & Technology'],
            'channel_follower_count': int,
            'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
        },
        'params': {'skip_download': True},
    },
]
2351
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected here; any other URL is decided by the parent class.
    """
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    return False if list_values[0] else super().suitable(url)
2360
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor, then start with empty code and player caches."""
    super().__init__(*args, **kwargs)
    # Two independent dicts; both begin empty for every new instance
    self._code_cache, self._player_cache = {}, {}
2365
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
    """Turn the from-start formats into lazily generated live DASH downloads.

    Each entry of *formats* flagged ``is_from_start`` is modified in place: it is
    marked as live, switched to the ``http_dash_segments_generator`` protocol and
    given a ``fragments`` callable bound to `_live_dash_fragments`. Entries without
    the flag are left untouched.
    """
    manifest_lock = threading.Lock()

    is_live = True
    start_time = time.time()
    formats = [fmt for fmt in formats if fmt.get('is_from_start')]

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        nonlocal formats, start_time, is_live

        with manifest_lock:
            # Re-download the player responses only once more than `delay` seconds
            # have passed since the formats were last fetched
            if time.time() > start_time + delay:
                _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
                video_details = traverse_obj(
                    prs, (..., 'videoDetails'), expected_type=dict, default=[])
                microformats = traverse_obj(
                    prs, (..., 'microformat', 'playerMicroformatRenderer'),
                    expected_type=dict, default=[])
                _, is_live, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
                start_time = time.time()

        for candidate in formats:
            if candidate['format_id'] == format_id:
                return candidate['manifest_url'], candidate['manifest_stream_number'], is_live

        # The requested format is not part of the current format list
        if is_live:
            self.report_warning(
                f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
        else:
            self.to_screen(f'{video_id}: Video is no longer live')
        return None

    for fmt in formats:
        fmt['is_live'] = True
        fmt['protocol'] = 'http_dash_segments_generator'
        fmt['fragments'] = functools.partial(
            self._live_dash_fragments, fmt['format_id'], live_start_time, mpd_feed)
2409
def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
    """Generate the fragments of a live DASH stream for as long as it stays live.

    @param format_id        ID of the format whose fragments are generated
    @param live_start_time  start time of the stream, compared against `time.time()`;
                            a falsy value disables the "lack of early segments" check
    @param mpd_feed         callable ``(format_id, delay)`` returning
                            ``(manifest_url, manifest_stream_number, is_live)`` or None
    @param ctx              dict-like context; ``start`` is read and ``last_error`` is popped
    @yields                 dicts holding the fragment ``url`` and the current ``fragment_count``

    The generator stops once the stream is reported as no longer live, or once
    ``no_fragment_score`` exceeds 30.
    """
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # Bound up front: the helper below reads `last_seq` as a free variable and may have to
    # return it before the main loop has assigned it for the first time
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        """
        @returns (should_continue, last_seq); last_seq is the enclosing value
                 unless a new one could be read from the manifest
        """
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        # After an HTTP 403 (or when asked to) allow the manifest to be refetched after 5 seconds
        expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        try:
            fmts, _ = self._extract_mpd_formats_and_subtitles(
                mpd_url, None, note=False, errnote=False, fatal=False)
        except ExtractorError:
            fmts = None
        # A default is mandatory here: a StopIteration escaping into the enclosing
        # generator would be turned into a RuntimeError (PEP 479)
        fmt_info = next(
            (x for x in fmts or () if x['manifest_stream_number'] == stream_number), None)
        if not fmt_info:
            # Manifest unavailable, or it does not contain the wanted stream
            no_fragment_score += 2
            return False, last_seq
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    while is_live:
        fetch_time = time.time()
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not refresh the sequence number here, or some fragments would be skipped
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # Caught below; used to restart the outer loop from within the inner one
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
                yield {
                    'url': last_segment_url,
                    'fragment_count': last_seq,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        # Wait out the remainder of FETCH_SPAN seconds, measured from the start of this pass
        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2511
def _extract_player_url(self, *ytcfgs, webpage=None):
    """
    Find the JS player URL in the given ytcfgs and make it absolute.

    The first string found under PLAYER_JS_URL, or else under
    WEB_PLAYER_CONTEXT_CONFIGS/*/jsUrl, is used. `webpage` is accepted but unused.
    Returns None when no URL is found.
    """
    candidate_paths = (
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
    )
    relative_url = traverse_obj(ytcfgs, *candidate_paths, get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', relative_url) if relative_url else None
2519
def _download_player_url(self, video_id, fatal=False):
    """
    Build the base.js player URL from the player version found in the iframe API JS.

    Returns None if the download yields nothing or no 8-hex-digit version is found.
    """
    iframe_api_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_api_js:
        return None

    version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_api_js, 'player version', fatal=fatal)
    if not version:
        return None
    return f'https://www.youtube.com/s/player/{version}/player_ias.vflset/en_US/base.js'
2529
2530 def _signature_cache_id(self, example_sig):
2531 """ Return a string representation of a signature """
2532 return '.'.join(str(len(part)) for part in example_sig.split('.'))
2533
@classmethod
def _extract_player_info(cls, player_url):
    """
    Return the `id` group of the first pattern in _PLAYER_INFO_RE that matches player_url.

    Raises ExtractorError if none of the patterns match.
    """
    attempts = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    found = next(filter(None, attempts), None)
    if found is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return found.group('id')
2543
def _load_player(self, video_id, player_url, fatal=True):
    """
    Return the player JS code for player_url, downloading it at most once per player id.

    Only non-empty downloads are stored in self._code_cache; returns None if
    nothing could be downloaded.
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache.get(player_id)

    js_source = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    if js_source:
        self._code_cache[player_id] = js_source
    return self._code_cache.get(player_id)
2554
def _extract_signature_function(self, video_id, player_url, example_sig):
    """
    Return a callable that rearranges an encrypted signature into the decrypted one.

    The rearrangement is a list of source indices. It is loaded from the
    'youtube-sigfuncs' cache; if absent, it is derived by running the player's
    signature function on a probe string of distinct characters, and then stored.
    """
    player_id = self._extract_player_info(player_url)
    sig_shape = self._signature_cache_id(example_sig)

    # Key of the filesystem cache entry; must be usable as a plain file name
    func_id = f'js_{player_id}_{sig_shape}'
    assert os.path.basename(func_id) == func_id

    self.write_debug(f'Extracting signature function {func_id}')
    cache_spec = self.cache.load('youtube-sigfuncs', func_id)

    if not cache_spec:
        player_code = self._load_player(video_id, player_url)
        if player_code:
            transform = self._parse_sig_js(player_code)
            # chr(0), chr(1), ... so that each output character reveals its source index
            probe = ''.join(chr(position) for position in range(len(example_sig)))
            cache_spec = list(map(ord, transform(probe)))
            self.cache.store('youtube-sigfuncs', func_id, cache_spec)

    def apply_spec(s):
        return ''.join(s[i] for i in cache_spec)

    return apply_spec
2574
def _print_sig_code(self, func, example_sig):
    """
    Print Python code equivalent to the signature function `func`.

    Does nothing unless the 'youtube_print_sig_code' param is set.
    @param func         Callable mapping an encrypted signature to the decrypted one
    @param example_sig  Signature whose length and dot-separated part lengths are used
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        # Yields "s[...]" expressions that, concatenated, pick the characters at idxs.
        # Runs of indices changing by +1/-1 are collapsed into slices.
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            # The slice end is exclusive, so go one step past the last index;
            # a negative result means "up to the beginning" and is left open
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return f's[{starts}{ends}{steps}]'

        step = None
        # Quell pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                # Inside a run: keep going while the stride is unchanged
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run starts at prev
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # NOTE(review): `i` is unbound here if idxs has fewer than two items
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    # Characters chr(0), chr(1), ... make every output character reveal its source index
    test_string = ''.join(map(chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
2616
def _parse_sig_js(self, jscode):
    """
    Locate the signature function in the player JS and return a Python callable for it.

    The patterns are passed to _search_regex together as one tuple.
    @param jscode  Source code of the JS player
    @returns       Callable taking the encrypted signature string
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         # NOTE(review): the following two patterns are identical
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes its arguments as a list
    return lambda s: initial_function([s])
2640
2641 def _cached(self, func, *cache_id):
2642 def inner(*args, **kwargs):
2643 if cache_id not in self._player_cache:
2644 try:
2645 self._player_cache[cache_id] = func(*args, **kwargs)
2646 except ExtractorError as e:
2647 self._player_cache[cache_id] = e
2648 except Exception as e:
2649 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
2650
2651 ret = self._player_cache[cache_id]
2652 if isinstance(ret, Exception):
2653 raise ret
2654 return ret
2655 return inner
2656
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    # The extracted function is cached per player URL and signature shape
    sig_shape = self._signature_cache_id(s)
    cached_extractor = self._cached(
        self._extract_signature_function, 'sig', player_url, sig_shape)
    decrypt = cached_extractor(video_id, player_url, s)
    self._print_sig_code(decrypt, s)
    return decrypt(s)
2664
def _decrypt_nsig(self, s, video_id, player_url):
    """
    Turn the encrypted n field into a working signature

    The native JS interpreter is tried first; if it raises JSInterpreter.Exception,
    the function is executed with PhantomJS instead.
    Raises ExtractorError if player_url is None or the function code cannot be extracted.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as e:
        raise ExtractorError('Unable to extract nsig function code', cause=e)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        # The built function is cached per player URL
        extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        ret = extract_nsig(jsi, func_code)(s)
    except JSInterpreter.Exception as e:
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            # The wrapper could not be created, so surface the original interpreter error
            raise e
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(e)

        # func_code is an (argument names, function body) pair
        args, func_body = func_code
        ret = jsi.execute(
            f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));',
            video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {ret}')
    return ret
2698
def _extract_n_function_name(self, jscode):
    """
    Return the name of the function that the player JS applies to the "n" parameter.

    When the JS references it as `name[idx]`, the array literal assigned to
    `name` is parsed and the item at idx is returned instead.
    """
    name, list_index = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
        jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    if not list_index:
        return name

    array_literal = self._search_regex(
        rf'var {re.escape(name)}\s*=\s*(\[.+?\]);', jscode,
        f'Initial JS player n function list ({name}.{list_index})')
    candidates = json.loads(js_to_json(array_literal))
    return candidates[int(list_index)]
2709
def _extract_n_function_code(self, video_id, player_url):
    """
    Return the code of the player's "n" parameter function.

    The code is loaded from the 'youtube-nsig' cache if available. Otherwise it is
    extracted from the player JS, first with a strict regex and, if that does
    not match, with the JS interpreter, and then stored in the cache.

    @returns  (jsi, player_id, func_code) where func_code is an
              (argument names, function body) pair
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # The name is escaped since JS identifiers may contain "$", which would
    # otherwise act as a regex anchor and prevent this pattern from ever matching
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
            # NB: The end of the regex is intentionally kept strict
            {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % re.escape(func_name),
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        # Normalize to the same (argument list, body) shape that jsinterp returns
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
2735
def _extract_n_function_from_code(self, jsi, func_code):
    """
    Build a callable that decrypts an "n" value using the given function code.

    The callable raises JSInterpreter.Exception for any failure, including a
    result that starts with 'enhanced_except_'.
    """
    compiled = jsi.extract_function_from_code(*func_code)

    def extract_nsig(s):
        try:
            decrypted = compiled([s])
        except Exception as err:
            # Interpreter errors pass through; anything else is wrapped with its traceback
            if isinstance(err, JSInterpreter.Exception):
                raise
            raise JSInterpreter.Exception(traceback.format_exc(), cause=err)

        if not decrypted.startswith('enhanced_except_'):
            return decrypted
        raise JSInterpreter.Exception('Signature function returned an exception')

    return extract_nsig
2752
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The value is taken from ytcfg['STS'] if present, else from the player JS.
    Without a player_url this raises ExtractorError if fatal, otherwise it
    warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    player_code = self._load_player(video_id, player_url, fatal=fatal)
    if not player_code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', player_code,
        'JS player signature timestamp', group='sts', fatal=fatal))
2776
def _mark_watched(self, video_id, player_responses):
    """
    Mark the video as watched by requesting its playback tracking URLs.

    Both the playback and the watchtime URL from the player responses are
    requested, in that order. If one of them is missing, a warning is reported
    and nothing further is requested. The requests themselves are non-fatal.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        # 16 characters drawn uniformly from the 64-character alphabet.
        # (randint(0, 256) & 63 was slightly biased, since randint includes 256)
        cpn = ''.join(random.choices(CPN_ALPHABET, k=16))

        # more consistent results setting it to right before the end
        video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]

        qs.update({
            'ver': ['2'],
            'cpn': [cpn],
            'cmt': video_length,
            'el': 'detailpage',  # otherwise defaults to "shorts"
        })

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs.update({
                'st': video_length,
                'et': video_length,
            })

        url = urllib.parse.urlunparse(
            parsed_url._replace(query=urllib.parse.urlencode(qs, True)))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
2817
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """
    Yield url_results for YouTube videos referenced in a webpage.

    An Invidious-style alternate link ends the extraction right after its
    result; otherwise the parent class results are followed by lazyYT and
    "YouTube Video Importer" embeds.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate_link = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate_link is not None:
        yield cls.url_result(alternate_link.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    for lazy_embed in re.finditer(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        raw_id = lazy_embed.group(1)
        yield cls.url_result(unescapeHTML(raw_id), cls, raw_id)

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    for importer_embed in re.finditer(importer_re, webpage):
        # Group 3 is the video id; groups 1 and 2 are the quote characters
        embedded_id = importer_embed.group(3)
        yield cls.url_result(embedded_id, cls, embedded_id)
2841
@classmethod
def extract_id(cls, url):
    """Return the video id for url; raise ExtractorError if get_temp_id finds none"""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
2848
def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the chaptered player bar found in the given JSON data"""
    CHAPTERS_PATH = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )

    def start_seconds(chapter):
        # The start is given in milliseconds
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    chapter_list = traverse_obj(data, CHAPTERS_PATH, expected_type=list)
    return self._extract_chapters(
        chapter_list, chapter_time=start_seconds, chapter_title=title_of, duration=duration)
2863
def _extract_chapters_from_engagement_panel(self, data, duration):
    """
    Extract chapters from the macro markers lists of the engagement panels.

    Returns the chapters of the first list that yields any, or [] if none does.
    """
    def start_of(marker):
        return parse_duration(self._get_text(marker, 'timeDescription'))

    def title_of(marker):
        return self._get_text(marker, 'title')

    marker_lists = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    for contents in marker_lists:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, start_of, title_of, duration)
        if chapters:
            return chapters
    return []
2876
def _extract_chapters_from_description(self, description, duration):
    """Extract chapters from description lines that start with a timestamp; order is not enforced"""
    # Each match is a (timestamp, title) pair
    timestamped_lines = re.findall(
        r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or '')

    def start_of(line):
        return parse_duration(line[0])

    def title_of(line):
        return line[1]

    return self._extract_chapters(
        timestamped_lines, chapter_time=start_of, chapter_title=title_of,
        duration=duration, strict=False)
2882
2883 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
2884 if not duration:
2885 return
2886 chapter_list = [{
2887 'start_time': chapter_time(chapter),
2888 'title': chapter_title(chapter),
2889 } for chapter in chapter_list or []]
2890 if not strict:
2891 chapter_list.sort(key=lambda c: c['start_time'] or 0)
2892
2893 chapters = [{'start_time': 0}]
2894 for idx, chapter in enumerate(chapter_list):
2895 if chapter['start_time'] is None:
2896 self.report_warning(f'Incomplete chapter {idx}')
2897 elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
2898 chapters.append(chapter)
2899 else:
2900 self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')
2901 return chapters[1:]
2902
def _extract_comment(self, comment_renderer, parent=None):
    """
    Convert a comment renderer into a comment dict.

    Returns None if the renderer has no commentId. `parent` is the id of the
    parent comment, 'root' is used when it is not given.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return None

    def lookup(getter, expected_type):
        return try_get(comment_renderer, getter, expected_type)

    text = self._get_text(comment_renderer, 'contentText')
    # note: timestamp is an estimate calculated from the current time and time_text
    timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
    author = self._get_text(comment_renderer, 'authorText')

    vote_text = lookup((lambda x: x['voteCount']['simpleText'], lambda x: x['likeCount']), str)
    action_buttons = lookup(lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}

    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': parse_count(vote_text) or 0,
        'is_favorited': 'creatorHeart' in action_buttons,
        'author': author,
        'author_id': lookup(lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str),
        'author_thumbnail': lookup(lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str),
        'author_is_uploader': lookup(lambda x: x['authorIsChannelOwner'], bool),
        'parent': parent or 'root'
    }
2937
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """
    Generator of comment dicts for a comment section or a reply thread.

    @param root_continuation_data  Renderer from which the first continuation is extracted
    @param parent                  Id of the parent comment when extracting replies
    @param tracker                 Dict of counters shared across the recursive calls;
                                   created on the initial call
    """

    # First value of an extractor argument, or '' if it is not set
    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        # Returns the continuation of the requested sort order, if any item has one
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        # Yields the comments of one page, each followed by its replies
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare yield gives None, which the consuming loop below treats as "stop"
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Limited by both the per-thread cap and what is left of the overall reply cap
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # The argument list is padded with '' so that four values can always be unpacked
    # NOTE(review): presumably int_or_none maps '' to the sys.maxsize default - confirm
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        # Only the first request of a generated continuation skips the key check
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section is only used to find the continuation of the sort order
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    # Relay YouTube's message only for the top-level call that produced no comments
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
3083
3084 @staticmethod
3085 def _generate_comment_continuation(video_id):
3086 """
3087 Generates initial comment section continuation token from given video id
3088 """
3089 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3090 return base64.b64encode(token.encode()).decode()
3091
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""
    def _real_comment_extract(contents):
        # Use the first item section identified as the comment section, if any
        renderer = None
        for section in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if section.get('sectionIdentifier') == 'comment-item-section':
                renderer = section
                break
        yield from self._comment_entries(renderer, ytcfg, video_id)

    # The first value of the max_comments argument caps the total number of comments
    comment_limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_real_comment_extract(contents), comment_limit)
3102
3103 @staticmethod
3104 def _get_checkok_params():
3105 return {'contentCheckOk': True, 'racyCheckOk': True}
3106
@classmethod
def _generate_player_context(cls, sts=None):
    """
    Build the playback context part of a player request.

    The signatureTimestamp is only included if sts is not None. The result
    also contains the params from _get_checkok_params.
    """
    playback_context = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback_context['signatureTimestamp'] = sts

    player_context = {
        'playbackContext': {
            'contentPlaybackContext': playback_context
        },
    }
    player_context.update(cls._get_checkok_params())
    return player_context
3120
@staticmethod
def _is_agegated(player_response):
    """
    Whether the playability status of the player response indicates an age gate.

    True if desktopLegacyAgeGateReason is set, or if the status or the reason
    contains one of the known age gate markers (compared case-insensitively).
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # Status values are upper case (e.g. AGE_VERIFICATION_REQUIRED) while the
    # markers are lower case, so the comparison has to ignore case
    return any(
        expected in reason.lower()
        for reason in reasons if isinstance(reason, str)
        for expected in AGE_GATE_REASONS)
3132
@staticmethod
def _is_unplayable(player_response):
    """Whether the playability status of the player response is UNPLAYABLE"""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
3136
# Value of the `params` field that _extract_player_response sends
# for stories and for the android client
_STORY_PLAYER_PARAMS = '8AEB'
3138
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """
    Request the player API response of a video for the given client.

    The signature timestamp is only looked up when a player_url is given.
    Returns None instead of an empty response.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    else:
        sts = None
    api_headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    wants_story_params = (
        smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android')
    query = {'videoId': video_id}
    if wants_story_params:
        query['params'] = self._STORY_PLAYER_PARAMS
    query.update(self._generate_player_context(sts))

    response = self._extract_response(
        item_id=video_id, ep='player', query=query,
        ytcfg=player_ytcfg, headers=api_headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client.replace('_', ' ').strip())
    return response or None
3160
def _get_requested_clients(self, url, smuggled_data):
    """
    Return the ordered, de-duplicated list of innertube clients to use.

    The clients come from the player_client extractor argument, where 'default'
    and 'all' expand to the default clients and to all allowed clients
    respectively. Unsupported names are skipped with a warning, and the default
    clients are used if nothing valid was requested. For music URLs, the
    existing "<client>_music" variants of the requested clients are appended.
    """
    default = ['android', 'web']
    # Clients starting with "_" cannot be requested; highest priority comes first
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if not client.startswith('_')),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        # Copy, so that extending the result below never modifies the defaults
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Collect the variants first instead of extending the list while iterating over it
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
3184
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """
    Collect the player responses of the given clients.

    Further clients may be tried for unplayable or age-gated responses.
    An error is only raised if no player response could be obtained at all;
    otherwise it is reported as a warning.

    @returns  (player responses, player_url) tuple
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed since the clients are taken from the end of the list with pop()
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    clients.append(client_name)
                    all_clients.add(actual_client)
                return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        # A player URL that was already found is reused for the following clients
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The iframe API fallback is attempted at most once
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            # For the web client, the response embedded in the webpage is used if available
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as e:
            # Only the latest error is kept; the previous one is demoted to a warning
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not len(prs):
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
3266
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, is_live, duration):
    """Yield format dicts for a video, followed by one final subtitles dict.

    This is a generator: every item except the last is a format dict
    (direct "https" formats first, then HLS and DASH manifest formats);
    the very last item yielded is the merged subtitles dict collected
    from the manifests.

    streaming_data -- iterable of `streamingData` dicts (one per player response)
    video_id       -- video ID, used for messages and signature decryption
    player_url     -- URL of the JS player; needed to decrypt signatures/nsig
    is_live        -- whether the video is currently live
    duration       -- video duration in seconds (may be None); only used to
                      flag formats whose own duration is suspiciously short
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `audioQuality` may be absent or explicitly null
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # A format with neither `url` nor `signatureCipher` must be skipped,
            # not crash: parse_qs(None) raises on older Python versions
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher') or '')
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                    f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                # The format is still usable, just (probably) slow
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        if audio_track.get('audioIsDefault'):
            language_preference = 10
        elif 'descriptive' in (audio_track.get('displayName') or '').lower():
            language_preference = -10
        else:
            language_preference = -1
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        # NOTE(review): relies on try_get swallowing the error when `duration`
        # is None/0 or `approxDurationMs` is missing - confirm against try_get
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # `quality` is None when the response carries no quality information at all
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    # Copy before appending so the value handed out by _configuration_arg is never mutated
    skip_manifests = list(self._configuration_arg('skip'))
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.append('dash')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Assign format_id/quality to a manifest format; return False for duplicates."""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            # Same itag already seen via another protocol: disambiguate the ID
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Fall back to the quality of the known format with the closest height
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        return True

    subtitles = {}
    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(hls_manifest_url, video_id, 'mp4', fatal=False, live=is_live)
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f
    # Callers unpack this as `*formats, subtitles`
    yield subtitles
3457
def _extract_storyboard(self, player_responses, duration):
    """Yield one mhtml "storyboard" format dict per well-formed storyboard level.

    The spec string is '|'-separated: the first field is the base URL and
    each following field describes one level as 8 '#'-separated values
    (width, height, frame count, columns, rows, <unused>, $N value, sigh).

    player_responses -- player response dicts to search for the spec
    duration         -- video duration in seconds; levels are skipped when
                        it is unknown or zero since fps and fragment
                        durations cannot be computed without it
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The list is reversed, so pop() removes the leading base URL field
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        if not duration:
            # Avoid TypeError/ZeroDivisionError in the divisions below
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one image holding a cols x rows grid of frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may be only partially filled
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
3495
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Fetch the watch page (unless skipped) and the player responses.

    Returns a 4-tuple `(webpage, master_ytcfg, player_responses, player_url)`.
    `webpage` stays None when 'webpage' is listed in the `player_skip`
    configuration argument.
    """
    if 'webpage' in self._configuration_arg('player_skip'):
        webpage = None
    else:
        params = {'bpctr': '9999999999', 'has_verified': '1'}
        if smuggled_data.get('is_story'):
            params['pp'] = self._STORY_PLAYER_PARAMS
        webpage = self._download_webpage(webpage_url, video_id, fatal=False, query=params)

    # Prefer the ytcfg found for this page, else fall back to the default one
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url
3512
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Collect live details, streaming data, formats and manifest subtitles.

    Returns a 5-tuple
    `(live_broadcast_details, is_live, streaming_data, formats, subtitles)`.
    `is_live` comes from the video details when present there, otherwise
    from the live broadcast details.
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    # The generator yields every format and then, last, the subtitles dict
    extracted = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, is_live, duration)
    *formats, subtitles = extracted

    return live_broadcast_details, is_live, streaming_data, formats, subtitles
3523
def _real_extract(self, url):
    """Extract the info dict for a single YouTube video URL.

    Downloads the player responses (and watch page unless skipped), then
    builds formats, thumbnails, subtitles and metadata from them.

    Returns the info dict, or a url/playlist result when the video is a
    trailer stand-in or a multifeed (multi-camera) video.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id

    webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Without a webpage there are no <meta> tags to search
    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])
    video_title = (
        get_first(video_details, 'title')
        or self._get_text(microformats, (..., 'title'))
        or search_meta(['og:title', 'twitter:title', 'title']))
    video_description = get_first(video_details, 'shortDescription')

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = urllib.parse.parse_qs(
                    urllib.parse.unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += ' (%s)' % feed_title
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    duration = int_or_none(
        get_first(video_details, 'lengthSeconds')
        or get_first(microformats, 'lengthSeconds')
        or parse_duration(search_meta('duration'))) or None

    # The duration must be passed on: the damaged-format check divides each
    # format's own duration by it and can never trigger when it is None
    live_broadcast_details, is_live, streaming_data, formats, automatic_captions = \
        self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # `reason` may be missing even though a subreason is given
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break
    thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    # Snapshot of the thumbnails actually advertised, before guessed URLs are added
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
        # in resolution, these are not the custom thumbnail. So de-prioritize them
        'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
        'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Rank by position in thumbnail_names; unknown names rank last
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    if is_live is None:
        if is_upcoming or live_content is False:
            is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False
    live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_end_time and live_start_time:
        duration = live_end_time - live_start_time

    if is_live and self.get_param('live_from_start'):
        self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

    formats.extend(self._extract_storyboard(player_responses, duration))

    # source_preference is lower for throttled/potentially damaged formats
    self._sort_formats(formats, (
        'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))

    info = {
        'id': video_id,
        'title': video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if user don't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'uploader': get_first(video_details, 'author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'is_live': is_live,
        'was_live': (False if is_live or is_upcoming or live_content is False
                     else None if is_live is None or is_upcoming is None
                     else live_content),
        'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
        'release_timestamp': live_start_time,
    }

    if get_first(video_details, 'isPostLiveDvr'):
        self.write_debug('Video is in Post-Live Manifestless mode')
        info['live_status'] = 'post_live'
        if (duration or 0) > 4 * 3600:
            self.report_warning(
                'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '
                'This is a known issue and patches are welcome')

    subtitles = {}
    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

        def process_language(container, base_url, lang_code, sub_name, query):
            """Append one entry per subtitle format for `lang_code` to `container`."""
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
                    'name': sub_name,
                })

        # NB: Constructing the full subtitle dictionary is slow
        get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
            self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
        for lang_code, caption_track in captions.items():
            base_url = caption_track.get('baseUrl')
            # Check before parsing: there is nothing to parse in a missing URL
            if not base_url:
                continue
            orig_lang = parse_qs(base_url).get('lang', [None])[-1]
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                orig_trans_code = trans_code
                if caption_track.get('kind') != 'asr':
                    if not get_translated_subs:
                        continue
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, None, ' from %s')
                # Add an "-orig" label to the original language so that it can be distinguished.
                # The subs are returned without "-orig" as well for compatibility
                if lang_code == f'a-{orig_trans_code}':
                    process_language(
                        automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
                # Setting tlang=lang returns damaged subtitles.
                process_language(automatic_captions, base_url, trans_code, trans_name,
                                 {} if orig_lang == orig_trans_code else {'tlang': trans_code})

    info['automatic_captions'] = automatic_captions
    info['subtitles'] = subtitles

    # start/end times may be given in either the URL fragment or the query
    parsed_url = urllib.parse.urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = urllib.parse.parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(v[0])

    # Youtube Music Auto-generated description
    if video_description:
        # NB: in verbose (?x) mode unescaped whitespace is ignored, so every
        # literal space in the pattern must be escaped
        mobj = re.search(
            r'''(?xs)
                (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
                (?P<album>[^\n]+)
                (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
                (?:.+?Released\ on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
                (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
                .+\nAuto-generated\ by\ YouTube\.\s*$
            ''', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
    if not initial_data:
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query,
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    info['comment_count'] = traverse_obj(initial_data, (
        'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
        'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'
    ), (
        'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
        'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'
    ), expected_type=int_or_none, get_all=False)

    try:  # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
    except (KeyError, IndexError, TypeError):
        pass
    else:
        info.setdefault('subtitles', {})['live_chat'] = [{
            # url is needed to set cookies
            'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]

    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or self._extract_chapters_from_description(video_description, duration)
            or None)

    # `contents` and `vpir` are used unconditionally further down, so they
    # must be defined even when no initial data could be obtained
    contents = traverse_obj(
        initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
        expected_type=list, default=[])

    vpir = get_first(contents, 'videoPrimaryInfoRenderer')
    if vpir:
        stl = vpir.get('superTitleLink')
        if stl:
            stl = self._get_text(stl)
            if try_get(
                    vpir,
                    lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                info['location'] = stl
            else:
                mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
                if mobj:
                    info.update({
                        'series': mobj.group(1),
                        'season_number': int(mobj.group(2)),
                        'episode_number': int(mobj.group(3)),
                    })
        for tlb in (try_get(
                vpir,
                lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                list) or []):
            tbr = tlb.get('toggleButtonRenderer') or {}
            for getter, regex in [(
                    lambda x: x['defaultText']['accessibility']['accessibilityData'],
                    r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                        lambda x: x['accessibility'],
                        lambda x: x['accessibilityData']['accessibilityData'],
                    ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                label = (try_get(tbr, getter, dict) or {}).get('label')
                if label:
                    mobj = re.match(regex, label)
                    if mobj:
                        info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                        break
        sbr_tooltip = try_get(
            vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
        if sbr_tooltip:
            like_count, dislike_count = sbr_tooltip.split(' / ')
            info.update({
                'like_count': str_to_int(like_count),
                'dislike_count': str_to_int(dislike_count),
            })
    vsir = get_first(contents, 'videoSecondaryInfoRenderer')
    if vsir:
        vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
        info.update({
            'channel': self._get_text(vor, 'title'),
            'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

        rows = try_get(
            vsir,
            lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
            list) or []
        # A divider line means several songs are listed; per-song metadata
        # is then not attributed to the video
        multiple_songs = False
        for row in rows:
            if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                multiple_songs = True
                break
        for row in rows:
            mrr = row.get('metadataRowRenderer') or {}
            mrr_title = mrr.get('title')
            if not mrr_title:
                continue
            mrr_title = self._get_text(mrr, 'title')
            mrr_contents_text = self._get_text(mrr, ('contents', 0))
            if mrr_title == 'License':
                info['license'] = mrr_contents_text
            elif not multiple_songs:
                if mrr_title == 'Album':
                    info['album'] = mrr_contents_text
                elif mrr_title == 'Artist':
                    info['artist'] = mrr_contents_text
                elif mrr_title == 'Song':
                    info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }

    # The upload date for scheduled, live and past live streams / premieres in microformats
    # may be different from the stream date. Although not in UTC, we will prefer it in this case.
    # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
    upload_date = (
        unified_strdate(get_first(microformats, 'uploadDate'))
        or unified_strdate(search_meta('uploadDate')))
    if not upload_date or (
        not info.get('is_live')
        and not info.get('was_live')
        and info.get('live_status') != 'is_upcoming'
        and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
    ):
        upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date
    info['upload_date'] = upload_date

    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = get_first(video_details, 'isPrivate', expected_type=bool)
    is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
4026
4027
4028 class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
4029
4030 @staticmethod
4031 def passthrough_smuggled_data(func):
4032 def _smuggle(entries, smuggled_data):
4033 for entry in entries:
4034 # TODO: Convert URL to music.youtube instead.
4035 # Do we need to passthrough any other smuggled_data?
4036 entry['url'] = smuggle_url(entry['url'], smuggled_data)
4037 yield entry
4038
4039 @functools.wraps(func)
4040 def wrapper(self, url):
4041 url, smuggled_data = unsmuggle_url(url, {})
4042 if self.is_music_url(url):
4043 smuggled_data['is_music_url'] = True
4044 info_dict = func(self, url, smuggled_data)
4045 if smuggled_data and info_dict.get('entries'):
4046 info_dict['entries'] = _smuggle(info_dict['entries'], smuggled_data)
4047 return info_dict
4048 return wrapper
4049
def _extract_channel_id(self, webpage):
    """
    Extract the channel id from a channel webpage.

    The 'channelId' meta tag is preferred. Otherwise the id is taken from the
    first available URL meta tag (og:url, al:*:url, twitter:*), which is
    expected to look like https://www.youtube.com/channel/<id>.

    @param webpage: HTML of the page
    @returns        the channel id
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier must be inside the group: `(...)+` repeats the group and
    # leaves only the last matched character in it
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
4062
4063 @staticmethod
4064 def _extract_basic_item_renderer(item):
4065 # Modified from _extract_grid_item_renderer
4066 known_basic_renderers = (
4067 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
4068 )
4069 for key, renderer in item.items():
4070 if not isinstance(renderer, dict):
4071 continue
4072 elif key in known_basic_renderers:
4073 return renderer
4074 elif key.startswith('grid') and key.endswith('Renderer'):
4075 return renderer
4076
4077 def _grid_entries(self, grid_renderer):
4078 for item in grid_renderer['items']:
4079 if not isinstance(item, dict):
4080 continue
4081 renderer = self._extract_basic_item_renderer(item)
4082 if not isinstance(renderer, dict):
4083 continue
4084 title = self._get_text(renderer, 'title')
4085
4086 # playlist
4087 playlist_id = renderer.get('playlistId')
4088 if playlist_id:
4089 yield self.url_result(
4090 'https://www.youtube.com/playlist?list=%s' % playlist_id,
4091 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4092 video_title=title)
4093 continue
4094 # video
4095 video_id = renderer.get('videoId')
4096 if video_id:
4097 yield self._extract_video(renderer)
4098 continue
4099 # channel
4100 channel_id = renderer.get('channelId')
4101 if channel_id:
4102 yield self.url_result(
4103 'https://www.youtube.com/channel/%s' % channel_id,
4104 ie=YoutubeTabIE.ie_key(), video_title=title)
4105 continue
4106 # generic endpoint URL support
4107 ep_url = urljoin('https://www.youtube.com/', try_get(
4108 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
4109 str))
4110 if ep_url:
4111 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
4112 if ie.suitable(ep_url):
4113 yield self.url_result(
4114 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
4115 break
4116
def _music_reponsive_list_entry(self, renderer):
    """
    Convert a music list item renderer into a url_result on music.youtube.com.

    Checked in order: the item's own video id, the watch endpoint's playlist
    (with its video id when available), then the browse endpoint's id.
    Returns None when none of these is present.
    """
    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(f'https://music.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        if video_id:
            playlist_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
                           ie=YoutubeTabIE.ie_key(), video_id=browse_id)
4134
4135 def _shelf_entries_from_content(self, shelf_renderer):
4136 content = shelf_renderer.get('content')
4137 if not isinstance(content, dict):
4138 return
4139 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
4140 if renderer:
4141 # TODO: add support for nested playlists so each shelf is processed
4142 # as separate playlist
4143 # TODO: this includes only first N items
4144 yield from self._grid_entries(renderer)
4145 renderer = content.get('horizontalListRenderer')
4146 if renderer:
4147 # TODO
4148 pass
4149
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """
    Yield entries for a shelf: a link to the shelf URL when the shelf has one,
    followed by the entries found in the shelf's own content.

    @param skip_channels  if True, a shelf whose URL contains '/channels?'
                          yields nothing at all
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Links to other channels are detected from the URL; checking
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # The shelf may not contain a shelf URL, so also extract from its content
    yield from self._shelf_entries_from_content(shelf_renderer)
4165
4166 def _playlist_entries(self, video_list_renderer):
4167 for content in video_list_renderer['contents']:
4168 if not isinstance(content, dict):
4169 continue
4170 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
4171 if not isinstance(renderer, dict):
4172 continue
4173 video_id = renderer.get('videoId')
4174 if not video_id:
4175 continue
4176 yield self._extract_video(renderer)
4177
def _rich_entries(self, rich_grid_renderer):
    """Yield the video entry held in the renderer's content.videoRenderer, if it has a video id."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
    if video_renderer and video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
4185
4186 def _video_entry(self, video_renderer):
4187 video_id = video_renderer.get('videoId')
4188 if video_id:
4189 return self._extract_video(video_renderer)
4190
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a url_result for the page a hashtag tile links to, or None if the tile has no URL."""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4197
def _post_thread_entries(self, post_thread_renderer):
    """
    Yield the entries referenced by a community post.

    In order: the attached video, the attached playlist, and every video
    linked in the post text (a link to the attached video is not repeated).
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return

    # Attached video
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = video_renderer.get('videoId')
    attached_video = self._extract_video(video_renderer) if video_id else None
    if attached_video:
        yield attached_video

    # Attached playlist
    playlist_id = try_get(
        post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # Video links inside the post text
    for run in try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not (ep_url and YoutubeIE.suitable(ep_url)):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        if video_id == ep_video_id:
            # Same video as the attachment
            continue
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
4233
4234 def _post_thread_continuation_entries(self, post_thread_continuation):
4235 contents = post_thread_continuation.get('contents')
4236 if not isinstance(contents, list):
4237 return
4238 for content in contents:
4239 renderer = content.get('backstagePostThreadRenderer')
4240 if isinstance(renderer, dict):
4241 yield from self._post_thread_entries(renderer)
4242 continue
4243 renderer = content.get('videoRenderer')
4244 if isinstance(renderer, dict):
4245 yield self._video_entry(renderer)
4246
4247 r''' # unused
4248 def _rich_grid_entries(self, contents):
4249 for content in contents:
4250 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4251 if video_renderer:
4252 entry = self._video_entry(video_renderer)
4253 if entry:
4254 yield entry
4255 '''
4256
def _extract_entries(self, parent_renderer, continuation_list):
    """
    Yield every entry found in parent_renderer['contents'].

    @param continuation_list  single-element list used as an output parameter:
                              it is reset in place and continuation_list[0] is
                              set to the continuation found while extracting
    """
    # Modified in place so that the caller's list receives the continuation
    continuation_list[:] = [None]

    section_handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not section:
            # Not a section: only rich items are handled here
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            # Only the first known renderer of each item is processed
            for key, renderer in section_item.items():
                handler = section_handlers.get(key)
                if handler is None:
                    continue
                yield from filter(None, handler(renderer))
                continuation_list[0] = self._extract_continuation(renderer)
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
4305
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """
    Yield all entries of a tab, following continuations page by page.

    The first page comes from the tab's 'sectionListRenderer' or
    'richGridRenderer'. Further pages are requested while a continuation is
    available and the response contains a renderer this method knows.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # _extract_entries reports the next continuation through continuation_list
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict)
        or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Renderers found under response['continuationContents']
    continuation_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Renderers found in appended continuation items: (handler, key the items are wrapped under)
    item_handlers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        matched_key = next(
            (key for key in continuation_contents if key in continuation_handlers), None)
        if matched_key is not None:
            continuation_renderer = continuation_contents[matched_key]
            continuation_list[:] = [None]
            yield from continuation_handlers[matched_key](continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            if continuation_renderer:
                continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        first_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        matched_key = next((key for key in first_item if key in item_handlers), None)
        if matched_key is None:
            # Nothing recognisable in this response
            break
        handler, container_key = item_handlers[matched_key]
        video_items_renderer = {container_key: continuation_items}
        continuation_list[:] = [None]
        yield from handler(video_items_renderer)
        continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
4380
@staticmethod
def _extract_selected_tab(tabs, fatal=True):
    """
    Return the renderer of the first tab whose 'selected' field is True.

    If no tab is selected, raise ExtractorError when `fatal`, else return None.
    """
    for tab in tabs:
        candidate = dict_get(tab, ('tabRenderer', 'expandableTabRenderer'))
        if candidate and candidate.get('selected') is True:
            return candidate
    if fatal:
        raise ExtractorError('Unable to find selected tab')
    return None
4390
def _extract_uploader(self, data):
    """
    Extract uploader fields from the playlist's secondary sidebar info.

    @returns  dict with any of 'uploader', 'uploader_id' and 'uploader_url';
              fields that could not be found are left out
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_text = owner.get('text')
    fields = {
        # Text of the form "by X and N others" is reduced to X
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], str)),
    }
    return {name: value for name, value in fields.items() if value is not None}
4406
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Build the playlist result for the selected tab of a channel or playlist page.

    Metadata comes from the channel metadata renderer when present, otherwise
    from the playlist metadata renderer. Entries are extracted lazily from
    the selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    channel_name = channel_url = channel_id = None
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    def _append_uncropped(thumbnails, thumbnail_id, preference):
        # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
        # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
        if not thumbnails:
            return
        uncropped_url = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    # The selected tab's name is appended to the title
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        # No channel metadata: take the uploader from the sidebar instead
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the uploader_* fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)
4498
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the videos of an inline playlist, requesting further pages from the
    'next' endpoint starting at the last video seen.

    Extraction stops when a page has no videos, has no videos after the last
    one already yielded, or when the response contains no playlist.
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The last item may lack a watch endpoint; the query then uses its fallbacks
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # Nothing to iterate; _playlist_entries would fail on a missing playlist
            return
4529
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Extract a playlist shown alongside a video.

    If the playlist links to a page other than `url` and is not a known
    unviewable playlist, extraction is delegated to that page. Otherwise the
    videos are extracted inline.
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    playlist_url = urljoin(url, relative_url)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    should_delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if should_delegate:
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=title)

    inline_entries = self._extract_inline_playlist(playlist, playlist_id, data, ytcfg)
    return self.playlist_result(
        inline_entries, playlist_id=playlist_id, playlist_title=title)
4552
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for dropdown_entry in dropdown_entries:
        if not try_get(dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
        if not label:
            continue
        # The first selected entry with a label counts as a badge
        badge_labels.add(label.lower())
        break

    is_private = is_unlisted = None
    for badge_label in badge_labels:
        if badge_label == 'public':
            is_unlisted = is_private = False
        elif badge_label == 'private':
            is_private = True
        elif badge_label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)
4584
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first non-empty `info_renderer` value of type `expected_type`
    among the playlist sidebar items of `data`, or None if there is none.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next(filter(None, candidates), None)
4593
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns None if the playlist has no primary sidebar info. If the button
    is not found, the request falls back to default params and browse id.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_endpoint = {}
    for menu_item in try_get(sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []:
        if not isinstance(menu_item, dict):
            continue
        nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
        text = try_get(
            nav_item_renderer, lambda x: x['text']['simpleText'], str)
        if text and text.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
4629
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is among the values of the youtubetab 'skip' extractor argument."""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped
4633
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its initial data, retrying on failure.

    Network errors are retried, except HTTP 403 and 429, which are reported
    at once like any other extraction error. A page whose initial data has
    none of the expected top-level keys is also retried.

    @returns  (webpage, data); either may be None when extraction failed
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            if isinstance(cause, network_exceptions) and not (
                    isinstance(cause, urllib.error.HTTPError) and cause.code in (403, 429)):
                retry.error = e
                continue
            self._error_or_warning(e, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        has_content = traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not has_content:
            retry.error = ExtractorError('Incomplete yt initial data received')

    return webpage, data
4661
4662 def _report_playlist_authcheck(self, ytcfg, fatal=True):
4663 """Use if failed to extract ytcfg (and data) from initial webpage"""
4664 if not ytcfg and self.is_authenticated:
4665 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
4666 if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal:
4667 raise ExtractorError(
4668 f'{msg}. If you are not downloading private content, or '
4669 'your cookies are only for the first account and channel,'
4670 ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
4671 expected=True)
4672 self.report_warning(msg, only_once=True)
4673
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Get the initial data for `url`, from the webpage when allowed and from
    the API otherwise.

    @returns  (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)

        # Detect a redirect to the home page that was not explicitly requested
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        # No usable webpage data: fall back to the API
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
4693
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the API and request the endpoint it resolves to.

    Browse endpoints are requested from 'browse' and watch endpoints from
    'next'. If the URL resolves to neither, raise ExtractorError when
    `fatal`, else warn and return None.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][endpoint_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=api_ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)
4711
# `params` value sent with search requests when the caller does not pass one
_SEARCH_PARAMS = None

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the results of a search for `query`, page by page, until a page
    provides no continuation.

    @param params  value of the 'params' request field; falls back to
                   self._SEARCH_PARAMS when not given, omitted when falsy
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    # Locations where the result list may be found in a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    check_get_keys = tuple({keys[0] for keys in content_keys})
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    continuation_list = [None]
    search = None
    page_num = 0
    while True:
        page_num += 1
        # The previous page's continuation (if any) is merged into the request
        data.update(continuation_list[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
        if not continuation_list[0]:
            break
4746
4747
class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
    """Extractor for YouTube "tab" style URLs.

    As defined by ``_VALID_URL``, this covers channel/c/user/browse pages,
    ``feed/`` and ``hashtag/`` pages, ``playlist``/``watch`` URLs carrying a
    ``list=`` parameter, and direct ``youtube.com/<name>`` URLs whose name is
    not one of the reserved names.
    """

    IE_DESC = 'YouTube Tabs'
    _VALID_URL = r'''(?x:
        https?://
            (?:\w+\.)?
            (?:
                youtube(?:kids)?\.com|
                %(invidious)s
            )/
            (?:
                (?P<channel_type>channel|c|user|browse)/|
                (?P<not_channel>
                    feed/|hashtag/|
                    (?:playlist|watch)\?.*?\blist=
                )|
                (?!(?:%(reserved_names)s)\b)  # Direct URLs
            )
            (?P<id>[^/?\#&]+)
    )''' % {
        'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:tab'

    # Test definitions for this extractor (presumably consumed by the project's test harness)
    _TESTS = [{
        'note': 'playlists, multipage',
        'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
        'playlist_mincount': 94,
        'info_dict': {
            'id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'title': 'Igor Kleiner - Playlists',
            'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
            'uploader': 'Igor Kleiner',
            'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'channel': 'Igor Kleiner',
            'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
            'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
            'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
            'channel_follower_count': int
        },
    }, {
        'note': 'playlists, multipage, different order',
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 94,
        'info_dict': {
            'id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'title': 'Igor Kleiner - Playlists',
            'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
            'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'uploader': 'Igor Kleiner',
            'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
            'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
            'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'channel': 'Igor Kleiner',
            'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
            'channel_follower_count': int
        },
    }, {
        'note': 'playlists, series',
        'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'UCYO_jab_esuFRV4b17AJtAw',
            'title': '3Blue1Brown - Playlists',
            'description': 'md5:e1384e8a133307dd10edee76e875d62f',
            'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
            'uploader': '3Blue1Brown',
            'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
            'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
            'channel': '3Blue1Brown',
            'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
            'tags': ['Mathematics'],
            'channel_follower_count': int
        },
    }, {
        'note': 'playlists, singlepage',
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
            'title': 'ThirstForScience - Playlists',
            'description': 'md5:609399d937ea957b0f53cbffb747a14c',
            'uploader': 'ThirstForScience',
            'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
            'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
            'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
            'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
            'tags': 'count:13',
            'channel': 'ThirstForScience',
            'channel_follower_count': int
        }
    }, {
        'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
        'only_matching': True,
    }, {
        'note': 'basic, single video playlist',
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
            'title': 'youtube-dl public playlist',
            'description': '',
            'tags': [],
            'view_count': int,
            'modified_date': '20201130',
            'channel': 'Sergey M.',
            'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
            'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
        },
        'playlist_count': 1,
    }, {
        'note': 'empty playlist',
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
            'title': 'youtube-dl empty playlist',
            'tags': [],
            'channel': 'Sergey M.',
            'description': '',
            'modified_date': '20160902',
            'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
        },
        'playlist_count': 0,
    }, {
        'note': 'Home tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Home',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel': 'lex will',
            'tags': ['bible', 'history', 'prophesy'],
            'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel_follower_count': int
        },
        'playlist_mincount': 2,
    }, {
        'note': 'Videos tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Videos',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'tags': ['bible', 'history', 'prophesy'],
            'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel': 'lex will',
            'channel_follower_count': int
        },
        'playlist_mincount': 975,
    }, {
        'note': 'Videos tab, sorted by popular',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Videos',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel': 'lex will',
            'tags': ['bible', 'history', 'prophesy'],
            'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel_follower_count': int
        },
        'playlist_mincount': 199,
    }, {
        'note': 'Playlists tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Playlists',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel': 'lex will',
            'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'tags': ['bible', 'history', 'prophesy'],
            'channel_follower_count': int
        },
        'playlist_mincount': 17,
    }, {
        'note': 'Community tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Community',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel': 'lex will',
            'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'tags': ['bible', 'history', 'prophesy'],
            'channel_follower_count': int
        },
        'playlist_mincount': 18,
    }, {
        'note': 'Channels tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Channels',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel': 'lex will',
            'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'tags': ['bible', 'history', 'prophesy'],
            'channel_follower_count': int
        },
        'playlist_mincount': 12,
    }, {
        'note': 'Search tab',
        'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
        'playlist_mincount': 40,
        'info_dict': {
            'id': 'UCYO_jab_esuFRV4b17AJtAw',
            'title': '3Blue1Brown - Search - linear algebra',
            'description': 'md5:e1384e8a133307dd10edee76e875d62f',
            'uploader': '3Blue1Brown',
            'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
            'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
            'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
            'tags': ['Mathematics'],
            'channel': '3Blue1Brown',
            'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
            'channel_follower_count': int
        },
    }, {
        'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
        'only_matching': True,
    }, {
        'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
        'only_matching': True,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
            'uploader': 'Christiaan008',
            'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
            'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
            'view_count': int,
            'modified_date': '20150605',
            'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
            'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
            'channel': 'Christiaan008',
        },
        'playlist_count': 96,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
            'uploader': 'Cauchemar',
            'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
            'channel_url': 'https://www.youtube.com/c/Cauchemar89',
            'tags': [],
            'modified_date': r're:\d{8}',
            'channel': 'Cauchemar',
            'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
            'view_count': int,
            'description': '',
            'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 1123,
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'note': 'even larger playlist, 8832 videos',
        'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
        'only_matching': True,
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
            'uploader': 'Interstellar Movie',
            'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
            'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
            'tags': [],
            'view_count': int,
            'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
            'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
            'channel': 'Interstellar Movie',
            'description': '',
            'modified_date': r're:\d{8}',
        },
        'playlist_mincount': 21,
    }, {
        'note': 'Playlist with "show unavailable videos" button',
        'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
        'info_dict': {
            'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
            'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
            'uploader': 'Phim Siêu Nhân Nhật Bản',
            'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
            'view_count': int,
            'channel': 'Phim Siêu Nhân Nhật Bản',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
            'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
            'modified_date': r're:\d{8}',
        },
        'playlist_mincount': 200,
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'note': 'Playlist with unavailable videos in page 7',
        'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
        'info_dict': {
            'title': 'Uploads from BlankTV',
            'id': 'UU8l9frL61Yl5KFOl87nIm2w',
            'uploader': 'BlankTV',
            'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
            'channel': 'BlankTV',
            'channel_url': 'https://www.youtube.com/c/blanktv',
            'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
            'view_count': int,
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/blanktv',
            'modified_date': r're:\d{8}',
            'description': '',
        },
        'playlist_mincount': 1000,
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
        'info_dict': {
            'title': 'Data Analysis with Dr Mike Pound',
            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
            'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
            'uploader': 'Computerphile',
            'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
            'uploader_url': 'https://www.youtube.com/user/Computerphile',
            'tags': [],
            'view_count': int,
            'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
            'channel_url': 'https://www.youtube.com/user/Computerphile',
            'channel': 'Computerphile',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
        'only_matching': True,
    }, {
        'note': 'Playlist URL that does not actually serve a playlist',
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'view_count': int,
            'like_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is not available.',
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
        'info_dict': {
            'id': 'Wq15eF5vCbI',  # This will keep changing
            'ext': 'mp4',
            'title': str,
            'uploader': 'Sky News',
            'uploader_id': 'skynews',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
            'upload_date': r're:\d{8}',
            'description': str,
            'categories': ['News & Politics'],
            'tags': list,
            'like_count': int,
            'release_timestamp': 1642502819,
            'channel': 'Sky News',
            'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
            'age_limit': 0,
            'view_count': int,
            'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',
            'playable_in_embed': True,
            'release_date': '20220118',
            'availability': 'public',
            'live_status': 'is_live',
            'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
            'channel_follower_count': int
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['Ignoring subtitle tracks found in '],
    }, {
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'note': 'A channel that is not live. Should raise error',
        'url': 'https://www.youtube.com/user/numberphile/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/trending',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/library',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/history',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/subscriptions',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/watch_later',
        'only_matching': True,
    }, {
        'note': 'Recommended - redirects to home page.',
        'url': 'https://www.youtube.com/feed/recommended',
        'only_matching': True,
    }, {
        'note': 'inline playlist with not always working continuations',
        'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/course',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/zsecurity',
        'only_matching': True,
    }, {
        'url': 'http://www.youtube.com/NASAgovVideo/videos',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/hashtag/cctv9',
        'info_dict': {
            'id': 'cctv9',
            'title': '#cctv9',
            'tags': [],
        },
        'playlist_mincount': 350,
    }, {
        'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
        'only_matching': True,
    }, {
        'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
        'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
        'only_matching': True
    }, {
        'note': '/browse/ should redirect to /channel/',
        'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
        'only_matching': True
    }, {
        'note': 'VLPL, should redirect to playlist?list=PL...',
        'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
        'info_dict': {
            'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
            'uploader': 'NoCopyrightSounds',
            'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
            'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
            'title': 'NCS : All Releases 💿',
            'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
            'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
            'modified_date': r're:\d{8}',
            'view_count': int,
            'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
            'tags': [],
            'channel': 'NoCopyrightSounds',
        },
        'playlist_mincount': 166,
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'note': 'Topic, should redirect to playlist?list=UU...',
        'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
        'info_dict': {
            'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
            'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
            'title': 'Uploads from Royalty Free Music - Topic',
            'uploader': 'Royalty Free Music - Topic',
            'tags': [],
            'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
            'channel': 'Royalty Free Music - Topic',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
            'modified_date': r're:\d{8}',
            'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
            'description': '',
        },
        'expected_warnings': [
            'The URL does not have a videos tab',
            r'[Uu]navailable videos (are|will be) hidden',
        ],
        'playlist_mincount': 101,
    }, {
        'note': 'Topic without a UU playlist',
        'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
        'info_dict': {
            'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
            'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
            'tags': [],
        },
        'expected_warnings': [
            'the playlist redirect gave error',
        ],
        'playlist_mincount': 9,
    }, {
        'note': 'Youtube music Album',
        'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
        'info_dict': {
            'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
            'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
            'tags': [],
            'view_count': int,
            'description': '',
            'availability': 'unlisted',
            'modified_date': r're:\d{8}',
        },
        'playlist_count': 50,
    }, {
        'note': 'unlisted single video playlist',
        'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
        'info_dict': {
            'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
            'uploader': 'colethedj',
            'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
            'title': 'yt-dlp unlisted playlist test',
            'availability': 'unlisted',
            'tags': [],
            'modified_date': '20220418',
            'channel': 'colethedj',
            'view_count': int,
            'description': '',
            'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
            'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
            'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
        },
        'playlist_count': 1,
    }, {
        'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
        'url': 'https://www.youtube.com/feed/recommended',
        'info_dict': {
            'id': 'recommended',
            'title': 'recommended',
            'tags': [],
        },
        'playlist_mincount': 50,
        'params': {
            'skip_download': True,
            'extractor_args': {'youtubetab': {'skip': ['webpage']}}
        },
    }, {
        'note': 'API Fallback: /videos tab, sorted by oldest first',
        'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
        'info_dict': {
            'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
            'title': 'Cody\'sLab - Videos',
            'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
            'uploader': 'Cody\'sLab',
            'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
            'channel': 'Cody\'sLab',
            'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
            'tags': [],
            'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
            'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
            'channel_follower_count': int
        },
        'playlist_mincount': 650,
        'params': {
            'skip_download': True,
            'extractor_args': {'youtubetab': {'skip': ['webpage']}}
        },
    }, {
        'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
        'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
        'info_dict': {
            'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
            'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
            'title': 'Uploads from Royalty Free Music - Topic',
            'uploader': 'Royalty Free Music - Topic',
            'modified_date': r're:\d{8}',
            'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
            'tags': [],
            'channel': 'Royalty Free Music - Topic',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
        },
        'expected_warnings': [
            'does not have a videos tab',
            r'[Uu]navailable videos (are|will be) hidden',
        ],
        'playlist_mincount': 101,
        'params': {
            'skip_download': True,
            'extractor_args': {'youtubetab': {'skip': ['webpage']}}
        },
    }, {
        'note': 'non-standard redirect to regional channel',
        'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
        'only_matching': True
    }, {
        'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
        'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
        'info_dict': {
            'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
            'modified_date': '20220407',
            'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
            'tags': [],
            'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
            'uploader': 'pukkandan',
            'availability': 'unlisted',
            'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
            'channel': 'pukkandan',
            'description': 'Test for collaborative playlist',
            'title': 'yt-dlp test - collaborative playlist',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
        },
        'playlist_mincount': 2
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for any URL that YoutubeIE accepts; otherwise defer to the inherited check."""
        return False if YoutubeIE.suitable(url) else super().suitable(url)

    # Splits a URL into three named parts:
    #   pre  - everything matched by _VALID_URL
    #   tab  - an optional "/word" path component; the conditional group only
    #          attempts it when the `not_channel` group did not participate
    #   post - the remainder of the URL
    _URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')

    @YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
    def _real_extract(self, url, smuggled_data):
        """Extract a tab/playlist page, or hand the URL over to another extractor.

        The URL's host is rewritten to www.youtube.com and split with
        ``_URL_RE``. Depending on the URL and on the downloaded page data this
        returns the tabs result, the watch-page playlist result, or a
        ``url_result`` pointing at a single video / another tab URL.

        Args:
            url: The URL to extract.
            smuggled_data: Mapping of smuggled options (presumably supplied by
                the ``passthrough_smuggled_data`` decorator); only the
                ``'is_music_url'`` key is read here.

        Raises:
            ExtractorError: If a music album cannot be resolved to a playlist,
                if a ``watch`` URL has neither video nor playlist id, if an
                explicitly requested tab does not exist, or if the page is
                not recognised at all.
            UserNotLive: If the ``/live`` tab was requested but a tab page was
                served instead of a redirect to a video.
        """
        item_id = self._match_id(url)
        # Always talk to www.youtube.com, whatever host the URL was given on
        url = urllib.parse.urlunparse(
            urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            """Return the named groups of _URL_RE for url, with unmatched groups as ''."""
            mobj = self._URL_RE.match(url).groupdict()
            # Only existing keys are overwritten, so the dict's size is stable while iterating
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj, redirect_warning = get_mobj(url), None
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
                elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                    mdata = self._extract_tab_endpoint(
                        f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
                    murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                        get_all=False, expected_type=str)
                    if not murl:
                        raise ExtractorError('Failed to resolve album to playlist')
                    return self.url_result(murl, ie=YoutubeTabIE.ie_key())
                elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                    pre = f'https://www.youtube.com/channel/{item_id}'

        # Remember what the user actually asked for before a default tab is substituted
        original_tab_name = tab
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        # Rebuild the URL from the (possibly rewritten) parts and re-parse it
        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id, playlist_id = (qs.get(key, [None])[0] for key in ('v', 'list'))

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
            url = f'https://www.youtube.com/playlist?list={playlist_id}'
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
                return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                       ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

        data, ytcfg = self._extract_data(url, item_id)

        # YouTube may provide a non-standard redirect to the regional channel
        # See: https://github.com/yt-dlp/yt-dlp/issues/2694
        redirect_url = traverse_obj(
            data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
        if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
            # Carry the requested tab and trailing part over to the redirect target
            redirect_url = ''.join((
                urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
            self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
            return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

        tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            selected_tab_name = selected_tab.get('title', '').lower()
            # The tab titled "Home" is addressed as "/featured" in URLs (see the warning text above)
            if selected_tab_name == 'home':
                selected_tab_name = 'featured'
            requested_tab_name = mobj['tab'][1:]  # drop the leading "/"
            if 'no-youtube-channel-redirect' not in compat_opts:
                if requested_tab_name == 'live':  # Live tab should have redirected to the video
                    raise UserNotLive(video_id=mobj['id'])
                if requested_tab_name not in ('', selected_tab_name):
                    redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                    if not original_tab_name:
                        # The tab was substituted by this method, so fall back instead of failing
                        if item_id[:2] == 'UC':
                            # Topic channels don't have /videos. Use the equivalent playlist instead
                            pl_id = f'UU{item_id[2:]}'
                            pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                            try:
                                data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                            except ExtractorError:
                                redirect_warning += ' and the playlist redirect gave error'
                            else:
                                item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                                redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                        if selected_tab_name and selected_tab_name != requested_tab_name:
                            redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'
                    else:
                        # The user explicitly asked for a tab that the channel does not have
                        raise ExtractorError(redirect_warning, expected=True)

        if redirect_warning:
            self.to_screen(redirect_warning)
        self.write_debug(f'Final URL: {url}')

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
        self._extract_and_report_alerts(data, only_once=True)
        # `data` may have been replaced above, so look the tabs up again
        tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
        if tabs:
            return self._extract_from_tabs(item_id, ytcfg, data, tabs)

        playlist = traverse_obj(
            data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

        # Neither tabs nor a playlist: fall back to the single video, if any is known
        video_id = traverse_obj(
            data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                   ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
5572
5573
class YoutubePlaylistIE(InfoExtractor):
    """Match bare playlist IDs and `list=` URLs and hand them over to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for URLs YoutubeTabIE accepts or that carry a `v` parameter.

        Every other URL is judged by the inherited `suitable`.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): imported at function scope although the module also imports
        # parse_qs; presumably needed when this method runs outside this module's
        # namespace. Confirm before removing.
        from ..utils import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return False if has_video_id else super().suitable(url)

    def _real_extract(self, url):
        """Rewrite `url` as a /playlist URL and delegate it to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Must be checked on the incoming URL, before it gets rewritten below
        from_music_site = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query if there is one; a bare ID has none
        query = parse_qs(url) or {'list': playlist_id}
        url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music_site:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
5683
5684
class YoutubeYtBeIE(InfoExtractor):
    """Turn youtu.be short links that carry a `list=` parameter into watch URLs."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent /watch URL (video + playlist) and pass it to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
5734
5735
class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map `embed/live_stream?channel=...` URLs onto the channel's /live URL."""

    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel ID taken from the query string."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
5749
5750
class YoutubeYtUserIE(InfoExtractor):
    """Expand the `ytuser:<name>` shorthand into the user's /videos URL."""

    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /user/<name>/videos page."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
5765
5766
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the `:ytfav` keyword (and its spelling variants) to the `LL` playlist."""

    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the fixed `list=LL` playlist URL to YoutubeTabIE."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
5784
5785
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Build a playlist out of the YouTube notification menu (`:ytnotif` keyword)."""

    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    IE_NAME = 'youtube:notif'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield one entry per usable notification found in `response`.

        `continuation_list` is a one-element list used as an out-parameter: slot 0
        is cleared first and then set to the last `continuationItemRenderer`
        encountered while iterating, if any.
        """
        # Shape of the first response (popup menu) vs. continuation responses
        popup_items_path = (
            'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0,
            'multiPageMenuNotificationSectionRenderer', 'items')
        appended_items_path = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')
        items = traverse_obj(response, popup_items_path, appended_items_path, expected_type=list)

        continuation_list[0] = None
        for item in items or []:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            next_page = item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Convert one notification renderer into a `url` result dict.

        A notification with a watch endpoint points at YoutubeIE. Otherwise it must
        describe a channel post (browse endpoint with a `/post/<id>` base URL) and
        points at YoutubeTabIE. Returns None when neither can be determined.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        if video_id:
            channel_id = None
            target_ie = YoutubeIE
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            base_url = traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str)
            post_id = self._search_regex(r'/post/(.+)', base_url, 'post id', default=None)
            if not (channel_id and post_id):
                return
            target_ie = YoutubeTabIE
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification,
            ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)

        message = self._get_text(notification, 'shortMessage')
        if message:
            message = message.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', message, 'video title', default=None)

        # The date is only derived when the `approximate_date` extractor argument is set
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()):
            sent_time = self._extract_time_text(notification, 'sentTimeText')[0]
            upload_date = strftime_or_none(sent_time, '%Y%m%d')
        else:
            upload_date = None

        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield notification entries page by page until no continuation is left."""
        continuation_list = [None]
        response = None
        page = 0
        while True:
            page += 1
            ctoken = traverse_obj(
                continuation_list,
                (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'),
                expected_type=str)
            # Visitor data comes from the previous page's response (None on page 1)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                return

    def _real_extract(self, url):
        """Return a playlist named 'notifications' over the lazily fetched entries."""
        display_id = 'notifications'
        if self.skip_webpage:
            ytcfg = {}
        else:
            ytcfg = self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)
5875
5876
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor for the `ytsearch` key; only declares configuration."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]
5890
5891
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor for the `ytsearchdate` key; only declares configuration."""

    # Derived from the plain search extractor's name
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]
5905
5906
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle youtube.com `/results` and `/search` URLs, honouring their `sp` parameter."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of the search results, with the query as both id and title."""
        qs = parse_qs(url)
        # `search_query` takes precedence over `q` when both are present
        terms = qs.get('search_query') or qs.get('q')
        query = terms[0]
        search_params = qs.get('sp', (None,))[0]
        results = self._search_results(query, search_params)
        return self.playlist_result(results, query, query)
5946
5947
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle music.youtube.com search URLs, optionally restricted to one section.

    The section is taken from the `sp` query parameter or, failing that, from the
    URL fragment (e.g. `#songs`).
    """

    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        },
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'},
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'},
    }]

    # Section name (as written in the URL fragment) -> `sp` search parameter
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return a playlist of results titled `<query>` or `<query> - <section>`."""
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]

        if params:
            # Explicit `sp`: show the section name if it is a known one,
            # otherwise fall back to the raw parameter value
            section = params
            for name, token in self._SECTIONS.items():
                if token == params:
                    section = name
                    break
        else:
            # No `sp`: interpret the text after the first '#' as a section name
            fragment = url.partition('#')[2].partition('#')[0]
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None

        title = join_nonempty(query, section, delim=' - ')
        results = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(results, title, title)
5999
6000
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors
    Subclasses are expected to override _FEED_NAME.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    @classproperty
    def IE_NAME(self):
        # Derived from the feed name so that subclasses need not set it
        return 'youtube:{}'.format(self._FEED_NAME)

    def _real_initialize(self):
        # This class does not inherit from YoutubeBaseInfoExtractor,
        # so its login check is invoked explicitly
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        """Delegate the /feed/<_FEED_NAME> page to YoutubeTabIE."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
6019
6020
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the `:ytwatchlater` keyword to the `WL` playlist."""

    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the fixed `list=WL` playlist URL to YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
6033
6034
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `recommended` feed; matches `:ytrec` and the bare site URL."""

    _FEED_NAME = 'recommended'
    # Overrides the base class default of True
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
6050
6051
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `subscriptions` feed (`:ytsubs` keyword)."""

    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
6063
6064
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `history` feed (`:ythis` / `:ythistory` keyword)."""

    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
6073
6074
class YoutubeStoriesIE(InfoExtractor):
    """Expand `ytstories:UC<id>` into the matching `RLTD<id>` playlist URL."""

    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    IE_NAME = 'youtube:stories'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the stories playlist to YoutubeTabIE, flagged via smuggled data."""
        # The matched id excludes the leading "UC" of the channel id
        playlist_id = f'RLTD{self._match_id(url)}'
        # Pass the extractor key, not the class object, like every other
        # delegating extractor in this file does
        return self.url_result(
            smuggle_url(f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1', {'is_story': True}),
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
6089
6090
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that lack a video ID and explain the likely cause.

    Such URLs typically appear when an unquoted `&` made the shell cut the URL
    short. Extraction always fails with an explanatory, expected error.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError telling the user to quote the URL."""
        # The hint names yt-dlp, the program this extractor ships with
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
6138
6139
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Resolve a /clip/<id> page to its base video plus the clipped section times."""

    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """Return a `url_transparent` result for the clip's base video.

        The result carries the clip ID as `id` and the clip boundaries, in
        seconds, as `section_start` / `section_end`.

        Raises ExtractorError when the page yields no video ID or no usable
        clip start/end times.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        # The first `loopCommand` dict found along this path holds the
        # start/end times in milliseconds
        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False, expected_type=dict) or {}

        # Fail with a descriptive extractor error instead of a bare
        # TypeError/KeyError when the page layout does not match
        start_ms = int_or_none(clip_data.get('startTimeMs'))
        end_ms = int_or_none(clip_data.get('endTimeMs'))
        if start_ms is None or end_ms is None:
            raise ExtractorError('Unable to find clip start and end times')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            'section_start': start_ms / 1000,
            'section_end': end_ms / 1000,
        }
6196
6197
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose `v=` value is 1-10 characters long and report them."""

    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)