]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/youtube.py
[jsinterp] Fix escape in regex
[yt-dlp.git] / yt_dlp / extractor / youtube.py
1 import base64
2 import calendar
3 import copy
4 import datetime
5 import hashlib
6 import itertools
7 import json
8 import math
9 import os.path
10 import random
11 import re
12 import sys
13 import threading
14 import time
15 import traceback
16 import urllib.error
17 import urllib.parse
18
19 from .common import InfoExtractor, SearchInfoExtractor
20 from .openload import PhantomJSwrapper
21 from ..compat import functools
22 from ..jsinterp import JSInterpreter
23 from ..utils import (
24 NO_DEFAULT,
25 ExtractorError,
26 UserNotLive,
27 bug_reports_message,
28 classproperty,
29 clean_html,
30 datetime_from_str,
31 dict_get,
32 float_or_none,
33 format_field,
34 get_first,
35 int_or_none,
36 is_html,
37 join_nonempty,
38 js_to_json,
39 mimetype2ext,
40 network_exceptions,
41 orderedSet,
42 parse_codecs,
43 parse_count,
44 parse_duration,
45 parse_iso8601,
46 parse_qs,
47 qualities,
48 remove_start,
49 smuggle_url,
50 str_or_none,
51 str_to_int,
52 strftime_or_none,
53 traverse_obj,
54 try_get,
55 unescapeHTML,
56 unified_strdate,
57 unified_timestamp,
58 unsmuggle_url,
59 update_url_query,
60 url_or_none,
61 urljoin,
62 variadic,
63 )
64
# Built-in Innertube client configurations, keyed by client name.
# Each entry may hold:
#   INNERTUBE_API_KEY             - API key string; build_innertube_clients() fills in
#                                   a shared default for entries that omit it
#   INNERTUBE_HOST                - API host name; defaults to 'www.youtube.com'
#   INNERTUBE_CONTEXT             - request context; its 'client' dict carries
#                                   clientName/clientVersion and optional device fields
#   INNERTUBE_CONTEXT_CLIENT_NAME - numeric client id
#   REQUIRE_JS_PLAYER             - defaults to True when omitted
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20220731.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20220727.01.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20220726.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '17.31.35',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '5.16.51',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '22.30.100',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    # No INNERTUBE_API_KEY here: the shared default key is filled in later
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '17.33.2',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '5.21',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    # No INNERTUBE_API_KEY here: the shared default key is filled in later
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '22.33.101',
                'deviceModel': 'iPhone14,3',
                'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85
    },
}
237
238
239 def _split_innertube_client(client_name):
240 variant, *base = client_name.rsplit('.', 1)
241 if base:
242 return variant, base[0], variant
243 base, *variant = client_name.split('_', 1)
244 return client_name, base, variant[0] if variant else None
245
246
def build_innertube_clients():
    """
    Complete INNERTUBE_CLIENTS in place

    Fills in default key/host/player settings and the 'hl' language, assigns each
    client a 'priority' derived from its base client, attaches third-party embed
    info to '*_embedded' clients and adds an '<name>_embedscreen' copy of every
    client that has no variant.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
    # Earlier base clients must rank higher, hence the reversed order
    rank = qualities(tuple(reversed(base_clients)))
    defaults = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot because new clients are inserted inside the loop
    for name, cfg in list(INNERTUBE_CLIENTS.items()):
        for option, fallback in defaults:
            cfg.setdefault(option, fallback)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        base_name, variant = _split_innertube_client(name)[1:]
        cfg['priority'] = 10 * rank(base_name)

        if variant == 'embedded':
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            cfg['priority'] -= 2
        elif variant:
            cfg['priority'] -= 3
        else:
            screen_cfg = copy.deepcopy(cfg)
            screen_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            screen_cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            screen_cfg['priority'] -= 3
            INNERTUBE_CLIENTS[f'{name}_embedscreen'] = screen_cfg
273
274
# Complete INNERTUBE_CLIENTS (defaults, priorities, *_embedscreen variants) once at import time
build_innertube_clients()
276
277
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Regex alternation of first path components that name site pages
    # NOTE(review): presumably used to keep these from matching as channel/user names - confirm at the usage sites
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
        r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
        r'browse|oembed|get_video_info|iframe_api|s/player|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    # Regex for a playlist ID: a known prefix followed by 10+ ID characters, or one of the fixed IDs
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    # _NETRC_MACHINE = 'youtube'

    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Host-name regexes of alternative front-ends (Invidious/Piped instances and redirectors)
    _INVIDIOUS_SITES = (
        # invidious-redirect websites
        r'(?:www\.)?redirect\.invidious\.io',
        r'(?:(?:www|dev)\.)?invidio\.us',
        # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
        r'(?:www\.)?invidious\.pussthecat\.org',
        r'(?:www\.)?invidious\.zee\.li',
        r'(?:www\.)?invidious\.ethibox\.fr',
        r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
        r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
        r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
        # youtube-dl invidious instances list
        r'(?:(?:www|no)\.)?invidiou\.sh',
        r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
        r'(?:www\.)?invidious\.kabi\.tk',
        r'(?:www\.)?invidious\.mastodon\.host',
        r'(?:www\.)?invidious\.zapashcanon\.fr',
        r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
        r'(?:www\.)?invidious\.tinfoil-hat\.net',
        r'(?:www\.)?invidious\.himiko\.cloud',
        r'(?:www\.)?invidious\.reallyancient\.tech',
        r'(?:www\.)?invidious\.tube',
        r'(?:www\.)?invidiou\.site',
        r'(?:www\.)?invidious\.site',
        r'(?:www\.)?invidious\.xyz',
        r'(?:www\.)?invidious\.nixnet\.xyz',
        r'(?:www\.)?invidious\.048596\.xyz',
        r'(?:www\.)?invidious\.drycat\.fr',
        r'(?:www\.)?inv\.skyn3t\.in',
        r'(?:www\.)?tube\.poal\.co',
        r'(?:www\.)?tube\.connect\.cafe',
        r'(?:www\.)?vid\.wxzm\.sx',
        r'(?:www\.)?vid\.mint\.lgbt',
        r'(?:www\.)?vid\.puffyan\.us',
        r'(?:www\.)?yewtu\.be',
        r'(?:www\.)?yt\.elukerio\.org',
        r'(?:www\.)?yt\.lelux\.fi',
        r'(?:www\.)?invidious\.ggc-project\.de',
        r'(?:www\.)?yt\.maisputain\.ovh',
        r'(?:www\.)?ytprivate\.com',
        r'(?:www\.)?invidious\.13ad\.de',
        r'(?:www\.)?invidious\.toot\.koeln',
        r'(?:www\.)?invidious\.fdn\.fr',
        r'(?:www\.)?watch\.nettohikari\.com',
        r'(?:www\.)?invidious\.namazso\.eu',
        r'(?:www\.)?invidious\.silkky\.cloud',
        r'(?:www\.)?invidious\.exonip\.de',
        r'(?:www\.)?invidious\.riverside\.rocks',
        r'(?:www\.)?invidious\.blamefran\.net',
        r'(?:www\.)?invidious\.moomoo\.de',
        r'(?:www\.)?ytb\.trom\.tf',
        r'(?:www\.)?yt\.cyberhost\.uk',
        r'(?:www\.)?kgg2m7yk5aybusll\.onion',
        r'(?:www\.)?qklhadlycap4cnod\.onion',
        r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
        r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
        r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
        r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
        r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
        r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
        r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
        r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
        r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
        r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
        # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
        # (piped.kavin.rocks is also matched by the combined kavin.rocks pattern above)
        r'(?:www\.)?piped\.kavin\.rocks',
        r'(?:www\.)?piped\.silkky\.cloud',
        r'(?:www\.)?piped\.tokhmi\.xyz',
        r'(?:www\.)?piped\.moomoo\.me',
        r'(?:www\.)?il\.ax',
        r'(?:www\.)?piped\.syncpundit\.com',
        r'(?:www\.)?piped\.mha\.fi',
        r'(?:www\.)?piped\.mint\.lgbt',
        r'(?:www\.)?piped\.privacy\.com\.de',
    )
369
370 def _initialize_consent(self):
371 cookies = self._get_cookies('https://www.youtube.com/')
372 if cookies.get('__Secure-3PSID'):
373 return
374 consent_id = None
375 consent = cookies.get('CONSENT')
376 if consent:
377 if 'YES' in consent.value:
378 return
379 consent_id = self._search_regex(
380 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
381 if not consent_id:
382 consent_id = random.randint(100, 999)
383 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
384
385 def _initialize_pref(self):
386 cookies = self._get_cookies('https://www.youtube.com/')
387 pref_cookie = cookies.get('PREF')
388 pref = {}
389 if pref_cookie:
390 try:
391 pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
392 except ValueError:
393 self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
394 pref.update({'hl': 'en', 'tz': 'UTC'})
395 self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
396
397 def _real_initialize(self):
398 self._initialize_pref()
399 self._initialize_consent()
400 self._check_login_required()
401
402 def _check_login_required(self):
403 if self._LOGIN_REQUIRED and not self._cookies_passed:
404 self.raise_login_required('Login details are needed to download this content', method='cookies')
405
# Matches the assignment prefix of the initial data object in a webpage,
# i.e. `window["ytInitialData"] =` or `ytInitialData =`
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
# Matches the assignment prefix `ytInitialPlayerResponse =`
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
408
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the built-in config of `client`, so callers may modify it freely"""
    builtin_cfg = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin_cfg)
411
def _get_innertube_host(self, client='web'):
    """Return the API host name from the built-in config of `client`"""
    client_cfg = INNERTUBE_CLIENTS[client]
    return client_cfg['INNERTUBE_HOST']
414
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get on `ytcfg`, falling back to the built-in config of `default_client`
    whenever the lookup in `ytcfg` gives a falsy result
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
419
420 def _extract_client_name(self, ytcfg, default_client='web'):
421 return self._ytcfg_get_safe(
422 ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
423 lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
424
425 def _extract_client_version(self, ytcfg, default_client='web'):
426 return self._ytcfg_get_safe(
427 ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
428 lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
429
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """
    Choose the API host name

    Precedence: the 'innertube_host' extractor argument, then `req_api_hostname`,
    then the built-in host of `default_client` ('web' if not given).
    """
    user_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if user_host:
        return user_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
433
434 def _extract_api_key(self, ytcfg=None, default_client='web'):
435 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
436
def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the 'INNERTUBE_CONTEXT' dict of `ytcfg`, or of the default client's
    config when `ytcfg` has none, with language and timezone of its 'client'
    dict forced to English/UTC
    """
    sources = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(sources, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    for field, forced in (('hl', 'en'), ('timeZone', 'UTC'), ('utcOffsetMinutes', 0)):
        client_context[field] = forced
    return context
444
445 _SAPISID = None
446
447 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
448 time_now = round(time.time())
449 if self._SAPISID is None:
450 yt_cookies = self._get_cookies('https://www.youtube.com')
451 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
452 # See: https://github.com/yt-dlp/yt-dlp/issues/393
453 sapisid_cookie = dict_get(
454 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
455 if sapisid_cookie and sapisid_cookie.value:
456 self._SAPISID = sapisid_cookie.value
457 self.write_debug('Extracted SAPISID cookie')
458 # SAPISID cookie is required if not already present
459 if not yt_cookies.get('SAPISID'):
460 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
461 self._set_cookie(
462 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
463 else:
464 self._SAPISID = False
465 if not self._SAPISID:
466 return None
467 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
468 sapisidhash = hashlib.sha1(
469 f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
470 return f'SAPISIDHASH {time_now}_{sapisidhash}'
471
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST `query` (merged with the request context) as JSON to the API endpoint `ep`

    @param context        request context; extracted for `default_client` when falsy
    @param headers        extra headers applied on top of the generated API headers
    @param api_key        used unless the 'innertube_key' extractor argument is set;
                          extracted for `default_client` when neither is given
    @returns              the result of `_download_json`
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    user_key = self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = user_key or api_key or self._extract_api_key(default_client=default_client)

    host = self._select_api_hostname(api_hostname, default_client)
    return self._download_json(
        f'https://{host}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})
489
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search `webpage` for the JSON object assigned to ytInitialData and return it"""
    pattern = self._YT_INITIAL_DATA_RE
    return self._search_json(pattern, webpage, 'yt initial data', item_id, fatal=fatal)
492
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first 'SESSION_INDEX' among `data` that converts to an int, else None
    """
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)
503
# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the 'ID_TOKEN' from `ytcfg`, else the one found in `webpage`, else None"""
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
514
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated:
            return delegated
        datasync_id = try_get(source, datasync_getters, str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) > 1 and parts[1]:
            return parts[0]
    return None
533
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    # Alternative locations of the value in a ytcfg or an API response
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_data_paths], expected_type=str)
543
@functools.cached_property
def is_authenticated(self):
    """Whether a SAPISIDHASH authorization header can be generated"""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
547
def extract_ytcfg(self, video_id, webpage):
    """
    Parse the argument of the first `ytcfg.set({...});` call in `webpage`
    @returns the parsed dict, or {} if there is no webpage, no match or no usable JSON
    """
    if not webpage:
        return {}
    raw_cfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    parsed = self._parse_json(raw_cfg, video_id, fatal=False)
    return parsed or {}
555
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an API request

    Explicitly passed values take precedence over values extracted from `ytcfg`.
    Authorization headers are added when a SAPISIDHASH can be generated.
    @returns dict of headers; entries whose value is None are left out
    """
    origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))
    client_id = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    candidates = [
        ('X-YouTube-Client-Name', str(client_id)),
        ('X-YouTube-Client-Version', self._extract_client_version(ytcfg, default_client)),
        ('Origin', origin),
        ('X-Youtube-Identity-Token', identity_token or self._extract_identity_token(ytcfg)),
        ('X-Goog-PageId', account_syncid or self._extract_account_syncid(ytcfg)),
        ('X-Goog-Visitor-Id', visitor_data or self._extract_visitor_data(ytcfg)),
        ('User-Agent', self._ytcfg_get_safe(
            ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)),
    ]

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # Fall back to the first account when only a sync id is known
        candidates.append(('X-Goog-AuthUser', 0 if session_index is None else session_index))

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        candidates.append(('Authorization', auth))
        candidates.append(('X-Origin', origin))
    return {name: value for name, value in candidates if value is not None}
581
582 def _download_ytcfg(self, client, video_id):
583 url = {
584 'web': 'https://www.youtube.com',
585 'web_music': 'https://music.youtube.com',
586 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
587 }.get(client)
588 if not url:
589 return {}
590 webpage = self._download_webpage(
591 url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')
592 return self.extract_ytcfg(video_id, webpage) or {}
593
594 @staticmethod
595 def _build_api_continuation_query(continuation, ctp=None):
596 query = {
597 'continuation': continuation
598 }
599 # TODO: Inconsistency with clickTrackingParams.
600 # Currently we have a fixed ctp contained within context (from ytcfg)
601 # and a ctp in root query for continuation.
602 if ctp:
603 query['clickTracking'] = {'clickTrackingParams': ctp}
604 return query
605
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the 'nextContinuationData' or
    'reloadContinuationData' of `renderer`; None if there is no token
    """
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))
618
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict; None if it has no token"""
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))
628
@classmethod
def _extract_continuation(cls, renderer):
    """
    Find the continuation query of `renderer`

    The renderer's own continuation data is preferred; otherwise the first
    usable continuation item among its 'contents' and 'items' is taken.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in try_get(renderer, lambda x: x[key], list) or []]

    for entry in entries:
        if not isinstance(entry, dict):
            continue
        endpoint = try_get(
            entry, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                    lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
            dict)
        query = cls._extract_continuation_ep_data(endpoint)
        if query:
            return query
    return None
649
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert type, message) for every alert in the 'alerts' list of `data`

    Entries that are not dicts, or that lack a type or a message, are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Guard the renderer itself too: a malformed value must not abort extraction
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
662
663 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
664 errors = []
665 warnings = []
666 for alert_type, alert_message in alerts:
667 if alert_type.lower() == 'error' and fatal:
668 errors.append([alert_type, alert_message])
669 else:
670 warnings.append([alert_type, alert_message])
671
672 for alert_type, alert_message in (warnings + errors[:-1]):
673 self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
674 if errors:
675 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
676
677 def _extract_and_report_alerts(self, data, *args, **kwargs):
678 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
679
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadata badge labels found in `renderer`"""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}
687
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found in `data`

    Each path of `path_list` is tried in order (`data` itself if no path is given).
    A candidate object yields its 'simpleText' string if it has one, otherwise the
    concatenated 'text' values of its 'runs' list (or of the object itself when it
    is a list).
    @param max_runs: if truthy, only the first `max_runs` runs are joined
    @returns         the text, or None if nothing was found
    """
    for path in path_list or [None]:
        if path is None:
            obj = [data]
        else:
            obj = traverse_obj(data, path, default=[])
            # NOTE(review): presumably traverse_obj returns a single object (not a list of
            # results) when the path has no `...` or list/tuple key, hence the wrapping
            if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                obj = [obj]
        for item in obj:
            text = try_get(item, lambda x: x['simpleText'], str)
            if text:
                return text
            runs = try_get(item, lambda x: x['runs'], list) or []
            # The item may itself be the list of runs
            if not runs and isinstance(item, list):
                runs = item

            # Keep all runs unless a truthy max_runs asks for fewer
            runs = runs[:min(len(runs), max_runs or len(runs))]
            text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
            if text:
                return text
709
def _get_count(self, data, *path_list):
    """
    Parse a count from the text found at `path_list` in `data`

    Falls back to the leading run of digits/commas (whitespace removed) when
    `parse_count` cannot handle the text.
    """
    raw_text = self._get_text(data, *path_list) or ''
    parsed = parse_count(raw_text)
    if parsed is not None:
        return parsed
    compact = re.sub(r'\s', '', raw_text)
    leading_digits = self._search_regex(r'^([\d,]+)', compact, 'count', default=None)
    return str_to_int(leading_digits)
717
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @returns          list of dicts with 'url', 'height' and 'width'; entries that
                      are not dicts or have no valid URL are skipped
    """
    thumbnails = []
    for path in path_list or [()]:
        for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[]):
            # Malformed data may contain non-dict entries; skip them instead of crashing
            if not isinstance(thumbnail, dict):
                continue
            thumbnail_url = url_or_none(thumbnail.get('url'))
            if not thumbnail_url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in thumbnail_url:
                thumbnail_url = thumbnail_url.split('?')[0]
            thumbnails.append({
                'url': thumbnail_url,
                'height': int_or_none(thumbnail.get('height')),
                'width': int_or_none(thumbnail.get('width')),
            })
    return thumbnails
741
@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'

    Returns None when the text has no relative time or it cannot be converted
    """
    match = re.search(r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
    if not match:
        return None
    anchor, amount, unit = match.group('start', 'time', 'unit')
    if anchor:
        return datetime_from_str(anchor)
    try:
        return datetime_from_str('now-%s%s' % (amount, unit))
    except ValueError:
        return None
757
def _extract_time_text(self, renderer, *path_list):
    """
    Parse the time text found at `path_list` in `renderer`

    A relative time ('3 days ago') is tried first, then an absolute date.
    @returns (timestamp, time_text)
    """
    time_text = self._get_text(renderer, *path_list) or ''
    relative = self.extract_relative_time(time_text)
    if isinstance(relative, datetime.datetime):
        timestamp = calendar.timegm(relative.timetuple())
    else:
        timestamp = unified_timestamp(time_text)
        if not timestamp:
            # Retry with only the date-like part of the text
            date_part = self._search_regex(
                (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                time_text.lower(), 'time text', default=None)
            timestamp = unified_timestamp(date_part)

    if time_text and timestamp is None:
        self.report_warning(f"Cannot parse localized time text '{time_text}'" + bug_reports_message(), only_once=True)
    return timestamp, time_text
776
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep`, retrying through `self.RetryManager()`

    Retried: network errors that are not HTTP errors, HTTP errors other than 403/429,
    'unknown error' alerts in the response, and responses in which `check_get_keys`
    cannot be found. Any other error goes through `self._error_or_warning` with `fatal`.
    @param ytcfg           source of the request context and API key
    @param check_get_keys  key path(s) passed to traverse_obj to check the response is complete
    @returns               the API response
    """
    for retry in self.RetryManager():
        try:
            # Always fatal here, so that failures can be inspected and retried below
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            if not isinstance(e.cause, network_exceptions):
                return self._error_or_warning(e, fatal=fatal)
            elif not isinstance(e.cause, urllib.error.HTTPError):
                retry.error = e
                continue

            # An HTTP error whose body is not HTML may carry a JSON error message
            first_bytes = e.cause.read(512)
            if not is_html(first_bytes):
                yt_error = try_get(
                    self._parse_json(
                        self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if e.cause.code not in (403, 429):
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            retry.error = ExtractorError('Incomplete data received', expected=True)
            continue

        return response
828
829 @staticmethod
830 def is_music_url(url):
831 return re.match(r'https?://music\.youtube\.com/', url) is not None
832
def _extract_video(self, renderer):
    """
    Build a 'url' type result for the video described by `renderer`

    The result points to the watch page, or to the shorts page when the renderer
    is marked as a short, and is to be processed by YoutubeIE.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')
    duration = parse_duration(self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # Fall back to the duration given at the end of the title's accessibility label
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str),
            video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''
    url = f'https://www.youtube.com/watch?v={video_id}'
    # Shorts are recognized by the overlay style or by their navigation URL
    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        # Derived from relative time text, so only set when 'approximate_date' is requested
        'upload_date': (strftime_or_none(timestamp, '%Y%m%d')
                        if self._configuration_arg('approximate_date', ie_key='youtubetab')
                        else None),
        'live_status': ('is_upcoming' if scheduled_timestamp is not None
                        else 'was_live' if 'streamed' in time_text.lower()
                        else 'is_live' if overlay_style == 'LIVE' or 'live now' in badges
                        else None),
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
    }
887
888
889 class YoutubeIE(YoutubeBaseInfoExtractor):
890 IE_DESC = 'YouTube'
891 _VALID_URL = r"""(?x)^
892 (
893 (?:https?://|//) # http(s):// or protocol-independent URL
894 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
895 (?:www\.)?deturl\.com/www\.youtube\.com|
896 (?:www\.)?pwnyoutube\.com|
897 (?:www\.)?hooktube\.com|
898 (?:www\.)?yourepeat\.com|
899 tube\.majestyc\.net|
900 %(invidious)s|
901 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
902 (?:.*?\#/)? # handle anchor (#/) redirect urls
903 (?: # the various things that can precede the ID:
904 (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
905 |(?: # or the v= param in all its forms
906 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
907 (?:\?|\#!?) # the params delimiter ? or # or #!
908 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
909 v=
910 )
911 ))
912 |(?:
913 youtu\.be| # just youtu.be/xxxx
914 vid\.plus| # or vid.plus/xxxx
915 zwearz\.com/watch| # or zwearz.com/watch/xxxx
916 %(invidious)s
917 )/
918 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
919 )
920 )? # all until now is optional -> you can pass the naked ID
921 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
922 (?(1).+)? # if we found the ID, everything can follow
923 (?:\#|$)""" % {
924 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
925 }
926 _EMBED_REGEX = [r'''(?x)
927 (?:
928 <iframe[^>]+?src=|
929 data-video-url=|
930 <embed[^>]+?src=|
931 embedSWF\(?:\s*|
932 <object[^>]+data=|
933 new\s+SWFObject\(
934 )
935 (["\'])
936 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
937 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
938 \1''']
939 _PLAYER_INFO_RE = (
940 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
941 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
942 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
943 )
944 _formats = {
945 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
946 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
947 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
948 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
949 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
950 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
951 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
952 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
953 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
954 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
955 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
956 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
957 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
958 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
959 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
960 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
961 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
962 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
963
964
965 # 3D videos
966 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
967 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
968 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
969 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
970 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
971 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
972 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
973
974 # Apple HTTP Live Streaming
975 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
976 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
977 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
978 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
979 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
980 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
981 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
982 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
983
984 # DASH mp4 video
985 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
986 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
987 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
988 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
989 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
990 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
991 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
992 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
993 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
994 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
995 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
996 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
997
998 # Dash mp4 audio
999 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1000 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1001 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1002 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1003 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1004 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1005 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
1006
1007 # Dash webm
1008 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1009 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1010 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1011 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1012 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1013 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1014 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1015 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1016 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1017 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1018 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1019 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1020 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1021 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1022 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1023 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1024 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1025 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1026 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1027 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1028 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1029 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1030
1031 # Dash webm audio
1032 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1033 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1034
1035 # Dash webm audio with opus inside
1036 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1037 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1038 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1039
1040 # RTMP (unnamed)
1041 '_rtmp': {'protocol': 'rtmp'},
1042
1043 # av01 video only formats sometimes served with "unknown" codecs
1044 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1045 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1046 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1047 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1048 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1049 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1050 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1051 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1052 }
1053 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
1054
1055 _GEO_BYPASS = False
1056
1057 IE_NAME = 'youtube'
1058 _TESTS = [
1059 {
1060 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1061 'info_dict': {
1062 'id': 'BaW_jenozKc',
1063 'ext': 'mp4',
1064 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1065 'uploader': 'Philipp Hagemeister',
1066 'uploader_id': 'phihag',
1067 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1068 'channel': 'Philipp Hagemeister',
1069 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1070 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1071 'upload_date': '20121002',
1072 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1073 'categories': ['Science & Technology'],
1074 'tags': ['youtube-dl'],
1075 'duration': 10,
1076 'view_count': int,
1077 'like_count': int,
1078 'availability': 'public',
1079 'playable_in_embed': True,
1080 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1081 'live_status': 'not_live',
1082 'age_limit': 0,
1083 'start_time': 1,
1084 'end_time': 9,
1085 'comment_count': int,
1086 'channel_follower_count': int
1087 }
1088 },
1089 {
1090 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1091 'note': 'Embed-only video (#1746)',
1092 'info_dict': {
1093 'id': 'yZIXLfi8CZQ',
1094 'ext': 'mp4',
1095 'upload_date': '20120608',
1096 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1097 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1098 'uploader': 'SET India',
1099 'uploader_id': 'setindia',
1100 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1101 'age_limit': 18,
1102 },
1103 'skip': 'Private video',
1104 },
1105 {
1106 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1107 'note': 'Use the first video ID in the URL',
1108 'info_dict': {
1109 'id': 'BaW_jenozKc',
1110 'ext': 'mp4',
1111 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1112 'uploader': 'Philipp Hagemeister',
1113 'uploader_id': 'phihag',
1114 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1115 'channel': 'Philipp Hagemeister',
1116 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1117 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1118 'upload_date': '20121002',
1119 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1120 'categories': ['Science & Technology'],
1121 'tags': ['youtube-dl'],
1122 'duration': 10,
1123 'view_count': int,
1124 'like_count': int,
1125 'availability': 'public',
1126 'playable_in_embed': True,
1127 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1128 'live_status': 'not_live',
1129 'age_limit': 0,
1130 'comment_count': int,
1131 'channel_follower_count': int
1132 },
1133 'params': {
1134 'skip_download': True,
1135 },
1136 },
1137 {
1138 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1139 'note': '256k DASH audio (format 141) via DASH manifest',
1140 'info_dict': {
1141 'id': 'a9LDPn-MO4I',
1142 'ext': 'm4a',
1143 'upload_date': '20121002',
1144 'uploader_id': '8KVIDEO',
1145 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1146 'description': '',
1147 'uploader': '8KVIDEO',
1148 'title': 'UHDTV TEST 8K VIDEO.mp4'
1149 },
1150 'params': {
1151 'youtube_include_dash_manifest': True,
1152 'format': '141',
1153 },
1154 'skip': 'format 141 not served anymore',
1155 },
1156 # DASH manifest with encrypted signature
1157 {
1158 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1159 'info_dict': {
1160 'id': 'IB3lcPjvWLA',
1161 'ext': 'm4a',
1162 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1163 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1164 'duration': 244,
1165 'uploader': 'AfrojackVEVO',
1166 'uploader_id': 'AfrojackVEVO',
1167 'upload_date': '20131011',
1168 'abr': 129.495,
1169 'like_count': int,
1170 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1171 'playable_in_embed': True,
1172 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1173 'view_count': int,
1174 'track': 'The Spark',
1175 'live_status': 'not_live',
1176 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1177 'channel': 'Afrojack',
1178 'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
1179 'tags': 'count:19',
1180 'availability': 'public',
1181 'categories': ['Music'],
1182 'age_limit': 0,
1183 'alt_title': 'The Spark',
1184 'channel_follower_count': int
1185 },
1186 'params': {
1187 'youtube_include_dash_manifest': True,
1188 'format': '141/bestaudio[ext=m4a]',
1189 },
1190 },
1191 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1192 {
1193 'note': 'Embed allowed age-gate video',
1194 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1195 'info_dict': {
1196 'id': 'HtVdAasjOgU',
1197 'ext': 'mp4',
1198 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1199 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1200 'duration': 142,
1201 'uploader': 'The Witcher',
1202 'uploader_id': 'WitcherGame',
1203 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1204 'upload_date': '20140605',
1205 'age_limit': 18,
1206 'categories': ['Gaming'],
1207 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1208 'availability': 'needs_auth',
1209 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1210 'like_count': int,
1211 'channel': 'The Witcher',
1212 'live_status': 'not_live',
1213 'tags': 'count:17',
1214 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1215 'playable_in_embed': True,
1216 'view_count': int,
1217 'channel_follower_count': int
1218 },
1219 },
1220 {
1221 'note': 'Age-gate video with embed allowed in public site',
1222 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1223 'info_dict': {
1224 'id': 'HsUATh_Nc2U',
1225 'ext': 'mp4',
1226 'title': 'Godzilla 2 (Official Video)',
1227 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1228 'upload_date': '20200408',
1229 'uploader_id': 'FlyingKitty900',
1230 'uploader': 'FlyingKitty',
1231 'age_limit': 18,
1232 'availability': 'needs_auth',
1233 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1234 'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
1235 'channel': 'FlyingKitty',
1236 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1237 'view_count': int,
1238 'categories': ['Entertainment'],
1239 'live_status': 'not_live',
1240 'tags': ['Flyingkitty', 'godzilla 2'],
1241 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1242 'like_count': int,
1243 'duration': 177,
1244 'playable_in_embed': True,
1245 'channel_follower_count': int
1246 },
1247 },
1248 {
1249 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1250 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1251 'info_dict': {
1252 'id': 'Tq92D6wQ1mg',
1253 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1254 'ext': 'mp4',
1255 'upload_date': '20191228',
1256 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1257 'uploader': 'Projekt Melody',
1258 'description': 'md5:17eccca93a786d51bc67646756894066',
1259 'age_limit': 18,
1260 'like_count': int,
1261 'availability': 'needs_auth',
1262 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1263 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1264 'view_count': int,
1265 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1266 'channel': 'Projekt Melody',
1267 'live_status': 'not_live',
1268 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1269 'playable_in_embed': True,
1270 'categories': ['Entertainment'],
1271 'duration': 106,
1272 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1273 'comment_count': int,
1274 'channel_follower_count': int
1275 },
1276 },
1277 {
1278 'note': 'Non-Agegated non-embeddable video',
1279 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1280 'info_dict': {
1281 'id': 'MeJVWBSsPAY',
1282 'ext': 'mp4',
1283 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1284 'uploader': 'Herr Lurik',
1285 'uploader_id': 'st3in234',
1286 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1287 'upload_date': '20130730',
1288 'track': 'Such mich find mich',
1289 'age_limit': 0,
1290 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1291 'like_count': int,
1292 'playable_in_embed': False,
1293 'creator': 'OOMPH!',
1294 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1295 'view_count': int,
1296 'alt_title': 'Such mich find mich',
1297 'duration': 210,
1298 'channel': 'Herr Lurik',
1299 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1300 'categories': ['Music'],
1301 'availability': 'public',
1302 'uploader_url': 'http://www.youtube.com/user/st3in234',
1303 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1304 'live_status': 'not_live',
1305 'artist': 'OOMPH!',
1306 'channel_follower_count': int
1307 },
1308 },
1309 {
1310 'note': 'Non-bypassable age-gated video',
1311 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1312 'only_matching': True,
1313 },
1314 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1315 # YouTube Red ad is not captured for creator
1316 {
1317 'url': '__2ABJjxzNo',
1318 'info_dict': {
1319 'id': '__2ABJjxzNo',
1320 'ext': 'mp4',
1321 'duration': 266,
1322 'upload_date': '20100430',
1323 'uploader_id': 'deadmau5',
1324 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1325 'creator': 'deadmau5',
1326 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1327 'uploader': 'deadmau5',
1328 'title': 'Deadmau5 - Some Chords (HD)',
1329 'alt_title': 'Some Chords',
1330 'availability': 'public',
1331 'tags': 'count:14',
1332 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1333 'view_count': int,
1334 'live_status': 'not_live',
1335 'channel': 'deadmau5',
1336 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1337 'like_count': int,
1338 'track': 'Some Chords',
1339 'artist': 'deadmau5',
1340 'playable_in_embed': True,
1341 'age_limit': 0,
1342 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1343 'categories': ['Music'],
1344 'album': 'Some Chords',
1345 'channel_follower_count': int
1346 },
1347 'expected_warnings': [
1348 'DASH manifest missing',
1349 ]
1350 },
1351 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1352 {
1353 'url': 'lqQg6PlCWgI',
1354 'info_dict': {
1355 'id': 'lqQg6PlCWgI',
1356 'ext': 'mp4',
1357 'duration': 6085,
1358 'upload_date': '20150827',
1359 'uploader_id': 'olympic',
1360 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1361 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
1362 'uploader': 'Olympics',
1363 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1364 'like_count': int,
1365 'release_timestamp': 1343767800,
1366 'playable_in_embed': True,
1367 'categories': ['Sports'],
1368 'release_date': '20120731',
1369 'channel': 'Olympics',
1370 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1371 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1372 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1373 'age_limit': 0,
1374 'availability': 'public',
1375 'live_status': 'was_live',
1376 'view_count': int,
1377 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
1378 'channel_follower_count': int
1379 },
1380 'params': {
1381 'skip_download': 'requires avconv',
1382 }
1383 },
1384 # Non-square pixels
1385 {
1386 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1387 'info_dict': {
1388 'id': '_b-2C3KPAM0',
1389 'ext': 'mp4',
1390 'stretched_ratio': 16 / 9.,
1391 'duration': 85,
1392 'upload_date': '20110310',
1393 'uploader_id': 'AllenMeow',
1394 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1395 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1396 'uploader': '孫ᄋᄅ',
1397 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1398 'playable_in_embed': True,
1399 'channel': '孫ᄋᄅ',
1400 'age_limit': 0,
1401 'tags': 'count:11',
1402 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1403 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1404 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1405 'view_count': int,
1406 'categories': ['People & Blogs'],
1407 'like_count': int,
1408 'live_status': 'not_live',
1409 'availability': 'unlisted',
1410 'comment_count': int,
1411 'channel_follower_count': int
1412 },
1413 },
1414 # url_encoded_fmt_stream_map is empty string
1415 {
1416 'url': 'qEJwOuvDf7I',
1417 'info_dict': {
1418 'id': 'qEJwOuvDf7I',
1419 'ext': 'webm',
1420 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1421 'description': '',
1422 'upload_date': '20150404',
1423 'uploader_id': 'spbelect',
1424 'uploader': 'Наблюдатели Петербурга',
1425 },
1426 'params': {
1427 'skip_download': 'requires avconv',
1428 },
1429 'skip': 'This live event has ended.',
1430 },
1431 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1432 {
1433 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1434 'info_dict': {
1435 'id': 'FIl7x6_3R5Y',
1436 'ext': 'webm',
1437 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1438 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1439 'duration': 220,
1440 'upload_date': '20150625',
1441 'uploader_id': 'dorappi2000',
1442 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1443 'uploader': 'dorappi2000',
1444 'formats': 'mincount:31',
1445 },
1446 'skip': 'not actual anymore',
1447 },
1448 # DASH manifest with segment_list
1449 {
1450 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1451 'md5': '8ce563a1d667b599d21064e982ab9e31',
1452 'info_dict': {
1453 'id': 'CsmdDsKjzN8',
1454 'ext': 'mp4',
1455 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1456 'uploader': 'Airtek',
1457 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1458 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1459 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1460 },
1461 'params': {
1462 'youtube_include_dash_manifest': True,
1463 'format': '135', # bestvideo
1464 },
1465 'skip': 'This live event has ended.',
1466 },
1467 {
1468 # Multifeed videos (multiple cameras), URL is for Main Camera
1469 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1470 'info_dict': {
1471 'id': 'jvGDaLqkpTg',
1472 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1473 'description': 'md5:e03b909557865076822aa169218d6a5d',
1474 },
1475 'playlist': [{
1476 'info_dict': {
1477 'id': 'jvGDaLqkpTg',
1478 'ext': 'mp4',
1479 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1480 'description': 'md5:e03b909557865076822aa169218d6a5d',
1481 'duration': 10643,
1482 'upload_date': '20161111',
1483 'uploader': 'Team PGP',
1484 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1485 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1486 },
1487 }, {
1488 'info_dict': {
1489 'id': '3AKt1R1aDnw',
1490 'ext': 'mp4',
1491 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1492 'description': 'md5:e03b909557865076822aa169218d6a5d',
1493 'duration': 10991,
1494 'upload_date': '20161111',
1495 'uploader': 'Team PGP',
1496 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1497 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1498 },
1499 }, {
1500 'info_dict': {
1501 'id': 'RtAMM00gpVc',
1502 'ext': 'mp4',
1503 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1504 'description': 'md5:e03b909557865076822aa169218d6a5d',
1505 'duration': 10995,
1506 'upload_date': '20161111',
1507 'uploader': 'Team PGP',
1508 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1509 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1510 },
1511 }, {
1512 'info_dict': {
1513 'id': '6N2fdlP3C5U',
1514 'ext': 'mp4',
1515 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1516 'description': 'md5:e03b909557865076822aa169218d6a5d',
1517 'duration': 10990,
1518 'upload_date': '20161111',
1519 'uploader': 'Team PGP',
1520 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1521 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1522 },
1523 }],
1524 'params': {
1525 'skip_download': True,
1526 },
1527 'skip': 'Not multifeed anymore',
1528 },
1529 {
1530 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1531 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1532 'info_dict': {
1533 'id': 'gVfLd0zydlo',
1534 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1535 },
1536 'playlist_count': 2,
1537 'skip': 'Not multifeed anymore',
1538 },
1539 {
1540 'url': 'https://vid.plus/FlRa-iH7PGw',
1541 'only_matching': True,
1542 },
1543 {
1544 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1545 'only_matching': True,
1546 },
1547 {
1548 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1549 # Also tests cut-off URL expansion in video description (see
1550 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1551 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1552 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1553 'info_dict': {
1554 'id': 'lsguqyKfVQg',
1555 'ext': 'mp4',
1556 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1557 'alt_title': 'Dark Walk',
1558 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1559 'duration': 133,
1560 'upload_date': '20151119',
1561 'uploader_id': 'IronSoulElf',
1562 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1563 'uploader': 'IronSoulElf',
1564 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1565 'track': 'Dark Walk',
1566 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1567 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1568 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1569 'categories': ['Film & Animation'],
1570 'view_count': int,
1571 'live_status': 'not_live',
1572 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1573 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1574 'tags': 'count:13',
1575 'availability': 'public',
1576 'channel': 'IronSoulElf',
1577 'playable_in_embed': True,
1578 'like_count': int,
1579 'age_limit': 0,
1580 'channel_follower_count': int
1581 },
1582 'params': {
1583 'skip_download': True,
1584 },
1585 },
1586 {
1587 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1588 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1589 'only_matching': True,
1590 },
1591 {
1592 # Video with yt:stretch=17:0
1593 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1594 'info_dict': {
1595 'id': 'Q39EVAstoRM',
1596 'ext': 'mp4',
1597 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1598 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1599 'upload_date': '20151107',
1600 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1601 'uploader': 'CH GAMER DROID',
1602 },
1603 'params': {
1604 'skip_download': True,
1605 },
1606 'skip': 'This video does not exist.',
1607 },
1608 {
1609 # Video with incomplete 'yt:stretch=16:'
1610 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1611 'only_matching': True,
1612 },
1613 {
1614 # Video licensed under Creative Commons
1615 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1616 'info_dict': {
1617 'id': 'M4gD1WSo5mA',
1618 'ext': 'mp4',
1619 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1620 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1621 'duration': 721,
1622 'upload_date': '20150128',
1623 'uploader_id': 'BerkmanCenter',
1624 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1625 'uploader': 'The Berkman Klein Center for Internet & Society',
1626 'license': 'Creative Commons Attribution license (reuse allowed)',
1627 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1628 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1629 'like_count': int,
1630 'age_limit': 0,
1631 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1632 'channel': 'The Berkman Klein Center for Internet & Society',
1633 'availability': 'public',
1634 'view_count': int,
1635 'categories': ['Education'],
1636 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1637 'live_status': 'not_live',
1638 'playable_in_embed': True,
1639 'comment_count': int,
1640 'channel_follower_count': int
1641 },
1642 'params': {
1643 'skip_download': True,
1644 },
1645 },
1646 {
1647 # Channel-like uploader_url
1648 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1649 'info_dict': {
1650 'id': 'eQcmzGIKrzg',
1651 'ext': 'mp4',
1652 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1653 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1654 'duration': 4060,
1655 'upload_date': '20151120',
1656 'uploader': 'Bernie Sanders',
1657 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1658 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1659 'license': 'Creative Commons Attribution license (reuse allowed)',
1660 'playable_in_embed': True,
1661 'tags': 'count:12',
1662 'like_count': int,
1663 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1664 'age_limit': 0,
1665 'availability': 'public',
1666 'categories': ['News & Politics'],
1667 'channel': 'Bernie Sanders',
1668 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1669 'view_count': int,
1670 'live_status': 'not_live',
1671 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1672 'comment_count': int,
1673 'channel_follower_count': int
1674 },
1675 'params': {
1676 'skip_download': True,
1677 },
1678 },
1679 {
1680 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1681 'only_matching': True,
1682 },
1683 {
1684 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1685 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1686 'only_matching': True,
1687 },
1688 {
1689 # Rental video preview
1690 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1691 'info_dict': {
1692 'id': 'uGpuVWrhIzE',
1693 'ext': 'mp4',
1694 'title': 'Piku - Trailer',
1695 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1696 'upload_date': '20150811',
1697 'uploader': 'FlixMatrix',
1698 'uploader_id': 'FlixMatrixKaravan',
1699 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1700 'license': 'Standard YouTube License',
1701 },
1702 'params': {
1703 'skip_download': True,
1704 },
1705 'skip': 'This video is not available.',
1706 },
1707 {
1708 # YouTube Red video with episode data
1709 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1710 'info_dict': {
1711 'id': 'iqKdEhx-dD4',
1712 'ext': 'mp4',
1713 'title': 'Isolation - Mind Field (Ep 1)',
1714 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1715 'duration': 2085,
1716 'upload_date': '20170118',
1717 'uploader': 'Vsauce',
1718 'uploader_id': 'Vsauce',
1719 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1720 'series': 'Mind Field',
1721 'season_number': 1,
1722 'episode_number': 1,
1723 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1724 'tags': 'count:12',
1725 'view_count': int,
1726 'availability': 'public',
1727 'age_limit': 0,
1728 'channel': 'Vsauce',
1729 'episode': 'Episode 1',
1730 'categories': ['Entertainment'],
1731 'season': 'Season 1',
1732 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1733 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1734 'like_count': int,
1735 'playable_in_embed': True,
1736 'live_status': 'not_live',
1737 'channel_follower_count': int
1738 },
1739 'params': {
1740 'skip_download': True,
1741 },
1742 'expected_warnings': [
1743 'Skipping DASH manifest',
1744 ],
1745 },
1746 {
1747 # The following content has been identified by the YouTube community
1748 # as inappropriate or offensive to some audiences.
1749 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1750 'info_dict': {
1751 'id': '6SJNVb0GnPI',
1752 'ext': 'mp4',
1753 'title': 'Race Differences in Intelligence',
1754 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1755 'duration': 965,
1756 'upload_date': '20140124',
1757 'uploader': 'New Century Foundation',
1758 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1759 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1760 },
1761 'params': {
1762 'skip_download': True,
1763 },
1764 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1765 },
1766 {
1767 # itag 212
1768 'url': '1t24XAntNCY',
1769 'only_matching': True,
1770 },
1771 {
1772 # geo restricted to JP
1773 'url': 'sJL6WA-aGkQ',
1774 'only_matching': True,
1775 },
1776 {
1777 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1778 'only_matching': True,
1779 },
1780 {
1781 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1782 'only_matching': True,
1783 },
1784 {
1785 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1786 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1787 'only_matching': True,
1788 },
1789 {
1790 # DRM protected
1791 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1792 'only_matching': True,
1793 },
1794 {
1795 # Video with unsupported adaptive stream type formats
1796 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1797 'info_dict': {
1798 'id': 'Z4Vy8R84T1U',
1799 'ext': 'mp4',
1800 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1801 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1802 'duration': 433,
1803 'upload_date': '20130923',
1804 'uploader': 'Amelia Putri Harwita',
1805 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1806 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1807 'formats': 'maxcount:10',
1808 },
1809 'params': {
1810 'skip_download': True,
1811 'youtube_include_dash_manifest': False,
1812 },
1813 'skip': 'not actual anymore',
1814 },
1815 {
1816 # Youtube Music Auto-generated description
1817 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1818 'info_dict': {
1819 'id': 'MgNrAu2pzNs',
1820 'ext': 'mp4',
1821 'title': 'Voyeur Girl',
1822 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1823 'upload_date': '20190312',
1824 'uploader': 'Stephen - Topic',
1825 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1826 'artist': 'Stephen',
1827 'track': 'Voyeur Girl',
1828 'album': 'it\'s too much love to know my dear',
1829 'release_date': '20190313',
1830 'release_year': 2019,
1831 'alt_title': 'Voyeur Girl',
1832 'view_count': int,
1833 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1834 'playable_in_embed': True,
1835 'like_count': int,
1836 'categories': ['Music'],
1837 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1838 'channel': 'Stephen',
1839 'availability': 'public',
1840 'creator': 'Stephen',
1841 'duration': 169,
1842 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
1843 'age_limit': 0,
1844 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1845 'tags': 'count:11',
1846 'live_status': 'not_live',
1847 'channel_follower_count': int
1848 },
1849 'params': {
1850 'skip_download': True,
1851 },
1852 },
1853 {
1854 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1855 'only_matching': True,
1856 },
1857 {
1858 # invalid -> valid video id redirection
1859 'url': 'DJztXj2GPfl',
1860 'info_dict': {
1861 'id': 'DJztXj2GPfk',
1862 'ext': 'mp4',
1863 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1864 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1865 'upload_date': '20090125',
1866 'uploader': 'Prochorowka',
1867 'uploader_id': 'Prochorowka',
1868 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1869 'artist': 'Panjabi MC',
1870 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1871 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1872 },
1873 'params': {
1874 'skip_download': True,
1875 },
1876 'skip': 'Video unavailable',
1877 },
1878 {
1879 # empty description results in an empty string
1880 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1881 'info_dict': {
1882 'id': 'x41yOUIvK2k',
1883 'ext': 'mp4',
1884 'title': 'IMG 3456',
1885 'description': '',
1886 'upload_date': '20170613',
1887 'uploader_id': 'ElevageOrVert',
1888 'uploader': 'ElevageOrVert',
1889 'view_count': int,
1890 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
1891 'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
1892 'like_count': int,
1893 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
1894 'tags': [],
1895 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
1896 'availability': 'public',
1897 'age_limit': 0,
1898 'categories': ['Pets & Animals'],
1899 'duration': 7,
1900 'playable_in_embed': True,
1901 'live_status': 'not_live',
1902 'channel': 'ElevageOrVert',
1903 'channel_follower_count': int
1904 },
1905 'params': {
1906 'skip_download': True,
1907 },
1908 },
1909 {
1910 # with '};' inside yt initial data (see [1])
1911 # see [2] for an example with '};' inside ytInitialPlayerResponse
1912 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1913 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1914 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1915 'info_dict': {
1916 'id': 'CHqg6qOn4no',
1917 'ext': 'mp4',
1918 'title': 'Part 77 Sort a list of simple types in c#',
1919 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1920 'upload_date': '20130831',
1921 'uploader_id': 'kudvenkat',
1922 'uploader': 'kudvenkat',
1923 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
1924 'like_count': int,
1925 'uploader_url': 'http://www.youtube.com/user/kudvenkat',
1926 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
1927 'live_status': 'not_live',
1928 'categories': ['Education'],
1929 'availability': 'public',
1930 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
1931 'tags': 'count:12',
1932 'playable_in_embed': True,
1933 'age_limit': 0,
1934 'view_count': int,
1935 'duration': 522,
1936 'channel': 'kudvenkat',
1937 'comment_count': int,
1938 'channel_follower_count': int
1939 },
1940 'params': {
1941 'skip_download': True,
1942 },
1943 },
1944 {
1945 # another example of '};' in ytInitialData
1946 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1947 'only_matching': True,
1948 },
1949 {
1950 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1951 'only_matching': True,
1952 },
1953 {
1954 # https://github.com/ytdl-org/youtube-dl/pull/28094
1955 'url': 'OtqTfy26tG0',
1956 'info_dict': {
1957 'id': 'OtqTfy26tG0',
1958 'ext': 'mp4',
1959 'title': 'Burn Out',
1960 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1961 'upload_date': '20141120',
1962 'uploader': 'The Cinematic Orchestra - Topic',
1963 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1964 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1965 'artist': 'The Cinematic Orchestra',
1966 'track': 'Burn Out',
1967 'album': 'Every Day',
1968 'like_count': int,
1969 'live_status': 'not_live',
1970 'alt_title': 'Burn Out',
1971 'duration': 614,
1972 'age_limit': 0,
1973 'view_count': int,
1974 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1975 'creator': 'The Cinematic Orchestra',
1976 'channel': 'The Cinematic Orchestra',
1977 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
1978 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1979 'availability': 'public',
1980 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
1981 'categories': ['Music'],
1982 'playable_in_embed': True,
1983 'channel_follower_count': int
1984 },
1985 'params': {
1986 'skip_download': True,
1987 },
1988 },
1989 {
1990 # controversial video, only works with bpctr when authenticated with cookies
1991 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1992 'only_matching': True,
1993 },
1994 {
1995 # controversial video, requires bpctr/contentCheckOk
1996 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1997 'info_dict': {
1998 'id': 'SZJvDhaSDnc',
1999 'ext': 'mp4',
2000 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2001 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
2002 'uploader': 'CBS Mornings',
2003 'uploader_id': 'CBSThisMorning',
2004 'upload_date': '20140716',
2005 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2006 'duration': 170,
2007 'categories': ['News & Politics'],
2008 'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
2009 'view_count': int,
2010 'channel': 'CBS Mornings',
2011 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2012 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2013 'age_limit': 18,
2014 'availability': 'needs_auth',
2015 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2016 'like_count': int,
2017 'live_status': 'not_live',
2018 'playable_in_embed': True,
2019 'channel_follower_count': int
2020 }
2021 },
2022 {
2023 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2024 'url': 'cBvYw8_A0vQ',
2025 'info_dict': {
2026 'id': 'cBvYw8_A0vQ',
2027 'ext': 'mp4',
2028 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2029 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2030 'upload_date': '20201120',
2031 'uploader': 'Walk around Japan',
2032 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2033 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2034 'duration': 1456,
2035 'categories': ['Travel & Events'],
2036 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2037 'view_count': int,
2038 'channel': 'Walk around Japan',
2039 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2040 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2041 'age_limit': 0,
2042 'availability': 'public',
2043 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2044 'live_status': 'not_live',
2045 'playable_in_embed': True,
2046 'channel_follower_count': int
2047 },
2048 'params': {
2049 'skip_download': True,
2050 },
2051 }, {
2052 # Has multiple audio streams
2053 'url': 'WaOKSUlf4TM',
2054 'only_matching': True
2055 }, {
2056 # Requires Premium: has format 141 when requested using YTM url
2057 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2058 'only_matching': True
2059 }, {
2060 # multiple subtitles with same lang_code
2061 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2062 'only_matching': True,
2063 }, {
2064 # Force use android client fallback
2065 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2066 'info_dict': {
2067 'id': 'YOelRv7fMxY',
2068 'title': 'DIGGING A SECRET TUNNEL Part 1',
2069 'ext': '3gp',
2070 'upload_date': '20210624',
2071 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2072 'uploader': 'colinfurze',
2073 'uploader_id': 'colinfurze',
2074 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
2075 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2076 'duration': 596,
2077 'categories': ['Entertainment'],
2078 'uploader_url': 'http://www.youtube.com/user/colinfurze',
2079 'view_count': int,
2080 'channel': 'colinfurze',
2081 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2082 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2083 'age_limit': 0,
2084 'availability': 'public',
2085 'like_count': int,
2086 'live_status': 'not_live',
2087 'playable_in_embed': True,
2088 'channel_follower_count': int
2089 },
2090 'params': {
2091 'format': '17', # 3gp format available on android
2092 'extractor_args': {'youtube': {'player_client': ['android']}},
2093 },
2094 },
2095 {
2096 # Skip download of additional client configs (remix client config in this case)
2097 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2098 'only_matching': True,
2099 'params': {
2100 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2101 },
2102 }, {
2103 # shorts
2104 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2105 'only_matching': True,
2106 }, {
2107 'note': 'Storyboards',
2108 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2109 'info_dict': {
2110 'id': '5KLPxDtMqe8',
2111 'ext': 'mhtml',
2112 'format_id': 'sb0',
2113 'title': 'Your Brain is Plastic',
2114 'uploader_id': 'scishow',
2115 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2116 'upload_date': '20140324',
2117 'uploader': 'SciShow',
2118 'like_count': int,
2119 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2120 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2121 'view_count': int,
2122 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2123 'playable_in_embed': True,
2124 'tags': 'count:12',
2125 'uploader_url': 'http://www.youtube.com/user/scishow',
2126 'availability': 'public',
2127 'channel': 'SciShow',
2128 'live_status': 'not_live',
2129 'duration': 248,
2130 'categories': ['Education'],
2131 'age_limit': 0,
2132 'channel_follower_count': int
2133 }, 'params': {'format': 'mhtml', 'skip_download': True}
2134 }, {
2135 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2136 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2137 'info_dict': {
2138 'id': '2NUZ8W2llS4',
2139 'ext': 'mp4',
2140 'title': 'The NP that test your phone performance 🙂',
2141 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2142 'uploader': 'Leon Nguyen',
2143 'uploader_id': 'VNSXIII',
2144 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2145 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2146 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2147 'duration': 21,
2148 'view_count': int,
2149 'age_limit': 0,
2150 'categories': ['Gaming'],
2151 'tags': 'count:23',
2152 'playable_in_embed': True,
2153 'live_status': 'not_live',
2154 'upload_date': '20220103',
2155 'like_count': int,
2156 'availability': 'public',
2157 'channel': 'Leon Nguyen',
2158 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2159 'comment_count': int,
2160 'channel_follower_count': int
2161 }
2162 }, {
2163 # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
2164 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2165 'info_dict': {
2166 'id': '2NUZ8W2llS4',
2167 'ext': 'mp4',
2168 'title': 'The NP that test your phone performance 🙂',
2169 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2170 'uploader': 'Leon Nguyen',
2171 'uploader_id': 'VNSXIII',
2172 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2173 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2174 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2175 'duration': 21,
2176 'view_count': int,
2177 'age_limit': 0,
2178 'categories': ['Gaming'],
2179 'tags': 'count:23',
2180 'playable_in_embed': True,
2181 'live_status': 'not_live',
2182 'upload_date': '20220102',
2183 'like_count': int,
2184 'availability': 'public',
2185 'channel': 'Leon Nguyen',
2186 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2187 'comment_count': int,
2188 'channel_follower_count': int
2189 },
2190 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
2191 }, {
2192 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2193 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2194 'info_dict': {
2195 'id': 'mzZzzBU6lrM',
2196 'ext': 'mp4',
2197 'title': 'I Met GeorgeNotFound In Real Life...',
2198 'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
2199 'uploader': 'Quackity',
2200 'uploader_id': 'QuackityHQ',
2201 'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
2202 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2203 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2204 'duration': 955,
2205 'view_count': int,
2206 'age_limit': 0,
2207 'categories': ['Entertainment'],
2208 'tags': 'count:26',
2209 'playable_in_embed': True,
2210 'live_status': 'not_live',
2211 'release_timestamp': 1641172509,
2212 'release_date': '20220103',
2213 'upload_date': '20220103',
2214 'like_count': int,
2215 'availability': 'public',
2216 'channel': 'Quackity',
2217 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2218 'channel_follower_count': int
2219 }
2220 },
2221 { # continuous livestream. Microformat upload date should be preferred.
2222 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2223 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2224 'info_dict': {
2225 'id': 'kgx4WGK0oNU',
2226 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2227 'ext': 'mp4',
2228 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2229 'availability': 'public',
2230 'age_limit': 0,
2231 'release_timestamp': 1637975704,
2232 'upload_date': '20210619',
2233 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2234 'live_status': 'is_live',
2235 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
2236 'uploader': '阿鲍Abao',
2237 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2238 'channel': 'Abao in Tokyo',
2239 'channel_follower_count': int,
2240 'release_date': '20211127',
2241 'tags': 'count:39',
2242 'categories': ['People & Blogs'],
2243 'like_count': int,
2244 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2245 'view_count': int,
2246 'playable_in_embed': True,
2247 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
2248 },
2249 'params': {'skip_download': True}
2250 }, {
2251 # Story. Requires specific player params to work.
2252 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
2253 'info_dict': {
2254 'id': 'vv8qTUWmulI',
2255 'ext': 'mp4',
2256 'availability': 'unlisted',
2257 'view_count': int,
2258 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
2259 'upload_date': '20220526',
2260 'categories': ['Education'],
2261 'title': 'Story',
2262 'channel': 'IT\'S HISTORY',
2263 'description': '',
2264 'uploader_id': 'BlastfromthePast',
2265 'duration': 12,
2266 'uploader': 'IT\'S HISTORY',
2267 'playable_in_embed': True,
2268 'age_limit': 0,
2269 'live_status': 'not_live',
2270 'tags': [],
2271 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
2272 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
2273 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
2274 },
2275 'skip': 'stories get removed after some period of time',
2276 }, {
2277 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2278 'info_dict': {
2279 'id': 'tjjjtzRLHvA',
2280 'ext': 'mp4',
2281 'title': 'ハッシュタグ無し };if window.ytcsi',
2282 'upload_date': '20220323',
2283 'like_count': int,
2284 'availability': 'unlisted',
2285 'channel': 'nao20010128nao',
2286 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
2287 'age_limit': 0,
2288 'uploader': 'nao20010128nao',
2289 'uploader_id': 'nao20010128nao',
2290 'categories': ['Music'],
2291 'view_count': int,
2292 'description': '',
2293 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2294 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2295 'live_status': 'not_live',
2296 'playable_in_embed': True,
2297 'channel_follower_count': int,
2298 'duration': 6,
2299 'tags': [],
2300 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
2301 }
2302 }, {
2303 'note': '6 channel audio',
2304 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2305 'only_matching': True,
2306 }
2307 ]
2308
2309 _WEBPAGE_TESTS = [
2310 # YouTube <object> embed
2311 {
2312 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
2313 'md5': '873c81d308b979f0e23ee7e620b312a3',
2314 'info_dict': {
2315 'id': 'msN87y-iEx0',
2316 'ext': 'mp4',
2317 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
2318 'upload_date': '20080526',
2319 'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
2320 'uploader': 'Christopher Sykes',
2321 'uploader_id': 'ChristopherJSykes',
2322 'age_limit': 0,
2323 'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
2324 'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
2325 'playable_in_embed': True,
2326 'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
2327 'like_count': int,
2328 'comment_count': int,
2329 'channel': 'Christopher Sykes',
2330 'live_status': 'not_live',
2331 'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
2332 'availability': 'public',
2333 'duration': 195,
2334 'view_count': int,
2335 'categories': ['Science & Technology'],
2336 'channel_follower_count': int,
2337 'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
2338 },
2339 'params': {
2340 'skip_download': True,
2341 }
2342 },
2343 ]
2344
@classmethod
def suitable(cls, url):
    """
    Return whether this extractor should handle `url`.

    URLs carrying a non-empty 'list' query parameter are rejected here;
    every other URL is passed on to the parent class's suitable() check.
    """
    # parse_qs is already imported from ..utils at module level, so the
    # former function-scope re-import was redundant
    if parse_qs(url).get('list', [None])[0]:
        return False
    return super().suitable(url)
2353
def __init__(self, *args, **kwargs):
    """ Initialise the parent class, then give the instance two empty caches """
    super().__init__(*args, **kwargs)
    # two distinct dict objects, one per cache
    self._code_cache, self._player_cache = {}, {}
2358
2359 def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
2360 lock = threading.Lock()
2361
2362 is_live = True
2363 start_time = time.time()
2364 formats = [f for f in formats if f.get('is_from_start')]
2365
2366 def refetch_manifest(format_id, delay):
2367 nonlocal formats, start_time, is_live
2368 if time.time() <= start_time + delay:
2369 return
2370
2371 _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
2372 video_details = traverse_obj(
2373 prs, (..., 'videoDetails'), expected_type=dict, default=[])
2374 microformats = traverse_obj(
2375 prs, (..., 'microformat', 'playerMicroformatRenderer'),
2376 expected_type=dict, default=[])
2377 _, is_live, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
2378 start_time = time.time()
2379
2380 def mpd_feed(format_id, delay):
2381 """
2382 @returns (manifest_url, manifest_stream_number, is_live) or None
2383 """
2384 with lock:
2385 refetch_manifest(format_id, delay)
2386
2387 f = next((f for f in formats if f['format_id'] == format_id), None)
2388 if not f:
2389 if not is_live:
2390 self.to_screen(f'{video_id}: Video is no longer live')
2391 else:
2392 self.report_warning(
2393 f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
2394 return None
2395 return f['manifest_url'], f['manifest_stream_number'], is_live
2396
2397 for f in formats:
2398 f['is_live'] = True
2399 f['protocol'] = 'http_dash_segments_generator'
2400 f['fragments'] = functools.partial(
2401 self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)
2402
2403 def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
2404 FETCH_SPAN, MAX_DURATION = 5, 432000
2405
2406 mpd_url, stream_number, is_live = None, None, True
2407
2408 begin_index = 0
2409 download_start_time = ctx.get('start') or time.time()
2410
2411 lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
2412 if lack_early_segments:
2413 self.report_warning(bug_reports_message(
2414 'Starting download from the last 120 hours of the live stream since '
2415 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
2416 lack_early_segments = True
2417
2418 known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
2419 fragments, fragment_base_url = None, None
2420
2421 def _extract_sequence_from_mpd(refresh_sequence, immediate):
2422 nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
2423 # Obtain from MPD's maximum seq value
2424 old_mpd_url = mpd_url
2425 last_error = ctx.pop('last_error', None)
2426 expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
2427 mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
2428 or (mpd_url, stream_number, False))
2429 if not refresh_sequence:
2430 if expire_fast and not is_live:
2431 return False, last_seq
2432 elif old_mpd_url == mpd_url:
2433 return True, last_seq
2434 try:
2435 fmts, _ = self._extract_mpd_formats_and_subtitles(
2436 mpd_url, None, note=False, errnote=False, fatal=False)
2437 except ExtractorError:
2438 fmts = None
2439 if not fmts:
2440 no_fragment_score += 2
2441 return False, last_seq
2442 fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
2443 fragments = fmt_info['fragments']
2444 fragment_base_url = fmt_info['fragment_base_url']
2445 assert fragment_base_url
2446
2447 _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
2448 return True, _last_seq
2449
2450 while is_live:
2451 fetch_time = time.time()
2452 if no_fragment_score > 30:
2453 return
2454 if last_segment_url:
2455 # Obtain from "X-Head-Seqnum" header value from each segment
2456 try:
2457 urlh = self._request_webpage(
2458 last_segment_url, None, note=False, errnote=False, fatal=False)
2459 except ExtractorError:
2460 urlh = None
2461 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
2462 if last_seq is None:
2463 no_fragment_score += 2
2464 last_segment_url = None
2465 continue
2466 else:
2467 should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
2468 no_fragment_score += 2
2469 if not should_continue:
2470 continue
2471
2472 if known_idx > last_seq:
2473 last_segment_url = None
2474 continue
2475
2476 last_seq += 1
2477
2478 if begin_index < 0 and known_idx < 0:
2479 # skip from the start when it's negative value
2480 known_idx = last_seq + begin_index
2481 if lack_early_segments:
2482 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
2483 try:
2484 for idx in range(known_idx, last_seq):
2485 # do not update sequence here or you'll get skipped some part of it
2486 should_continue, _ = _extract_sequence_from_mpd(False, False)
2487 if not should_continue:
2488 known_idx = idx - 1
2489 raise ExtractorError('breaking out of outer loop')
2490 last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
2491 yield {
2492 'url': last_segment_url,
2493 'fragment_count': last_seq,
2494 }
2495 if known_idx == last_seq:
2496 no_fragment_score += 5
2497 else:
2498 no_fragment_score = 0
2499 known_idx = last_seq
2500 except ExtractorError:
2501 continue
2502
2503 time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2504
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute URL of the JS player named in the given ytcfgs.

    The first string found under ``PLAYER_JS_URL`` or under a
    ``WEB_PLAYER_CONTEXT_CONFIGS`` entry's ``jsUrl`` is resolved against
    https://www.youtube.com. Returns None when no ytcfg carries one.
    ``webpage`` is accepted but not consulted here.
    """
    lookup_paths = (
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
    )
    found_url = traverse_obj(ytcfgs, *lookup_paths, get_all=False, expected_type=str)
    if found_url:
        return urljoin('https://www.youtube.com', found_url)
    return None
2512
def _download_player_url(self, video_id, fatal=False):
    """Build the player base.js URL from the version found in the iframe API JS.

    Returns None when the iframe API script cannot be downloaded or no
    player version can be found in it.
    """
    iframe_api_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_api_js:
        return None

    version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_api_js, 'player version', fatal=fatal)
    if not version:
        return None
    return f'https://www.youtube.com/s/player/{version}/player_ias.vflset/en_US/base.js'
2522
2523 def _signature_cache_id(self, example_sig):
2524 """ Return a string representation of a signature """
2525 return '.'.join(str(len(part)) for part in example_sig.split('.'))
2526
2527 @classmethod
2528 def _extract_player_info(cls, player_url):
2529 for player_re in cls._PLAYER_INFO_RE:
2530 id_m = re.search(player_re, player_url)
2531 if id_m:
2532 break
2533 else:
2534 raise ExtractorError('Cannot identify player %r' % player_url)
2535 return id_m.group('id')
2536
2537 def _load_player(self, video_id, player_url, fatal=True):
2538 player_id = self._extract_player_info(player_url)
2539 if player_id not in self._code_cache:
2540 code = self._download_webpage(
2541 player_url, video_id, fatal=fatal,
2542 note='Downloading player ' + player_id,
2543 errnote='Download of %s failed' % player_url)
2544 if code:
2545 self._code_cache[player_id] = code
2546 return self._code_cache.get(player_id)
2547
2548 def _extract_signature_function(self, video_id, player_url, example_sig):
2549 player_id = self._extract_player_info(player_url)
2550
2551 # Read from filesystem cache
2552 func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
2553 assert os.path.basename(func_id) == func_id
2554
2555 self.write_debug(f'Extracting signature function {func_id}')
2556 cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
2557
2558 if not cache_spec:
2559 code = self._load_player(video_id, player_url)
2560 if code:
2561 res = self._parse_sig_js(code)
2562 test_string = ''.join(map(chr, range(len(example_sig))))
2563 cache_spec = [ord(c) for c in res(test_string)]
2564 self.cache.store('youtube-sigfuncs', func_id, cache_spec)
2565
2566 return lambda s: ''.join(s[i] for i in cache_spec)
2567
2568 def _print_sig_code(self, func, example_sig):
2569 if not self.get_param('youtube_print_sig_code'):
2570 return
2571
2572 def gen_sig_code(idxs):
2573 def _genslice(start, end, step):
2574 starts = '' if start == 0 else str(start)
2575 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
2576 steps = '' if step == 1 else (':%d' % step)
2577 return f's[{starts}{ends}{steps}]'
2578
2579 step = None
2580 # Quelch pyflakes warnings - start will be set when step is set
2581 start = '(Never used)'
2582 for i, prev in zip(idxs[1:], idxs[:-1]):
2583 if step is not None:
2584 if i - prev == step:
2585 continue
2586 yield _genslice(start, prev, step)
2587 step = None
2588 continue
2589 if i - prev in [-1, 1]:
2590 step = i - prev
2591 start = prev
2592 continue
2593 else:
2594 yield 's[%d]' % prev
2595 if step is None:
2596 yield 's[%d]' % i
2597 else:
2598 yield _genslice(start, i, step)
2599
2600 test_string = ''.join(map(chr, range(len(example_sig))))
2601 cache_res = func(test_string)
2602 cache_spec = [ord(c) for c in cache_res]
2603 expr_code = ' + '.join(gen_sig_code(cache_spec))
2604 signature_id_tuple = '(%s)' % (
2605 ', '.join(str(len(p)) for p in example_sig.split('.')))
2606 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
2607 ' return %s\n') % (signature_id_tuple, expr_code)
2608 self.to_screen('Extracted signature function:\n' + code)
2609
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and wrap it.

    The function name is found with the first matching pattern below; the
    returned callable takes a string and passes it to the interpreted JS
    function as its single argument.
    """
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        sig_name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    js_function = interpreter.extract_function(funcname)

    def run_signature(s):
        return js_function([s])

    return run_signature
2633
2634 def _cached(self, func, *cache_id):
2635 def inner(*args, **kwargs):
2636 if cache_id not in self._player_cache:
2637 try:
2638 self._player_cache[cache_id] = func(*args, **kwargs)
2639 except ExtractorError as e:
2640 self._player_cache[cache_id] = e
2641 except Exception as e:
2642 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
2643
2644 ret = self._player_cache[cache_id]
2645 if isinstance(ret, Exception):
2646 raise ret
2647 return ret
2648 return inner
2649
2650 def _decrypt_signature(self, s, video_id, player_url):
2651 """Turn the encrypted s field into a working signature"""
2652 extract_sig = self._cached(
2653 self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
2654 func = extract_sig(video_id, player_url, s)
2655 self._print_sig_code(func, s)
2656 return func(s)
2657
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as extraction_error:
        raise ExtractorError('Unable to extract nsig function code', cause=extraction_error)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        native_decrypt = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        decrypted = native_decrypt(jsi, func_code)(s)
    except JSInterpreter.Exception as native_error:
        # Fall back to PhantomJS; if that is unavailable, surface the native failure
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            raise native_error
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(native_error)

        arg_names, func_body = func_code
        script = f'console.log(function({", ".join(arg_names)}) {{ {func_body} }}({s!r}));'
        decrypted = jsi.execute(
            script, video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {decrypted}')
    return decrypted
2691
def _extract_n_function_name(self, jscode):
    """Return the name of the player's "n" function.

    When the call site references the function through an array index
    (``name[idx](...)``), the array literal assigned to ``name`` is parsed
    and the entry at that index is returned instead.
    """
    holder_name, index = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
        jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    if not index:
        return holder_name

    array_literal = self._search_regex(
        rf'var {re.escape(holder_name)}\s*=\s*(\[.+?\]);', jscode,
        f'Initial JS player n function list ({holder_name}.{index})')
    function_names = json.loads(js_to_json(array_literal))
    return function_names[int(index)]
2702
def _extract_n_function_code(self, video_id, player_url):
    """Return ``(jsi, player_id, func_code)`` for the player's "n" function.

    ``func_code`` is taken from the 'youtube-nsig' cache when available.
    Otherwise the player JS is loaded, the function is located by regex
    (falling back to the JS interpreter), and the result is stored in the
    cache before returning.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
    jscode = func_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if func_code:
        return jsi, player_id, func_code

    func_name = self._extract_n_function_name(jscode)

    # For redundancy
    # The name may contain "$" (see the name regex), which must not be
    # interpreted as a regex anchor, hence re.escape
    func_code = self._search_regex(
        r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
            # NB: The end of the regex is intentionally kept strict
            {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % re.escape(func_name),
        jscode, 'nsig function', group=('var', 'code'), default=None)
    if func_code:
        func_code = ([func_code[0]], func_code[1])
    else:
        self.write_debug('Extracting nsig function with jsinterp')
        func_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
2728
2729 def _extract_n_function_from_code(self, jsi, func_code):
2730 func = jsi.extract_function_from_code(*func_code)
2731
2732 def extract_nsig(s):
2733 try:
2734 ret = func([s])
2735 except JSInterpreter.Exception:
2736 raise
2737 except Exception as e:
2738 raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
2739
2740 if ret.startswith('enhanced_except_'):
2741 raise JSInterpreter.Exception('Signature function returned an exception')
2742 return ret
2743
2744 return extract_nsig
2745
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The ytcfg's STS value is preferred; otherwise the player JS is searched.
    Without a player_url this raises ExtractorError when fatal, else it
    warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
2769
def _mark_watched(self, video_id, player_responses):
    """Request the playback and watchtime tracking URLs of the player responses.

    Stops with a warning as soon as one of the two tracking URLs is missing.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')
    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

        # more consistent results setting it to right before the end
        reported_length = (qs.get('len') or ['1.5'])[0]
        video_length = [str(float(reported_length) - 1)]

        qs['ver'] = ['2']
        qs['cpn'] = [cpn]
        qs['cmt'] = video_length
        qs['el'] = 'detailpage'  # otherwise defaults to "shorts"

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs['st'] = video_length
            qs['et'] = video_length

        new_query = urllib.parse.urlencode(qs, True)
        url = urllib.parse.urlunparse(parsed_url._replace(query=new_query))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
2810
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield URL results for YouTube videos referenced from *webpage*.

    An "alternate" link to a YouTube watch page short-circuits everything
    else via StopExtraction; otherwise the parent class's results are
    yielded first, followed by lazyYT and "YouTube Video Importer" embeds.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate_link = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate_link:
        yield cls.url_result(alternate_link.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    for raw_id in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        yield cls.url_result(unescapeHTML(raw_id), cls, raw_id)

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    # Each findall row is (q1, q2, id); only the trailing id is needed
    for *_, embedded_id in re.findall(importer_re, webpage):
        yield cls.url_result(embedded_id, cls, embedded_id)
2834
@classmethod
def extract_id(cls, url):
    """Return the id that ``get_temp_id`` derives from *url*; raise ExtractorError if there is none."""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
2841
def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the chaptered player bar found in *data*."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters',
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_seconds(chapter):
        # timeRangeStartMillis is scaled down by 1000
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def title_text(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list, chapter_time=start_seconds, chapter_title=title_text, duration=duration)
2856
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the chapters of the first engagement panel that yields any, else []."""
    panels_path = (
        'engagementPanels', ..., 'engagementPanelSectionListRenderer',
        'content', 'macroMarkersListRenderer', 'contents')
    content_list = traverse_obj(data, panels_path, expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
2869
2870 def _extract_chapters_from_description(self, description, duration):
2871 return self._extract_chapters(
2872 re.findall(r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or ''),
2873 chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1],
2874 duration=duration, strict=False)
2875
2876 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
2877 if not duration:
2878 return
2879 chapter_list = [{
2880 'start_time': chapter_time(chapter),
2881 'title': chapter_title(chapter),
2882 } for chapter in chapter_list or []]
2883 if not strict:
2884 chapter_list.sort(key=lambda c: c['start_time'] or 0)
2885
2886 chapters = [{'start_time': 0}]
2887 for idx, chapter in enumerate(chapter_list):
2888 if chapter['start_time'] is None:
2889 self.report_warning(f'Incomplete chapter {idx}')
2890 elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
2891 chapters.append(chapter)
2892 else:
2893 self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')
2894 return chapters[1:]
2895
2896 def _extract_comment(self, comment_renderer, parent=None):
2897 comment_id = comment_renderer.get('commentId')
2898 if not comment_id:
2899 return
2900
2901 text = self._get_text(comment_renderer, 'contentText')
2902
2903 # note: timestamp is an estimate calculated from the current time and time_text
2904 timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
2905 author = self._get_text(comment_renderer, 'authorText')
2906 author_id = try_get(comment_renderer,
2907 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)
2908
2909 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2910 lambda x: x['likeCount']), str)) or 0
2911 author_thumbnail = try_get(comment_renderer,
2912 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)
2913
2914 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2915 is_favorited = 'creatorHeart' in (try_get(
2916 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2917 return {
2918 'id': comment_id,
2919 'text': text,
2920 'timestamp': timestamp,
2921 'time_text': time_text,
2922 'like_count': votes,
2923 'is_favorited': is_favorited,
2924 'author': author,
2925 'author_id': author_id,
2926 'author_thumbnail': author_thumbnail,
2927 'author_is_uploader': author_is_uploader,
2928 'parent': parent or 'root'
2929 }
2930
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generate comment dicts by following continuations from *root_continuation_data*.

    Called without *parent* for the top-level comment section, and
    recursively with a parent comment id for reply threads. *tracker* is a
    dict of running counters shared across the recursive calls.
    """

    def get_single_config_arg(name):
        return self._configuration_arg(name, [''])[0]

    def extract_header(contents):
        """Pick the continuation for the requested sort order from the comments header."""
        _continuation = None
        for content in contents:
            header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = (
                self._extract_continuation_ep_data(sort_continuation_ep)
                or self._extract_continuation(sort_menu_item))
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        """Yield comments (and their replies) from one page of continuation items."""
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare yield hands None to the consumer, which stops on it
                yield
            thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            replies_renderer = try_get(
                thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if replies_renderer:
                tracker['current_page_thread'] += 1
                reply_entries = self._comment_entries(
                    replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                remaining_replies = max(0, max_replies - tracker['total_reply_comments'])
                yield from itertools.islice(
                    reply_entries, min(max_replies_per_thread, remaining_replies))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # Missing or unparsable limits fall back to sys.maxsize; the first one is unused here
    limit_args = self._configuration_arg('max_comments', ) + [''] * 4
    _max_total, max_parents, max_replies, max_replies_per_thread, *_ = (
        int_or_none(arg, default=sys.maxsize) for arg in limit_args)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    page_num = 0
    while continuation:
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num != 0:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        elif is_first_continuation:
            note_prefix = 'Downloading comment section API JSON'
        else:
            note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                tracker['current_page_thread'], comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=None if is_forced_continuation else 'onResponseReceivedEndpoints')
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

        page_num += 1

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
3076
3077 @staticmethod
3078 def _generate_comment_continuation(video_id):
3079 """
3080 Generates initial comment section continuation token from given video id
3081 """
3082 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3083 return base64.b64encode(token.encode()).decode()
3084
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""
    def _real_comment_extract(contents):
        # Find the item section that holds the comments, if there is one
        renderer = None
        for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if item.get('sectionIdentifier') == 'comment-item-section':
                renderer = item
                break
        yield from self._comment_entries(renderer, ytcfg, video_id)

    # The first max_comments value caps the total; an unparsable value leaves the slice unbounded
    comment_limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_real_comment_extract(contents), 0, comment_limit)
3095
3096 @staticmethod
3097 def _get_checkok_params():
3098 return {'contentCheckOk': True, 'racyCheckOk': True}
3099
3100 @classmethod
3101 def _generate_player_context(cls, sts=None):
3102 context = {
3103 'html5Preference': 'HTML5_PREF_WANTS',
3104 }
3105 if sts is not None:
3106 context['signatureTimestamp'] = sts
3107 return {
3108 'playbackContext': {
3109 'contentPlaybackContext': context
3110 },
3111 **cls._get_checkok_params()
3112 }
3113
@staticmethod
def _is_agegated(player_response):
    """Return True if the player response's playability status indicates an age gate.

    True when ``desktopLegacyAgeGateReason`` is set, or when the status or
    reason text contains one of the known age-gate markers. Matching is
    case-insensitive.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # Status values are upper-case (e.g. 'UNPLAYABLE'), so the lower-case
    # markers above can only match after normalising the case.
    return any(
        expected in reason.lower()
        for reason in reasons or [] if isinstance(reason, str)
        for expected in AGE_GATE_REASONS)
3125
@staticmethod
def _is_unplayable(player_response):
    """Return True if the player response's playability status is exactly 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
3129
# Sent as the player query's `params` for story videos and the android client
_STORY_PLAYER_PARAMS = '8AEB'

def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """Request the player API response for *video_id* as the given innertube *client*.

    Returns the response, or None when it is empty.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    # The signature timestamp can only be looked up when a player URL is known
    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id}
    if smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android':
        yt_query['params'] = self._STORY_PLAYER_PARAMS
    yt_query.update(self._generate_player_context(sts))

    readable_client = client.replace('_', ' ').strip()
    player_response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % readable_client)
    return player_response or None
3153
def _get_requested_clients(self, url, smuggled_data):
    """Resolve the 'player_client' extractor argument into an ordered, de-duplicated client list.

    'default' and 'all' expand to the default and to every user-selectable
    client respectively; unknown names are skipped with a warning. For
    music URLs the existing ``*_music`` variants of the chosen clients are
    appended.
    """
    default = ['android', 'web']
    # Clients whose name starts with "_" cannot be requested explicitly
    selectable = (name for name in INNERTUBE_CLIENTS.keys() if not name.startswith('_'))
    allowed_clients = sorted(
        selectable, key=lambda name: INNERTUBE_CLIENTS[name]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Kept as a generator over the list being extended, as before
        requested_clients.extend(
            f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)

    return orderedSet(requested_clients)
3177
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """Collect player responses for *video_id* from each requested client.

    Returns ``(player_responses, player_url)``. Extraction errors of
    individual clients are reported as warnings; the last one is raised
    only when no player response was obtained at all.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed so that pop() takes the clients in their requested order
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS and actual_client not in all_clients:
                clients.append(client_name)
                all_clients.add(actual_client)
                return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        stripped_pr = dict(initial_pr)
        stripped_pr['streamingData'] = None
        prs.append(stripped_pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        is_web = client == 'web'
        player_ytcfg = master_ytcfg if is_web else {}
        if 'configs' not in self._configuration_arg('player_skip') and not is_web:
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if is_web and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr, smuggled_data)
        except ExtractorError as error:
            # Only the most recent error is kept; earlier ones become warnings
            if last_error:
                self.report_warning(last_error)
            last_error = error
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not prs:
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
3259
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, is_live, duration):
    """Yield every usable format dict, followed by the manifest subtitles.

    This is a generator: it first yields one dict per direct (https) stream
    found under `formats`/`adaptiveFormats` of each streaming-data entry, then
    the formats found in the HLS and DASH manifests, and finally - as the very
    last item - the merged subtitles dict collected from those manifests.
    Callers are expected to split the last item off (`*formats, subs = ...`).

    @param streaming_data  Iterable of `streamingData` dicts (one per player response)
    @param video_id        Video ID, used for messages and manifest requests
    @param player_url      URL of the JS player; needed to decrypt signatures/nsig
    @param is_live         Whether the video is currently live
    @param duration        Video duration in seconds or None. When None, the
                           "damaged format" check is silently skipped
    """
    # itags maps format_id -> protocol; stream_ids tracks itag+audio-track pairs already emitted
    itags, stream_ids = {}, set()
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragments that would subsequently be requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: the URL and the encrypted signature are packed in `signatureCipher`
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&%s=%s' % (
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url)
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url)
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
                    f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                # The format is kept, but flagged and de-prioritized below
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.add(stream_id)

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        language_preference = (
            10 if audio_track.get('audioIsDefault')
            else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower()
            else -1)
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        # NB: try_get swallows the error when approxDurationMs is missing or duration is None
        is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if language_preference > 0 else ''),
                # quality is None when the format carries neither `quality` nor `audioQuality`
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': -10 if throttled else -5 if itag == '22' else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality),
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                      'desc' if language_preference < -1 else ''),
            'language_preference': language_preference,
            # Strictly de-prioritize damaged and 3gp formats
            'preference': -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    live_from_start = is_live and self.get_param('live_from_start')
    skip_manifests = self._configuration_arg('skip')
    if not self.get_param('youtube_include_hls_manifest', True):
        skip_manifests.append('hls')
    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.append('dash')
    get_dash = 'dash' not in skip_manifests and (
        not is_live or live_from_start or self._configuration_arg('include_live_dash'))
    get_hls = not live_from_start and 'hls' not in skip_manifests

    def process_manifest_format(f, proto, itag):
        """Assign format_id/quality to a manifest format; return False for duplicates"""
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            # Same itag already seen over another protocol: disambiguate the id
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Fall back to the quality recorded for the closest known height
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        return True

    subtitles = {}
    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(hls_manifest_url, video_id, 'mp4', fatal=False, live=is_live)
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if live_from_start:
                        f['is_from_start'] = True

                    yield f
    # The subtitles dict is always the final item produced by this generator
    yield subtitles
3450
def _extract_storyboard(self, player_responses, duration):
    """Yield one mhtml format dict per storyboard level found in the player responses.

    The storyboard `spec` string is '|'-separated: the first field is the base
    URL template and each following field is a '#'-separated description of
    one storyboard level (8 fields, the first 5 being integers).

    @param player_responses  Player responses to search for the storyboard spec
    @param duration          Video duration in seconds, or None when unknown.
                             When unknown, `fps` and the per-fragment `duration`
                             are set to None instead of raising
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The list is reversed, so pop() removes the first field, i.e. the base URL
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        fragment_count = frame_count / (cols * rows)
        # duration may be None (or 0); timing information is then unavailable
        fragment_duration = duration / fragment_count if duration else None
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration if duration else None,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may be shorter than the others
                'duration': (min(fragment_duration, duration - (j * fragment_duration))
                             if fragment_duration else None),
            } for j in range(math.ceil(fragment_count))],
        }
3488
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Fetch the watch page (unless skipped) and the player responses.

    Returns a tuple `(webpage, master_ytcfg, player_responses, player_url)`;
    `webpage` is None when 'webpage' is listed in the `player_skip` argument.
    """
    page = None
    if 'webpage' not in self._configuration_arg('player_skip'):
        params = {'bpctr': '9999999999', 'has_verified': '1'}
        if smuggled_data.get('is_story'):
            params['pp'] = self._STORY_PLAYER_PARAMS
        page = self._download_webpage(
            webpage_url, video_id, fatal=False, query=params)

    # Fall back to the default config when none could be read from the page
    ytcfg = self.extract_ytcfg(video_id, page)
    if not ytcfg:
        ytcfg = self._get_default_ytcfg()

    clients = self._get_requested_clients(url, smuggled_data)
    responses, js_player_url = self._extract_player_responses(
        clients, video_id, page, ytcfg, smuggled_data)

    return page, ytcfg, responses, js_player_url
3505
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Collect live state, streaming data, formats and manifest subtitles.

    Returns `(live_broadcast_details, is_live, streaming_data, formats, subtitles)`.
    """
    stream_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    # Prefer the flag from videoDetails; only consult the microformat when it is absent
    live_now = get_first(video_details, 'isLive')
    if live_now is None:
        live_now = get_first(broadcast_details, 'isLiveNow')

    # The generator yields all formats first and the subtitles dict last
    *fmts, subs = self._extract_formats_and_subtitles(
        stream_data, video_id, player_url, live_now, duration)

    return broadcast_details, live_now, stream_data, fmts, subs
3516
def _real_extract(self, url):
    """Extract the info dict for a single video URL.

    Downloads the player responses (and, unless skipped, the watch page),
    then assembles formats, thumbnails, subtitles and metadata into `info`.
    Returns early with a `url_result` for trailers and with a
    `playlist_result` for multifeed videos.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id

    webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Without a webpage there are no <meta> tags to search
    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])
    video_title = (
        get_first(video_details, 'title')
        or self._get_text(microformats, (..., 'title'))
        or search_meta(['og:title', 'twitter:title', 'title']))
    video_description = get_first(video_details, 'shortDescription')

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = urllib.parse.parse_qs(
                    urllib.parse.unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += ' (%s)' % feed_title
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    duration = int_or_none(
        get_first(video_details, 'lengthSeconds')
        or get_first(microformats, 'lengthSeconds')
        or parse_duration(search_meta('duration'))) or None

    # duration must be forwarded: the damaged-format detection compares each
    # format's approximate duration against it
    live_broadcast_details, is_live, streaming_data, formats, automatic_captions = \
        self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # reason may be None even when a subreason is present
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break
    thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    # Snapshot of the thumbnails actually advertised, before the guessed URLs are added
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
        # in resolution, these are not the custom thumbnail. So de-prioritize them
        'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
        'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Rank by position in thumbnail_names; unknown names rank last. webp wins ties
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    if is_live is None:
        if is_upcoming or live_content is False:
            is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False
    live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_end_time and live_start_time:
        duration = live_end_time - live_start_time

    if is_live and self.get_param('live_from_start'):
        self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

    formats.extend(self._extract_storyboard(player_responses, duration))

    # source_preference is lower for throttled/potentially damaged formats
    self._sort_formats(formats, (
        'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))

    info = {
        'id': video_id,
        'title': video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if user don't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'uploader': get_first(video_details, 'author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'is_live': is_live,
        'was_live': (False if is_live or is_upcoming or live_content is False
                     else None if is_live is None or is_upcoming is None
                     else live_content),
        'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
        'release_timestamp': live_start_time,
    }

    if get_first(video_details, 'isPostLiveDvr'):
        self.write_debug('Video is in Post-Live Manifestless mode')
        info['live_status'] = 'post_live'
        if (duration or 0) > 4 * 3600:
            self.report_warning(
                'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '
                'This is a known issue and patches are welcome')

    subtitles = {}
    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

        def process_language(container, base_url, lang_code, sub_name, query):
            """Append one entry per subtitle format for lang_code to container"""
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
                    'name': sub_name,
                })

        # NB: Constructing the full subtitle dictionary is slow
        get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
            self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
        for lang_code, caption_track in captions.items():
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            orig_lang = parse_qs(base_url).get('lang', [None])[-1]
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                orig_trans_code = trans_code
                if caption_track.get('kind') != 'asr':
                    if not get_translated_subs:
                        continue
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, None, ' from %s')
                # Add an "-orig" label to the original language so that it can be distinguished.
                # The subs are returned without "-orig" as well for compatibility
                if lang_code == f'a-{orig_trans_code}':
                    process_language(
                        automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
                # Setting tlang=lang returns damaged subtitles.
                process_language(automatic_captions, base_url, trans_code, trans_name,
                                 {} if orig_lang == orig_trans_code else {'tlang': trans_code})

    info['automatic_captions'] = automatic_captions
    info['subtitles'] = subtitles

    # Start/end times may be given in the URL fragment or query; the fragment is checked first
    parsed_url = urllib.parse.urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = urllib.parse.parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(v[0])

    # Youtube Music Auto-generated description
    if video_description:
        # NB: The pattern is compiled in verbose mode, so literal spaces must be escaped
        mobj = re.search(
            r'''(?xs)
                (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
                (?P<album>[^\n]+)
                (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
                (?:.+?Released\ on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
                (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
                .+\nAuto-generated\ by\ YouTube\.\s*$
            ''', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
    if not initial_data:
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query,
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    info['comment_count'] = traverse_obj(initial_data, (
        'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
        'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'
    ), (
        'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
        'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'
    ), expected_type=int_or_none, get_all=False)

    try:  # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
    except (KeyError, IndexError, TypeError):
        pass
    else:
        info.setdefault('subtitles', {})['live_chat'] = [{
            # url is needed to set cookies
            'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]

    # Both names are read further below even when no initial data could be
    # obtained (both requests above are non-fatal), so they need defaults
    contents, vpir = [], None
    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or self._extract_chapters_from_description(video_description, duration)
            or None)

        contents = traverse_obj(
            initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
            expected_type=list, default=[])

        vpir = get_first(contents, 'videoPrimaryInfoRenderer')
        if vpir:
            stl = vpir.get('superTitleLink')
            if stl:
                stl = self._get_text(stl)
                if try_get(
                        vpir,
                        lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                    info['location'] = stl
                else:
                    mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
                    if mobj:
                        info.update({
                            'series': mobj.group(1),
                            'season_number': int(mobj.group(2)),
                            'episode_number': int(mobj.group(3)),
                        })
            for tlb in (try_get(
                    vpir,
                    lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                    list) or []):
                tbr = tlb.get('toggleButtonRenderer') or {}
                for getter, regex in [(
                        lambda x: x['defaultText']['accessibility']['accessibilityData'],
                        r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                            lambda x: x['accessibility'],
                            lambda x: x['accessibilityData']['accessibilityData'],
                        ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                    label = (try_get(tbr, getter, dict) or {}).get('label')
                    if label:
                        mobj = re.match(regex, label)
                        if mobj:
                            info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                            break
            sbr_tooltip = try_get(
                vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
            if sbr_tooltip:
                like_count, dislike_count = sbr_tooltip.split(' / ')
                info.update({
                    'like_count': str_to_int(like_count),
                    'dislike_count': str_to_int(dislike_count),
                })
        vsir = get_first(contents, 'videoSecondaryInfoRenderer')
        if vsir:
            vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
            info.update({
                'channel': self._get_text(vor, 'title'),
                'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

            rows = try_get(
                vsir,
                lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                list) or []
            # A divider line means several songs are listed; per-song fields are then skipped
            multiple_songs = False
            for row in rows:
                if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                    multiple_songs = True
                    break
            for row in rows:
                mrr = row.get('metadataRowRenderer') or {}
                mrr_title = mrr.get('title')
                if not mrr_title:
                    continue
                mrr_title = self._get_text(mrr, 'title')
                mrr_contents_text = self._get_text(mrr, ('contents', 0))
                if mrr_title == 'License':
                    info['license'] = mrr_contents_text
                elif not multiple_songs:
                    if mrr_title == 'Album':
                        info['album'] = mrr_contents_text
                    elif mrr_title == 'Artist':
                        info['artist'] = mrr_contents_text
                    elif mrr_title == 'Song':
                        info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }

    # The upload date for scheduled, live and past live streams / premieres in microformats
    # may be different from the stream date. Although not in UTC, we will prefer it in this case.
    # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
    upload_date = (
        unified_strdate(get_first(microformats, 'uploadDate'))
        or unified_strdate(search_meta('uploadDate')))
    if not upload_date or (
        not info.get('is_live')
        and not info.get('was_live')
        and info.get('live_status') != 'is_upcoming'
        and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
    ):
        upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date
    info['upload_date'] = upload_date

    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = get_first(video_details, 'isPrivate', expected_type=bool)
    is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
4019
4020
4021 class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
4022
@staticmethod
def passthrough_smuggled_data(func):
    """Decorator that unsmuggles the URL and re-smuggles the data into returned entries.

    The wrapped function is called as `func(self, url, smuggled_data)`.
    """
    def _resmuggle(items, data):
        # Lazily re-attach the smuggled data to the URL of every entry
        # TODO: Convert URL to music.youtube instead.
        # Do we need to passthrough any other smuggled_data?
        for item in items:
            item['url'] = smuggle_url(item['url'], data)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        result = func(self, url, smuggled_data)
        if not smuggled_data:
            return result
        if result.get('entries'):
            result['entries'] = _resmuggle(result['entries'], smuggled_data)
        return result

    return wrapper
4042
4043 def _extract_channel_id(self, webpage):
4044 channel_id = self._html_search_meta(
4045 'channelId', webpage, 'channel id', default=None)
4046 if channel_id:
4047 return channel_id
4048 channel_url = self._html_search_meta(
4049 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
4050 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
4051 'twitter:app:url:googleplay'), webpage, 'channel url')
4052 return self._search_regex(
4053 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
4054 channel_url, 'channel id')
4055
4056 @staticmethod
4057 def _extract_basic_item_renderer(item):
4058 # Modified from _extract_grid_item_renderer
4059 known_basic_renderers = (
4060 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
4061 )
4062 for key, renderer in item.items():
4063 if not isinstance(renderer, dict):
4064 continue
4065 elif key in known_basic_renderers:
4066 return renderer
4067 elif key.startswith('grid') and key.endswith('Renderer'):
4068 return renderer
4069
def _grid_entries(self, grid_renderer):
    """Yield one entry per usable item of ``grid_renderer['items']``.

    Each item is resolved, in order of precedence, as a playlist, a video,
    a channel, or a generic navigation endpoint URL handled by one of the
    YouTube extractors. Items matching none of these are skipped.
    """
    for item in grid_renderer['items']:
        renderer = self._extract_basic_item_renderer(item) if isinstance(item, dict) else None
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        playlist_id = renderer.get('playlistId')
        video_id = renderer.get('videoId')
        channel_id = renderer.get('channelId')
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            yield self._extract_video(renderer)
        elif channel_id:
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support
            ep_url = urljoin('https://www.youtube.com/', try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                str))
            if not ep_url:
                continue
            # The first extractor claiming the URL wins
            handler = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(ep_url)),
                None)
            if handler is not None:
                yield self.url_result(
                    ep_url, ie=handler.ie_key(), video_id=handler._match_id(ep_url), video_title=title)
4109
def _music_reponsive_list_entry(self, renderer):
    """Build a music.youtube.com url_result for a responsive list item.

    Tried in order: a plain video, a playlist (optionally opened at a given
    video), then a browse page. Returns None when none of the ids is present.
    """
    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(
            f'https://music.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id)

    watch_path = ('navigationEndpoint', 'watchEndpoint')
    playlist_id = traverse_obj(renderer, (*watch_path, 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, (*watch_path, 'videoId'))
        if video_id:
            url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(
        f'https://music.youtube.com/browse/{browse_id}',
        ie=YoutubeTabIE.ie_key(), video_id=browse_id)
4127
4128 def _shelf_entries_from_content(self, shelf_renderer):
4129 content = shelf_renderer.get('content')
4130 if not isinstance(content, dict):
4131 return
4132 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
4133 if renderer:
4134 # TODO: add support for nested playlists so each shelf is processed
4135 # as separate playlist
4136 # TODO: this includes only first N items
4137 yield from self._grid_entries(renderer)
4138 renderer = content.get('horizontalListRenderer')
4139 if renderer:
4140 # TODO
4141 pass
4142
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a url_result for the shelf's own URL, then its inline entries.

    With ``skip_channels`` set, a shelf whose URL contains ``/channels?``
    produces nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        links_to_channels = '/channels?' in shelf_url
        if skip_channels and links_to_channels:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
4158
4159 def _playlist_entries(self, video_list_renderer):
4160 for content in video_list_renderer['contents']:
4161 if not isinstance(content, dict):
4162 continue
4163 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
4164 if not isinstance(renderer, dict):
4165 continue
4166 video_id = renderer.get('videoId')
4167 if not video_id:
4168 continue
4169 yield self._extract_video(renderer)
4170
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in ``content.videoRenderer``, if it has a ``videoId``."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
4178
4179 def _video_entry(self, video_renderer):
4180 video_id = video_renderer.get('videoId')
4181 if video_id:
4182 return self._extract_video(video_renderer)
4183
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a url_result for a hashtag tile, or None when it has no tap URL."""
    tap_path = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_path)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4190
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a community post.

    In order: the attached video, the attached playlist, then every video
    URL linked inline in the post text (except the attached video itself).
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        # The attached video has already been handled above
        if linked_id != video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)
4226
4227 def _post_thread_continuation_entries(self, post_thread_continuation):
4228 contents = post_thread_continuation.get('contents')
4229 if not isinstance(contents, list):
4230 return
4231 for content in contents:
4232 renderer = content.get('backstagePostThreadRenderer')
4233 if isinstance(renderer, dict):
4234 yield from self._post_thread_entries(renderer)
4235 continue
4236 renderer = content.get('videoRenderer')
4237 if isinstance(renderer, dict):
4238 yield self._video_entry(renderer)
4239
4240 r''' # unused
4241 def _rich_grid_entries(self, contents):
4242 for content in contents:
4243 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4244 if video_renderer:
4245 entry = self._video_entry(video_renderer)
4246 if entry:
4247 yield entry
4248 '''
4249
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield entries found under ``parent_renderer['contents']``.

    ``continuation_list`` is an output parameter: it is reset to ``[None]``
    and its single slot is overwritten with the continuation found while
    iterating. Because this is a generator, the slot is only meaningful once
    the generator has been exhausted.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]
    contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
    for content in contents:
        if not isinstance(content, dict):
            continue
        # First of the section-like renderers present in this content item
        is_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            # Not a section: the only other supported shape is a rich item
            renderer = content.get('richItemRenderer')
            if renderer:
                for entry in self._rich_entries(renderer):
                    yield entry
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue
        isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
        for isr_content in isr_contents:
            if not isinstance(isr_content, dict):
                continue

            # Renderer key -> callable returning an iterable of entries
            known_renderers = {
                'playlistVideoListRenderer': self._playlist_entries,
                'gridRenderer': self._grid_entries,
                'reelShelfRenderer': self._grid_entries,
                'shelfRenderer': self._shelf_entries,
                'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
                'backstagePostThreadRenderer': self._post_thread_entries,
                'videoRenderer': lambda x: [self._video_entry(x)],
                'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
                'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
                'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
            }
            # Only the first known renderer of each item is processed
            for key, renderer in isr_content.items():
                if key not in known_renderers:
                    continue
                for entry in known_renderers[key](renderer):
                    # Single-entry helpers above may produce None
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

        # Fall back to the continuation of the section itself ...
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    # ... and finally to the continuation of the parent
    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
4298
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of ``tab``, following continuations page by page.

    The first batch comes from the tab's ``sectionListRenderer`` or
    ``richGridRenderer``; further batches are requested through
    ``_extract_response`` for as long as a continuation is found.
    """
    continuation_list = [None]
    # The lambda closes over the *variable* continuation_list, so the
    # rebinding done inside the loop below is visible to later calls
    extract_entries = lambda x: self._extract_entries(x, continuation_list)
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        # Response shape 1: a 'continuationContents' dict keyed by continuation type
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Fresh slot, so a handler that does not set it leaves None behind
            continuation_list = [None]
            yield from known_continuation_renderers[key](continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Response shape 2: a list of appended continuation items; the handler
        # is chosen from the key of the first item and receives the whole list
        # wrapped under the key name that handler reads
        known_renderers = {
            'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'gridChannelRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            yield from known_renderers[key][0](video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Neither shape was recognised: stop paginating
        break
4373
@staticmethod
def _extract_selected_tab(tabs, fatal=True):
    """Return the renderer of the tab whose ``selected`` flag is ``True``.

    Raises ExtractorError when no tab is selected and ``fatal`` is set;
    otherwise returns None.
    """
    for tab in tabs:
        candidate = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if candidate.get('selected') is True:
            return candidate
    if fatal:
        raise ExtractorError('Unable to find selected tab')
    return None
4383
def _extract_uploader(self, data):
    """Return uploader name/id/url read from the playlist's secondary sidebar.

    Keys whose value could not be determined are left out; an empty dict is
    returned when the sidebar has no video owner.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_text = owner.get('text')
    found = {
        # "by X and N others" is reduced to X; any other text is used as is
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], str)),
    }
    return {key: value for key, value in found.items() if value is not None}
4399
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for a tabbed page (channel tab, playlist, ...).

    Metadata comes from the channel or playlist metadata renderer, the
    primary sidebar and the header; entries come from the selected tab.
    """
    playlist_id = title = description = None
    channel_name = channel_url = channel_id = None
    tags = []

    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    # Channel metadata wins; playlist metadata is only consulted without it
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name, channel_url, channel_id = (
            renderer.get(key) for key in ('title', 'channelUrl', 'externalId'))
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _append_uncropped(thumbnails, thumbnail_id, preference):
        if not thumbnails:
            return
        uncropped = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped:
            thumbnails.append({
                'url': uncropped,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    # The tab name (and its expanded text, if any) is appended to the title
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # channel* fields mirror whatever uploader* fields ended up with
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)
4491
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of an inline (watch-page) playlist, page by page.

    After each page the ``next`` endpoint is queried starting from the last
    video seen. Extraction stops when a page is empty, when it contains
    nothing after the last video already yielded, or when the response
    carries no playlist.
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Pages overlap: resume right after the last video already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The last item need not be a playlistPanelVideoRenderer; fall back to
        # an empty endpoint so the defaults in `query` below are used
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        # No playlist in the response: there is nothing left to iterate
        if not playlist:
            return
4522
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Handle a playlist embedded in a watch page.

    Returns a url_result pointing at the playlist's own page when it has one
    that differs from ``url`` and is not known to be broken; otherwise the
    playlist is extracted inline.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    playlist_url = urljoin(url, try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str))

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    must_extract_inline = not playlist_url or playlist_url == url or is_known_unviewable
    if must_extract_inline:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
4545
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for dropdown_entry in dropdown_entries:
        selected = try_get(
            dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
        if selected:
            label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
            if label:
                labels.add(label.lower())
                break

    is_private = is_unlisted = None
    for label in labels:
        if label == 'public':
            is_unlisted = is_private = False
        elif label == 'private':
            is_private = True
        elif label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)
4577
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first truthy ``info_renderer`` value among the playlist sidebar items.

    Values not of ``expected_type`` are ignored; None is returned when no
    sidebar item has a usable one.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next(filter(None, candidates), None)
4586
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    # Endpoint behind the "show unavailable videos" menu item, if there is one
    browse_endpoint = {}
    for menu_item in try_get(sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        text = try_get(
            nav_item, lambda x: x['text']['simpleText'], str)
        if text and text.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    # Defaults are used when the menu item (or one of its fields) is missing
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
4622
@functools.cached_property
def skip_webpage(self):
    """Whether ``webpage`` is listed in the ``skip`` extractor argument of YoutubeTabIE."""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped
4626
def _extract_webpage(self, url, item_id, fatal=True):
    """Download ``url`` and return ``(webpage, initial data)``.

    Attempts are driven by ``self.RetryManager``; setting ``retry.error`` and
    continuing hands the failure to it. Either element of the result may be
    None when the download did not succeed.
    """
    webpage, data = None, None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                # HTTP 403/429 are not handed to the retry manager; every
                # other network error is
                if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429):
                    retry.error = e
                    continue
            self._error_or_warning(e, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
            retry.error = ExtractorError('Incomplete yt initial data received')
            continue

    return webpage, data
4654
4655 def _report_playlist_authcheck(self, ytcfg, fatal=True):
4656 """Use if failed to extract ytcfg (and data) from initial webpage"""
4657 if not ytcfg and self.is_authenticated:
4658 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
4659 if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal:
4660 raise ExtractorError(
4661 f'{msg}. If you are not downloading private content, or '
4662 'your cookies are only for the first account and channel,'
4663 ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
4664 expected=True)
4665 self.report_warning(msg, only_once=True)
4666
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """Return ``(data, ytcfg)`` for ``url``.

    The webpage is tried first unless ``skip_webpage`` is set; when it gives
    no data, the API endpoint resolved from ``url`` is used instead.
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)
    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
4686
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """Resolve ``url`` through the API and return the response of the endpoint it maps to.

    Raises ExtractorError when the URL resolves to neither a browse nor a
    watch endpoint and ``fatal`` is set; otherwise warns and returns None.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolved endpoint, API endpoint to query), in order of preference
    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=api_ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)
4704
# Default value for the `params` field of search requests; None sends no `params`
_SEARCH_PARAMS = None

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """Yield search results for ``query``, following continuations.

    ``params`` defaults to the class-level ``_SEARCH_PARAMS``; a falsy value
    leaves the ``params`` field out of the request.
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    # Candidate locations of the result list inside a search response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    check_get_keys = tuple({keys[0] for keys in content_keys})
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    continuation_list = [None]
    search = None
    page_num = 0
    while True:
        page_num += 1
        # The continuation of the previous page (if any) is merged into the request
        data.update(continuation_list[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
        if not continuation_list[0]:
            break
4739
4740
class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
    # Extractor for YouTube "tab"-style URLs. Going by _VALID_URL and _TESTS below this
    # covers /channel/, /c/, /user/ and /browse/ pages (with an optional trailing tab such
    # as /videos or /playlists), /feed/ and /hashtag/ pages, playlist URLs, watch URLs
    # that carry a `list=` parameter, and "direct" channel URLs such as youtube.com/<name>.
    IE_DESC = 'YouTube Tabs'

    # Verbose-mode pattern, scoped with `(?x:...)` so that it can be embedded into
    # _URL_RE further down without leaking the flag. Named groups:
    #   channel_type - which of channel|c|user|browse prefixed the id (if any)
    #   not_channel  - set for feed/, hashtag/ and playlist/watch `list=` URLs
    #   id           - the channel, playlist, feed or hashtag identifier
    # The negative lookahead keeps reserved top-level path names from being
    # treated as direct channel URLs.
    _VALID_URL = r'''(?x:
        https?://
            (?:\w+\.)?
            (?:
                youtube(?:kids)?\.com|
                %(invidious)s
            )/
            (?:
                (?P<channel_type>channel|c|user|browse)/|
                (?P<not_channel>
                    feed/|hashtag/|
                    (?:playlist|watch)\?.*?\blist=
                )|
                (?!(?:%(reserved_names)s)\b)  # Direct URLs
            )
            (?P<id>[^/?\#&]+)
    )''' % {
        'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:tab'

    _TESTS = [{
        'note': 'playlists, multipage',
        'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
        'playlist_mincount': 94,
        'info_dict': {
            'id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'title': 'Igor Kleiner - Playlists',
            'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
            'uploader': 'Igor Kleiner',
            'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'channel': 'Igor Kleiner',
            'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
            'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
            'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
            'channel_follower_count': int
        },
    }, {
        'note': 'playlists, multipage, different order',
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 94,
        'info_dict': {
            'id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'title': 'Igor Kleiner - Playlists',
            'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
            'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'uploader': 'Igor Kleiner',
            'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
            'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
            'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'channel': 'Igor Kleiner',
            'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
            'channel_follower_count': int
        },
    }, {
        'note': 'playlists, series',
        'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'UCYO_jab_esuFRV4b17AJtAw',
            'title': '3Blue1Brown - Playlists',
            'description': 'md5:e1384e8a133307dd10edee76e875d62f',
            'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
            'uploader': '3Blue1Brown',
            'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
            'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
            'channel': '3Blue1Brown',
            'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
            'tags': ['Mathematics'],
            'channel_follower_count': int
        },
    }, {
        'note': 'playlists, singlepage',
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
            'title': 'ThirstForScience - Playlists',
            'description': 'md5:609399d937ea957b0f53cbffb747a14c',
            'uploader': 'ThirstForScience',
            'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
            'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
            'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
            'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
            'tags': 'count:13',
            'channel': 'ThirstForScience',
            'channel_follower_count': int
        }
    }, {
        'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
        'only_matching': True,
    }, {
        'note': 'basic, single video playlist',
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
            'title': 'youtube-dl public playlist',
            'description': '',
            'tags': [],
            'view_count': int,
            'modified_date': '20201130',
            'channel': 'Sergey M.',
            'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
            'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
        },
        'playlist_count': 1,
    }, {
        'note': 'empty playlist',
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
            'title': 'youtube-dl empty playlist',
            'tags': [],
            'channel': 'Sergey M.',
            'description': '',
            'modified_date': '20160902',
            'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
        },
        'playlist_count': 0,
    }, {
        'note': 'Home tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Home',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel': 'lex will',
            'tags': ['bible', 'history', 'prophesy'],
            'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel_follower_count': int
        },
        'playlist_mincount': 2,
    }, {
        'note': 'Videos tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Videos',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'tags': ['bible', 'history', 'prophesy'],
            'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel': 'lex will',
            'channel_follower_count': int
        },
        'playlist_mincount': 975,
    }, {
        'note': 'Videos tab, sorted by popular',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Videos',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel': 'lex will',
            'tags': ['bible', 'history', 'prophesy'],
            'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel_follower_count': int
        },
        'playlist_mincount': 199,
    }, {
        'note': 'Playlists tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Playlists',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel': 'lex will',
            'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'tags': ['bible', 'history', 'prophesy'],
            'channel_follower_count': int
        },
        'playlist_mincount': 17,
    }, {
        'note': 'Community tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Community',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel': 'lex will',
            'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'tags': ['bible', 'history', 'prophesy'],
            'channel_follower_count': int
        },
        'playlist_mincount': 18,
    }, {
        'note': 'Channels tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Channels',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel': 'lex will',
            'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
            'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'tags': ['bible', 'history', 'prophesy'],
            'channel_follower_count': int
        },
        'playlist_mincount': 12,
    }, {
        'note': 'Search tab',
        'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
        'playlist_mincount': 40,
        'info_dict': {
            'id': 'UCYO_jab_esuFRV4b17AJtAw',
            'title': '3Blue1Brown - Search - linear algebra',
            'description': 'md5:e1384e8a133307dd10edee76e875d62f',
            'uploader': '3Blue1Brown',
            'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
            'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
            'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
            'tags': ['Mathematics'],
            'channel': '3Blue1Brown',
            'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
            'channel_follower_count': int
        },
    }, {
        'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
        'only_matching': True,
    }, {
        'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
        'only_matching': True,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
            'uploader': 'Christiaan008',
            'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
            'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
            'view_count': int,
            'modified_date': '20150605',
            'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
            'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
            'channel': 'Christiaan008',
        },
        'playlist_count': 96,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
            'uploader': 'Cauchemar',
            'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
            'channel_url': 'https://www.youtube.com/c/Cauchemar89',
            'tags': [],
            'modified_date': r're:\d{8}',
            'channel': 'Cauchemar',
            'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
            'view_count': int,
            'description': '',
            'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 1123,
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'note': 'even larger playlist, 8832 videos',
        'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
        'only_matching': True,
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
            'uploader': 'Interstellar Movie',
            'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
            'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
            'tags': [],
            'view_count': int,
            'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
            'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
            'channel': 'Interstellar Movie',
            'description': '',
            'modified_date': r're:\d{8}',
        },
        'playlist_mincount': 21,
    }, {
        'note': 'Playlist with "show unavailable videos" button',
        'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
        'info_dict': {
            'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
            'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
            'uploader': 'Phim Siêu Nhân Nhật Bản',
            'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
            'view_count': int,
            'channel': 'Phim Siêu Nhân Nhật Bản',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
            'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
            'modified_date': r're:\d{8}',
        },
        'playlist_mincount': 200,
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'note': 'Playlist with unavailable videos in page 7',
        'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
        'info_dict': {
            'title': 'Uploads from BlankTV',
            'id': 'UU8l9frL61Yl5KFOl87nIm2w',
            'uploader': 'BlankTV',
            'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
            'channel': 'BlankTV',
            'channel_url': 'https://www.youtube.com/c/blanktv',
            'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
            'view_count': int,
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/blanktv',
            'modified_date': r're:\d{8}',
            'description': '',
        },
        'playlist_mincount': 1000,
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
        'info_dict': {
            'title': 'Data Analysis with Dr Mike Pound',
            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
            'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
            'uploader': 'Computerphile',
            'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
            'uploader_url': 'https://www.youtube.com/user/Computerphile',
            'tags': [],
            'view_count': int,
            'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
            'channel_url': 'https://www.youtube.com/user/Computerphile',
            'channel': 'Computerphile',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
        'only_matching': True,
    }, {
        'note': 'Playlist URL that does not actually serve a playlist',
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'view_count': int,
            'like_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is not available.',
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
        'info_dict': {
            'id': 'Wq15eF5vCbI',  # This will keep changing
            'ext': 'mp4',
            'title': str,
            'uploader': 'Sky News',
            'uploader_id': 'skynews',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
            'upload_date': r're:\d{8}',
            'description': str,
            'categories': ['News & Politics'],
            'tags': list,
            'like_count': int,
            'release_timestamp': 1642502819,
            'channel': 'Sky News',
            'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
            'age_limit': 0,
            'view_count': int,
            'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',
            'playable_in_embed': True,
            'release_date': '20220118',
            'availability': 'public',
            'live_status': 'is_live',
            'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
            'channel_follower_count': int
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['Ignoring subtitle tracks found in '],
    }, {
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'note': 'A channel that is not live. Should raise error',
        'url': 'https://www.youtube.com/user/numberphile/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/trending',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/library',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/history',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/subscriptions',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/watch_later',
        'only_matching': True,
    }, {
        'note': 'Recommended - redirects to home page.',
        'url': 'https://www.youtube.com/feed/recommended',
        'only_matching': True,
    }, {
        'note': 'inline playlist with not always working continuations',
        'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/course',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/zsecurity',
        'only_matching': True,
    }, {
        'url': 'http://www.youtube.com/NASAgovVideo/videos',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/hashtag/cctv9',
        'info_dict': {
            'id': 'cctv9',
            'title': '#cctv9',
            'tags': [],
        },
        'playlist_mincount': 350,
    }, {
        'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
        'only_matching': True,
    }, {
        'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
        'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
        'only_matching': True
    }, {
        'note': '/browse/ should redirect to /channel/',
        'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
        'only_matching': True
    }, {
        'note': 'VLPL, should redirect to playlist?list=PL...',
        'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
        'info_dict': {
            'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
            'uploader': 'NoCopyrightSounds',
            'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
            'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
            'title': 'NCS : All Releases 💿',
            'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
            'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
            'modified_date': r're:\d{8}',
            'view_count': int,
            'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
            'tags': [],
            'channel': 'NoCopyrightSounds',
        },
        'playlist_mincount': 166,
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'note': 'Topic, should redirect to playlist?list=UU...',
        'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
        'info_dict': {
            'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
            'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
            'title': 'Uploads from Royalty Free Music - Topic',
            'uploader': 'Royalty Free Music - Topic',
            'tags': [],
            'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
            'channel': 'Royalty Free Music - Topic',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
            'modified_date': r're:\d{8}',
            'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
            'description': '',
        },
        'expected_warnings': [
            'The URL does not have a videos tab',
            r'[Uu]navailable videos (are|will be) hidden',
        ],
        'playlist_mincount': 101,
    }, {
        'note': 'Topic without a UU playlist',
        'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
        'info_dict': {
            'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
            'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
            'tags': [],
        },
        'expected_warnings': [
            'the playlist redirect gave error',
        ],
        'playlist_mincount': 9,
    }, {
        'note': 'Youtube music Album',
        'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
        'info_dict': {
            'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
            'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
            'tags': [],
            'view_count': int,
            'description': '',
            'availability': 'unlisted',
            'modified_date': r're:\d{8}',
        },
        'playlist_count': 50,
    }, {
        'note': 'unlisted single video playlist',
        'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
        'info_dict': {
            'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
            'uploader': 'colethedj',
            'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
            'title': 'yt-dlp unlisted playlist test',
            'availability': 'unlisted',
            'tags': [],
            'modified_date': '20220418',
            'channel': 'colethedj',
            'view_count': int,
            'description': '',
            'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
            'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
            'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
        },
        'playlist_count': 1,
    }, {
        'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
        'url': 'https://www.youtube.com/feed/recommended',
        'info_dict': {
            'id': 'recommended',
            'title': 'recommended',
            'tags': [],
        },
        'playlist_mincount': 50,
        'params': {
            'skip_download': True,
            'extractor_args': {'youtubetab': {'skip': ['webpage']}}
        },
    }, {
        'note': 'API Fallback: /videos tab, sorted by oldest first',
        'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
        'info_dict': {
            'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
            'title': 'Cody\'sLab - Videos',
            'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
            'uploader': 'Cody\'sLab',
            'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
            'channel': 'Cody\'sLab',
            'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
            'tags': [],
            'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
            'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
            'channel_follower_count': int
        },
        'playlist_mincount': 650,
        'params': {
            'skip_download': True,
            'extractor_args': {'youtubetab': {'skip': ['webpage']}}
        },
    }, {
        'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
        'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
        'info_dict': {
            'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
            'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
            'title': 'Uploads from Royalty Free Music - Topic',
            'uploader': 'Royalty Free Music - Topic',
            'modified_date': r're:\d{8}',
            'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
            'tags': [],
            'channel': 'Royalty Free Music - Topic',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
        },
        'expected_warnings': [
            'does not have a videos tab',
            r'[Uu]navailable videos (are|will be) hidden',
        ],
        'playlist_mincount': 101,
        'params': {
            'skip_download': True,
            'extractor_args': {'youtubetab': {'skip': ['webpage']}}
        },
    }, {
        'note': 'non-standard redirect to regional channel',
        'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
        'only_matching': True
    }, {
        'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
        'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
        'info_dict': {
            'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
            'modified_date': '20220407',
            'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
            'tags': [],
            'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
            'uploader': 'pukkandan',
            'availability': 'unlisted',
            'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
            'channel': 'pukkandan',
            'description': 'Test for collaborative playlist',
            'title': 'yt-dlp test - collaborative playlist',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
        },
        'playlist_mincount': 2
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle *url*.

        Any URL that YoutubeIE accepts is rejected here, so that extractor takes
        precedence; everything else is decided by the inherited `suitable`.
        """
        return False if YoutubeIE.suitable(url) else super().suitable(url)

    # Splits a URL into `pre` (everything _VALID_URL matches), an optional `tab`
    # ("/<word>") and `post` (the remainder). The conditional group
    # `(?(not_channel)|...)` means a tab is only looked for when the `not_channel`
    # group did not take part in the match.
    _URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')

    @YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
    def _real_extract(self, url, smuggled_data):
        """Resolve a tab/playlist URL into a result dict.

        The URL is normalised first (host forced to www.youtube.com, tab name
        lower-cased, YouTube Music ids mapped to their regular equivalents), the
        page data is fetched through `_extract_data`, and the outcome is chosen
        from the shape of that data: browse tabs, a watch-page playlist, or a
        single video that is handed over to YoutubeIE.

        `smuggled_data` is supplied by the `passthrough_smuggled_data` decorator
        (defined outside this class); only its 'is_music_url' key is read here.

        Raises UserNotLive when a /live tab was requested but a tab page was
        returned, and ExtractorError when the page cannot be recognised, an album
        cannot be resolved, or an explicitly requested tab does not exist.
        """
        item_id = self._match_id(url)
        # Whatever host was given (music., kids, an invidious instance, ...),
        # all further requests are made against www.youtube.com
        url = urllib.parse.urlunparse(
            urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # groupdict() of _URL_RE with unmatched groups turned into '' so that
            # callers can use str methods on every value without None checks
            mobj = self._URL_RE.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj, redirect_warning = get_mobj(url), None
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
                elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                    mdata = self._extract_tab_endpoint(
                        f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
                    murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                        get_all=False, expected_type=str)
                    if not murl:
                        raise ExtractorError('Failed to resolve album to playlist')
                    return self.url_result(murl, ie=YoutubeTabIE.ie_key())
                elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                    pre = f'https://www.youtube.com/channel/{item_id}'

        # Remember what the user asked for before a default tab is substituted;
        # this decides further down whether a missing tab is an error or a warning
        original_tab_name = tab
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id, playlist_id = (qs.get(key, [None])[0] for key in ('v', 'list'))

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
            url = f'https://www.youtube.com/playlist?list={playlist_id}'
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
                return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                       ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

        data, ytcfg = self._extract_data(url, item_id)

        # YouTube may provide a non-standard redirect to the regional channel
        # See: https://github.com/yt-dlp/yt-dlp/issues/2694
        redirect_url = traverse_obj(
            data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
        if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
            redirect_url = ''.join((
                urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
            self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
            return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

        tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            selected_tab_name = selected_tab.get('title', '').lower()
            # The tab titled "Home" is addressed as /featured in URLs
            if selected_tab_name == 'home':
                selected_tab_name = 'featured'
            requested_tab_name = mobj['tab'][1:]
            if 'no-youtube-channel-redirect' not in compat_opts:
                if requested_tab_name == 'live':  # Live tab should have redirected to the video
                    raise UserNotLive(video_id=mobj['id'])
                if requested_tab_name not in ('', selected_tab_name):
                    redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                    if not original_tab_name:
                        # The tab was only implied by the /videos default above, so
                        # fall back gracefully instead of failing
                        if item_id[:2] == 'UC':
                            # Topic channels don't have /videos. Use the equivalent playlist instead
                            pl_id = f'UU{item_id[2:]}'
                            pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                            try:
                                data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                            except ExtractorError:
                                redirect_warning += ' and the playlist redirect gave error'
                            else:
                                item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                                redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                        if selected_tab_name and selected_tab_name != requested_tab_name:
                            redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'
                    else:
                        # The user explicitly asked for a tab that is not there
                        raise ExtractorError(redirect_warning, expected=True)

        if redirect_warning:
            self.to_screen(redirect_warning)
        self.write_debug(f'Final URL: {url}')

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            # Keep the data already fetched when the reload yields nothing
            data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
        self._extract_and_report_alerts(data, only_once=True)
        # `data` may have been replaced above (playlist fallback or reload), so
        # the tabs are looked up again rather than reusing the earlier value
        tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
        if tabs:
            return self._extract_from_tabs(item_id, ytcfg, data, tabs)

        playlist = traverse_obj(
            data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

        # Neither tabs nor a playlist: fall back to a single video, preferring the
        # id reported by the page over the one taken from the query string
        video_id = traverse_obj(
            data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                   ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
5565
5566
class YoutubePlaylistIE(InfoExtractor):
    """Match bare playlist IDs and ``list=`` URLs and hand them over to YoutubeTabIE.

    No page is downloaded here: the input is rewritten to a
    ``https://www.youtube.com/playlist`` URL and returned as a url result.
    """
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/user/Wickydoo',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/user/Wickydoo',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/c/愛低音的國王',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle ``url``.

        Returns False when YoutubeTabIE already claims the URL or when the URL
        carries a non-empty ``v`` query parameter; otherwise defers to the
        regular ``_VALID_URL`` matching of the parent class.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # parse_qs comes from the module-level import; a local re-import is unnecessary
        qs = parse_qs(url)
        if qs.get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Rewrite ``url`` to a /playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Must be evaluated before `url` is overwritten with the youtube.com playlist URL
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Carry over the original query string; a bare ID has none, so fall back to list=<id>
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
5676
5677
class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a ``list=`` playlist parameter."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [
        {
            'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
            'info_dict': {
                'id': 'yeWKywCrFtk',
                'ext': 'mp4',
                'title': 'Small Scale Baler and Braiding Rugs',
                'uploader': 'Backus-Page House Museum',
                'uploader_id': 'backuspagemuseum',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
                'upload_date': '20161008',
                'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
                'categories': ['Nonprofits & Activism'],
                'tags': list,
                'like_count': int,
                'age_limit': 0,
                'playable_in_embed': True,
                'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
                'channel': 'Backus-Page House Museum',
                'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
                'live_status': 'not_live',
                'view_count': int,
                'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
                'availability': 'public',
                'duration': 59,
                'comment_count': int,
                'channel_follower_count': int,
            },
            'params': {
                'noplaylist': True,
                'skip_download': True,
            },
        },
        {
            'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Build the equivalent /watch URL (video + playlist) and pass it to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
5727
5728
class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map ``/embed/live_stream?channel=<id>`` embeds to the channel's /live URL."""
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return a url result for the channel's /live page, handled by YoutubeTabIE."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
5742
5743
class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` shorthand to the user's /videos page."""
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [
        {
            'url': 'ytuser:phihag',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return a url result for the user's videos tab, handled by YoutubeTabIE."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
5758
5759
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Expand the ``:ytfav`` keyword family to the ``LL`` playlist URL."""
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [
        {
            'url': ':ytfav',
            'only_matching': True,
        },
        {
            'url': ':ytfavorites',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return a url result for playlist ``LL``, handled by YoutubeTabIE."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
5777
5778
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Expose the notification menu as a playlist via the ``:ytnotif`` keyword."""
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [
        {
            'url': ':ytnotif',
            'only_matching': True,
        },
        {
            'url': ':ytnotifications',
            'only_matching': True,
        },
    ]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield one entry per notification found in ``response``.

        ``continuation_list`` is a one-element list used as an out-parameter:
        slot 0 is reset to None and then set to the last
        ``continuationItemRenderer`` seen among the items, if any.
        """
        # Path used by the initial popup response
        popup_items_path = (
            'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer',
            'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items')
        # Path used by continuation responses
        appended_items_path = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')
        menu_items = traverse_obj(
            response, popup_items_path, appended_items_path, expected_type=list) or []

        continuation_list[0] = None
        for menu_item in menu_items:
            result = self._extract_notification_renderer(menu_item.get('notificationRenderer'))
            if result:
                yield result
            next_page = menu_item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Convert one ``notificationRenderer`` into a url-type entry dict.

        Video notifications point at the watch URL (YoutubeIE); otherwise the
        notification is treated as a community post (YoutubeTabIE).
        Returns None when neither a video ID nor a channel/post pair is found.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            target_ie = YoutubeIE
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            target_ie = YoutubeTabIE
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return None
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification,
            ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)

        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)

        # The date is only filled in when the approximate_date extractor argument is set
        upload_date = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()):
            upload_date = strftime_or_none(
                self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')

        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield notification entries page by page until no continuation is left."""
        continuation_list = [None]
        response = None
        for page_num in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list,
                (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'),
                expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data is taken from the previous page's response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page_num}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                return

    def _real_extract(self, url):
        """Return a playlist named ``notifications`` with lazily fetched entries."""
        display_id = 'notifications'
        ytcfg = {} if self.skip_webpage else self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)
5868
5869
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor bound to the ``ytsearch`` search key."""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [
        {
            'url': 'ytsearch5:youtube-dl test video',
            'playlist_count': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
    ]
5883
5884
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor bound to the ``ytsearchdate`` search key."""
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [
        {
            'url': 'ytsearchdate5:youtube-dl test video',
            'playlist_count': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
    ]
5898
5899
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle ``/results`` and ``/search`` URLs, forwarding the optional ``sp`` parameter."""
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
        {
            'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'python',
                'title': 'python',
            },
        },
        {
            'url': 'https://www.youtube.com/results?search_query=%23cats',
            'playlist_mincount': 1,
            'info_dict': {
                'id': '#cats',
                'title': '#cats',
                # The test suite does not have support for nested playlists
                # 'entries': [{
                #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
                #     'title': '#cats',
                # }],
            },
        },
        {
            'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return a playlist of search results; the query doubles as playlist id and title."""
        qs = parse_qs(url)
        # `search_query` takes precedence over `q` when both are present
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        search_params = qs.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)
5939
5940
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle music.youtube.com search URLs.

    A result section can be chosen with an explicit ``sp`` query parameter or
    with a ``#section`` URL fragment naming one of the keys of ``_SECTIONS``.
    """
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music',
            'playlist_count': 16,
            'info_dict': {
                'id': 'royalty free music',
                'title': 'royalty free music',
            },
        },
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
            'playlist_mincount': 30,
            'info_dict': {
                'id': 'royalty free music - songs',
                'title': 'royalty free music - songs',
            },
            'params': {'extract_flat': 'in_playlist'},
        },
        {
            'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
            'playlist_mincount': 30,
            'info_dict': {
                'id': 'royalty free music - community playlists',
                'title': 'royalty free music - community playlists',
            },
            'params': {'extract_flat': 'in_playlist'},
        },
    ]

    # Section name -> value of the `sp` search parameter
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return a playlist of search results titled ``<query>[ - <section>]``."""
        qs = parse_qs(url)
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        params = qs.get('sp', (None,))[0]

        if params:
            # Translate a known `sp` value back to its section name;
            # an unknown value is used verbatim as the section label
            section = params
            for section_name, section_params in self._SECTIONS.items():
                if section_params == params:
                    section = section_name
                    break
        else:
            # No `sp` given: look the section up from the text after the first '#'
            fragment = (url.split('#') + [''])[1]
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None

        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)
5992
5993
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the ``/feed/<name>`` extractors.

    Subclasses must override ``_FEED_NAME``; it determines both ``IE_NAME``
    and the feed URL that is handed to YoutubeTabIE.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    def _real_initialize(self):
        """Run the login check borrowed from YoutubeBaseInfoExtractor."""
        YoutubeBaseInfoExtractor._check_login_required(self)

    @classproperty
    def IE_NAME(cls):
        """Extractor name derived from the feed name."""
        return f'youtube:{cls._FEED_NAME}'

    def _real_extract(self, url):
        """Return a url result for this feed's page, handled by YoutubeTabIE."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
6012
6013
class YoutubeWatchLaterIE(InfoExtractor):
    """Expand the ``:ytwatchlater`` keyword to the ``WL`` playlist URL."""
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {
            'url': ':ytwatchlater',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return a url result for playlist ``WL``, handled by YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
6026
6027
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``recommended``; matches the bare site URL and ``:ytrec``."""
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    # Overrides the base class default of True
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {
            'url': ':ytrec',
            'only_matching': True,
        },
        {
            'url': ':ytrecommended',
            'only_matching': True,
        },
        {
            'url': 'https://youtube.com',
            'only_matching': True,
        },
    ]
6043
6044
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``subscriptions``; matches the ``:ytsubs`` keyword family."""
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {
            'url': ':ytsubs',
            'only_matching': True,
        },
        {
            'url': ':ytsubscriptions',
            'only_matching': True,
        },
    ]
6056
6057
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``history``; matches ``:ythis`` and ``:ythistory``."""
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {
            'url': ':ythistory',
            'only_matching': True,
        },
    ]
6066
6067
class YoutubeStoriesIE(InfoExtractor):
    """Expand ``ytstories:UC<channel id>`` to the channel's ``RLTD...`` playlist URL."""
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    IE_NAME = 'youtube:stories'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url result for the stories playlist, handled by YoutubeTabIE.

        The playlist ID is ``RLTD`` followed by the channel ID without its
        ``UC`` prefix. ``is_story`` is smuggled into the URL for the
        downstream extractor.
        """
        playlist_id = f'RLTD{self._match_id(url)}'
        return self.url_result(
            smuggle_url(f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1', {'is_story': True}),
            # Pass the key string, like every other delegating extractor in this file
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
6082
6083
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that lack a video ID and explain the likely cause.

    Such URLs typically result from an unquoted ``&`` on the command line.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError advising the user to quote the URL."""
        # The example commands name this program (yt-dlp), not its predecessor
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
6131
6132
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Resolve ``/clip/<id>`` URLs to the base video plus the clip's start/end section."""
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """Return a url_transparent result for the clip's base video.

        ``section_start`` and ``section_end`` are given in seconds, converted
        from the millisecond values of the clip's ``loopCommand``.

        Raises ExtractorError when the video ID or the clip timing data cannot
        be found in the page data.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)

        # clip_data is None when the path does not match, and the time fields may be
        # missing or malformed; report that as an extraction error instead of crashing
        try:
            section_start = int(clip_data['startTimeMs']) / 1000
            section_end = int(clip_data['endTimeMs']) / 1000
        except (TypeError, KeyError, ValueError):
            raise ExtractorError('Unable to find clip section times')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            'section_start': section_start,
            'section_end': section_end,
        }
6189
6190
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` value is only 1 to 10 characters long."""
    IE_DESC = False  # Do not list
    IE_NAME = 'youtube:truncated_id'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID."""
        partial_id = self._match_id(url)
        message = f'Incomplete YouTube ID {partial_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)