]> jfr.im git - yt-dlp.git/blame_incremental - yt_dlp/extractor/youtube.py
[extractor/IslamChannel] Add extractors (#4779)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
... / ...
CommitLineData
1import base64
2import calendar
3import copy
4import datetime
5import hashlib
6import itertools
7import json
8import math
9import os.path
10import random
11import re
12import sys
13import threading
14import time
15import traceback
16import urllib.error
17import urllib.parse
18
19from .common import InfoExtractor, SearchInfoExtractor
20from .openload import PhantomJSwrapper
21from ..compat import functools
22from ..jsinterp import JSInterpreter
23from ..utils import (
24 NO_DEFAULT,
25 ExtractorError,
26 UserNotLive,
27 bug_reports_message,
28 classproperty,
29 clean_html,
30 datetime_from_str,
31 dict_get,
32 float_or_none,
33 format_field,
34 get_first,
35 int_or_none,
36 is_html,
37 join_nonempty,
38 js_to_json,
39 mimetype2ext,
40 network_exceptions,
41 orderedSet,
42 parse_codecs,
43 parse_count,
44 parse_duration,
45 parse_iso8601,
46 parse_qs,
47 qualities,
48 remove_start,
49 smuggle_url,
50 str_or_none,
51 str_to_int,
52 strftime_or_none,
53 traverse_obj,
54 try_get,
55 unescapeHTML,
56 unified_strdate,
57 unified_timestamp,
58 unsmuggle_url,
59 update_url_query,
60 url_or_none,
61 urljoin,
62 variadic,
63)
64
# Static configuration for each Innertube API client, keyed by client id.
# Keys used per entry:
#   INNERTUBE_API_KEY             - API key; optional here, build_innertube_clients() fills in a default
#   INNERTUBE_HOST                - API hostname; optional here, build_innertube_clients() defaults it
#                                   to 'www.youtube.com'
#   INNERTUBE_CONTEXT             - request context; its 'client' dict carries clientName/clientVersion
#                                   plus optional device hints (androidSdkVersion, deviceModel)
#   INNERTUBE_CONTEXT_CLIENT_NAME - numeric client id; generate_api_headers() sends it as the
#                                   X-YouTube-Client-Name header
#   REQUIRE_JS_PLAYER             - optional here, build_innertube_clients() defaults it to True
# build_innertube_clients() mutates this dict in place (adds 'priority' and '<client>_embedscreen' entries).
65# any clients starting with _ cannot be explicitly requested by the user
66INNERTUBE_CLIENTS = {
67 'web': {
68 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
69 'INNERTUBE_CONTEXT': {
70 'client': {
71 'clientName': 'WEB',
72 'clientVersion': '2.20220801.00.00',
73 }
74 },
75 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
76 },
77 'web_embedded': {
78 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
79 'INNERTUBE_CONTEXT': {
80 'client': {
81 'clientName': 'WEB_EMBEDDED_PLAYER',
82 'clientVersion': '1.20220731.00.00',
83 },
84 },
85 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
86 },
87 'web_music': {
88 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
89 'INNERTUBE_HOST': 'music.youtube.com',
90 'INNERTUBE_CONTEXT': {
91 'client': {
92 'clientName': 'WEB_REMIX',
93 'clientVersion': '1.20220727.01.00',
94 }
95 },
96 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
97 },
98 'web_creator': {
99 'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
100 'INNERTUBE_CONTEXT': {
101 'client': {
102 'clientName': 'WEB_CREATOR',
103 'clientVersion': '1.20220726.00.00',
104 }
105 },
106 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
107 },
108 'android': {
109 'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
110 'INNERTUBE_CONTEXT': {
111 'client': {
112 'clientName': 'ANDROID',
113 'clientVersion': '17.29.34',
114 'androidSdkVersion': 30
115 }
116 },
117 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
118 'REQUIRE_JS_PLAYER': False
119 },
120 'android_embedded': {
121 'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
122 'INNERTUBE_CONTEXT': {
123 'client': {
124 'clientName': 'ANDROID_EMBEDDED_PLAYER',
125 'clientVersion': '17.29.34',
126 'androidSdkVersion': 30
127 },
128 },
129 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
130 'REQUIRE_JS_PLAYER': False
131 },
132 'android_music': {
133 'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
134 'INNERTUBE_CONTEXT': {
135 'client': {
136 'clientName': 'ANDROID_MUSIC',
137 'clientVersion': '5.16.51',
138 'androidSdkVersion': 30
139 }
140 },
141 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
142 'REQUIRE_JS_PLAYER': False
143 },
144 'android_creator': {
145 'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
146 'INNERTUBE_CONTEXT': {
147 'client': {
148 'clientName': 'ANDROID_CREATOR',
149 'clientVersion': '22.28.100',
150 'androidSdkVersion': 30
151 },
152 },
153 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
154 'REQUIRE_JS_PLAYER': False
155 },
156 # iOS clients have HLS live streams. Setting device model to get 60fps formats.
157 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
158 'ios': {
159 'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
160 'INNERTUBE_CONTEXT': {
161 'client': {
162 'clientName': 'IOS',
163 'clientVersion': '17.30.1',
164 'deviceModel': 'iPhone14,3',
165 }
166 },
167 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
168 'REQUIRE_JS_PLAYER': False
169 },
170 'ios_embedded': {
171 'INNERTUBE_CONTEXT': {
172 'client': {
173 'clientName': 'IOS_MESSAGES_EXTENSION',
174 'clientVersion': '17.30.1',
175 'deviceModel': 'iPhone14,3',
176 },
177 },
178 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
179 'REQUIRE_JS_PLAYER': False
180 },
181 'ios_music': {
182 'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
183 'INNERTUBE_CONTEXT': {
184 'client': {
185 'clientName': 'IOS_MUSIC',
186 'clientVersion': '5.18',
187 },
188 },
189 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
190 'REQUIRE_JS_PLAYER': False
191 },
192 'ios_creator': {
193 'INNERTUBE_CONTEXT': {
194 'client': {
195 'clientName': 'IOS_CREATOR',
196 'clientVersion': '22.29.101',
197 },
198 },
199 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
200 'REQUIRE_JS_PLAYER': False
201 },
202 # mweb has 'ultralow' formats
203 # See: https://github.com/yt-dlp/yt-dlp/pull/557
204 'mweb': {
205 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
206 'INNERTUBE_CONTEXT': {
207 'client': {
208 'clientName': 'MWEB',
209 'clientVersion': '2.20220801.00.00',
210 }
211 },
212 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
213 },
214 # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
215 # See: https://github.com/zerodytrash/YouTube-Internal-Clients
216 'tv_embedded': {
217 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
218 'INNERTUBE_CONTEXT': {
219 'client': {
220 'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
221 'clientVersion': '2.0',
222 },
223 },
224 'INNERTUBE_CONTEXT_CLIENT_NAME': 85
225 },
226}
227
228
229def _split_innertube_client(client_name):
230 variant, *base = client_name.rsplit('.', 1)
231 if base:
232 return variant, base[0], variant
233 base, *variant = client_name.split('_', 1)
234 return client_name, base, variant[0] if variant else None
235
236
def build_innertube_clients():
    """Complete every INNERTUBE_CLIENTS entry in place.

    For each configured client this fills in the default API key, host,
    ``REQUIRE_JS_PLAYER`` flag and ``hl`` language, and assigns a numeric
    ``priority`` derived from the base client's position in the preference
    order (android > web > tv > ios > mweb), lowered for variants.
    Clients without a variant additionally get a derived
    ``'<client>_embedscreen'`` entry.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
    priority = qualities(base_clients[::-1])
    defaults = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: '<client>_embedscreen' entries are inserted during the loop
    for client, ytcfg in list(INNERTUBE_CLIENTS.items()):
        for key, fallback in defaults:
            ytcfg.setdefault(key, fallback)
        ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        base_client, variant = _split_innertube_client(client)[1:]
        ytcfg['priority'] = 10 * priority(base_client)

        if variant == 'embedded':
            ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            ytcfg['priority'] -= 2
        elif variant:
            ytcfg['priority'] -= 3
        else:
            # Plain base client: derive an embed-screen flavour from the finished config
            embedscreen = copy.deepcopy(ytcfg)
            embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            embedscreen['priority'] -= 3
            INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen


build_innertube_clients()
266
267
# Shared base class for the YouTube extractors defined in this file.
268class YoutubeBaseInfoExtractor(InfoExtractor):
269 """Provide base functions for Youtube extractors"""
270
# Regex alternation of URL path names (channel, watch, embed, ...).
# NOTE(review): presumably these are paths that must not be treated as channel/user
# names; the place where this is consumed is not visible in this chunk - confirm.
271 _RESERVED_NAMES = (
272 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
273 r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
274 r'browse|oembed|get_video_info|iframe_api|s/player|'
275 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
276
# Matches a playlist id: either a known prefix (PL, LL, EC, UU, FL, RD, UL, TL, PU, OLAK5uy_)
# followed by at least 10 id characters, or one of the fixed ids RDMM, WL, LL, LM.
277 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
278
279 # _NETRC_MACHINE = 'youtube'
280
281 # If True it will raise an error if no login info is provided
# (checked by _check_login_required(), which only accepts cookies as login info)
282 _LOGIN_REQUIRED = False
283
# Tuple of hostname regex fragments for third-party front-ends (Invidious and Piped
# instances, clearnet / .onion / .i2p), each allowing an optional 'www.' prefix.
# NOTE(review): presumably joined into the `%(invidious)s` placeholder of
# YoutubeIE._VALID_URL; the substitution itself is not visible in this chunk - confirm.
# NOTE(review): 'piped.kavin.rocks' is matched twice (by the combined kavin.rocks
# pattern and again in the piped section); redundant but harmless in an alternation.
284 _INVIDIOUS_SITES = (
285 # invidious-redirect websites
286 r'(?:www\.)?redirect\.invidious\.io',
287 r'(?:(?:www|dev)\.)?invidio\.us',
288 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
289 r'(?:www\.)?invidious\.pussthecat\.org',
290 r'(?:www\.)?invidious\.zee\.li',
291 r'(?:www\.)?invidious\.ethibox\.fr',
292 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
293 r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
294 r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
295 # youtube-dl invidious instances list
296 r'(?:(?:www|no)\.)?invidiou\.sh',
297 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
298 r'(?:www\.)?invidious\.kabi\.tk',
299 r'(?:www\.)?invidious\.mastodon\.host',
300 r'(?:www\.)?invidious\.zapashcanon\.fr',
301 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
302 r'(?:www\.)?invidious\.tinfoil-hat\.net',
303 r'(?:www\.)?invidious\.himiko\.cloud',
304 r'(?:www\.)?invidious\.reallyancient\.tech',
305 r'(?:www\.)?invidious\.tube',
306 r'(?:www\.)?invidiou\.site',
307 r'(?:www\.)?invidious\.site',
308 r'(?:www\.)?invidious\.xyz',
309 r'(?:www\.)?invidious\.nixnet\.xyz',
310 r'(?:www\.)?invidious\.048596\.xyz',
311 r'(?:www\.)?invidious\.drycat\.fr',
312 r'(?:www\.)?inv\.skyn3t\.in',
313 r'(?:www\.)?tube\.poal\.co',
314 r'(?:www\.)?tube\.connect\.cafe',
315 r'(?:www\.)?vid\.wxzm\.sx',
316 r'(?:www\.)?vid\.mint\.lgbt',
317 r'(?:www\.)?vid\.puffyan\.us',
318 r'(?:www\.)?yewtu\.be',
319 r'(?:www\.)?yt\.elukerio\.org',
320 r'(?:www\.)?yt\.lelux\.fi',
321 r'(?:www\.)?invidious\.ggc-project\.de',
322 r'(?:www\.)?yt\.maisputain\.ovh',
323 r'(?:www\.)?ytprivate\.com',
324 r'(?:www\.)?invidious\.13ad\.de',
325 r'(?:www\.)?invidious\.toot\.koeln',
326 r'(?:www\.)?invidious\.fdn\.fr',
327 r'(?:www\.)?watch\.nettohikari\.com',
328 r'(?:www\.)?invidious\.namazso\.eu',
329 r'(?:www\.)?invidious\.silkky\.cloud',
330 r'(?:www\.)?invidious\.exonip\.de',
331 r'(?:www\.)?invidious\.riverside\.rocks',
332 r'(?:www\.)?invidious\.blamefran\.net',
333 r'(?:www\.)?invidious\.moomoo\.de',
334 r'(?:www\.)?ytb\.trom\.tf',
335 r'(?:www\.)?yt\.cyberhost\.uk',
336 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
337 r'(?:www\.)?qklhadlycap4cnod\.onion',
338 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
339 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
340 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
341 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
342 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
343 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
344 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
345 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
346 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
347 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
348 # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
349 r'(?:www\.)?piped\.kavin\.rocks',
350 r'(?:www\.)?piped\.silkky\.cloud',
351 r'(?:www\.)?piped\.tokhmi\.xyz',
352 r'(?:www\.)?piped\.moomoo\.me',
353 r'(?:www\.)?il\.ax',
354 r'(?:www\.)?piped\.syncpundit\.com',
355 r'(?:www\.)?piped\.mha\.fi',
356 r'(?:www\.)?piped\.mint\.lgbt',
357 r'(?:www\.)?piped\.privacy\.com\.de',
358 )
359
def _initialize_consent(self):
    """Make sure an accepted ``CONSENT`` cookie exists for youtube.com.

    Nothing is done when a ``__Secure-3PSID`` cookie is present or when the
    existing ``CONSENT`` cookie already contains ``YES``. Otherwise a
    ``YES+...`` cookie is set, reusing the id of a ``PENDING+<id>`` cookie
    when there is one and a random 3-digit id when there is not.
    """
    yt_cookies = self._get_cookies('https://www.youtube.com/')
    if yt_cookies.get('__Secure-3PSID'):
        return

    existing = yt_cookies.get('CONSENT')
    pending_id = None
    if existing:
        if 'YES' in existing.value:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', existing.value, 'consent', default=None)

    consent_id = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
374
def _initialize_pref(self):
    """Force ``hl=en`` and ``tz=UTC`` in the youtube.com ``PREF`` cookie.

    Other fields of an existing ``PREF`` cookie are kept; if the cookie
    cannot be parsed a warning is reported and it is rebuilt from scratch.
    """
    pref_cookie = self._get_cookies('https://www.youtube.com/').get('PREF')
    settings = {}
    if pref_cookie:
        try:
            settings = dict(urllib.parse.parse_qsl(pref_cookie.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
    settings['hl'] = 'en'
    settings['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(settings))
386
def _real_initialize(self):
    """Prepare the PREF cookie, then the CONSENT cookie, then enforce the login requirement."""
    self._initialize_pref()
    self._initialize_consent()
    # Last, so that cookies are already normalised if this raises
    self._check_login_required()
391
def _check_login_required(self):
    """Raise a login-required error if this extractor needs login and no cookies were passed."""
    if not self._LOGIN_REQUIRED or self._cookies_passed:
        return
    self.raise_login_required('Login details are needed to download this content', method='cookies')
395
# Regex prefixes that match the assignment part of `ytInitialData = ...`
# (also the `window["ytInitialData"] = ...` spelling) and `ytInitialPlayerResponse = ...`.
# extract_yt_initial_data() passes the first one to _search_json() as the start pattern.
396 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
397 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
398
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the built-in config for `client`, so callers may mutate it freely.

    Raises KeyError for a client that is not in INNERTUBE_CLIENTS.
    """
    builtin_cfg = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(builtin_cfg)
401
def _get_innertube_host(self, client='web'):
    """Return the API hostname configured for `client` in INNERTUBE_CLIENTS."""
    client_cfg = INNERTUBE_CLIENTS[client]
    return client_cfg['INNERTUBE_HOST']
404
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """`try_get` on `ytcfg`, falling back to the built-in config of `default_client`.

    The fallback is used whenever the value from `ytcfg` is missing or falsy.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
409
def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name string from `ytcfg`, or from the default client config."""
    getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_NAME'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)
414
def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version string from `ytcfg`, or from the default client config."""
    getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)
419
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """Pick the API hostname to use.

    Precedence: the user's `innertube_host` extractor argument, then
    `req_api_hostname`, then the host configured for `default_client`
    (falling back to the 'web' client).
    """
    user_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if user_host:
        return user_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
423
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return the API key from `ytcfg`, or from the default client config."""
    def api_key_of(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, api_key_of, str, default_client)
426
def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the INNERTUBE_CONTEXT dict to send, with language and timezone enforced.

    The context is taken from `ytcfg` when it has one, otherwise from the
    default client config. Its 'client' dict is updated IN PLACE, so a context
    coming from the caller's `ytcfg` is modified as well.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    # NOTE(review): when the context has no 'client' dict the values land in a throwaway dict
    client = traverse_obj(context, 'client', expected_type=dict, default={})
    client['hl'] = 'en'
    client['timeZone'] = 'UTC'
    client['utcOffsetMinutes'] = 0
    return context
434
# Cache for the SAPISID cookie value used by _generate_sapisidhash_header():
# None = not looked up yet, False = looked up but no usable cookie, otherwise the cookie value.
435 _SAPISID = None
436
437 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
438 time_now = round(time.time())
439 if self._SAPISID is None:
440 yt_cookies = self._get_cookies('https://www.youtube.com')
441 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
442 # See: https://github.com/yt-dlp/yt-dlp/issues/393
443 sapisid_cookie = dict_get(
444 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
445 if sapisid_cookie and sapisid_cookie.value:
446 self._SAPISID = sapisid_cookie.value
447 self.write_debug('Extracted SAPISID cookie')
448 # SAPISID cookie is required if not already present
449 if not yt_cookies.get('SAPISID'):
450 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
451 self._set_cookie(
452 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
453 else:
454 self._SAPISID = False
455 if not self._SAPISID:
456 return None
457 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
458 sapisidhash = hashlib.sha1(
459 f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
460 return f'SAPISIDHASH {time_now}_{sapisidhash}'
461
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST `query` to the ``/youtubei/v1/<ep>`` endpoint and return the parsed JSON.

    The request body is ``{'context': ...}`` merged with `query`. Missing
    `context`, `api_key` and `api_hostname` are filled from `default_client`;
    the user's `innertube_key` extractor argument overrides `api_key`.
    Extra `headers` are applied on top of the generated API headers.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    user_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = user_key or api_key or self._extract_api_key(default_client=default_client)

    host = self._select_api_hostname(api_hostname, default_client)
    return self._download_json(
        f'https://{host}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(payload).encode('utf8'), headers=request_headers,
        query={'key': api_key, 'prettyPrint': 'false'})
479
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Find the JSON assigned to ``ytInitialData`` in `webpage` via `_search_json` and return it."""
    return self._search_json(
        self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id,
        fatal=fatal)
482
483 @staticmethod
484 def _extract_session_index(*data):
485 """
486 Index of current account in account list.
487 See: https://github.com/yt-dlp/yt-dlp/pull/519
488 """
489 for ytcfg in data:
490 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
491 if session_index is not None:
492 return session_index
493
494 # Deprecated?
495 def _extract_identity_token(self, ytcfg=None, webpage=None):
496 if ytcfg:
497 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
498 if token:
499 return token
500 if webpage:
501 return self._search_regex(
502 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
503 'identity token', default=None, fatal=False)
504
505 @staticmethod
506 def _extract_account_syncid(*args):
507 """
508 Extract syncId required to download private playlists of secondary channels
509 @params response and/or ytcfg
510 """
511 for data in args:
512 # ytcfg includes channel_syncid if on secondary channel
513 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], str)
514 if delegated_sid:
515 return delegated_sid
516 sync_ids = (try_get(
517 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
518 lambda x: x['DATASYNC_ID']), str) or '').split('||')
519 if len(sync_ids) >= 2 and sync_ids[1]:
520 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
521 # and just "user_syncid||" for primary channel. We only want the channel_syncid
522 return sync_ids[0]
523
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    # Alternative locations of the value, tried on each argument in turn
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_data_paths], expected_type=str)
533
@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH header can be generated; computed once and cached."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
537
def extract_ytcfg(self, video_id, webpage):
    """Parse the first ``ytcfg.set({...});`` object in `webpage`.

    Returns an empty dict when `webpage` is empty, nothing matches, or the
    JSON cannot be parsed.
    """
    if not webpage:
        return {}
    raw_cfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', default='{}')
    parsed = self._parse_json(raw_cfg, video_id, fatal=False)
    return parsed or {}
545
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an API request.

    Explicit keyword arguments take precedence; anything not given is
    extracted from `ytcfg` (or the default client config). Authorization
    headers are added when a SAPISID hash can be generated. Headers whose
    value is None are left out of the result.
    """
    origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))
    candidate_headers = {
        'X-YouTube-Client-Name': str(
            self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # A delegated account without a known session index uses index 0
        candidate_headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        candidate_headers['Authorization'] = auth
        candidate_headers['X-Origin'] = origin

    return dict(item for item in candidate_headers.items() if item[1] is not None)
570
571 def _download_ytcfg(self, client, video_id):
572 url = {
573 'web': 'https://www.youtube.com',
574 'web_music': 'https://music.youtube.com',
575 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
576 }.get(client)
577 if not url:
578 return {}
579 webpage = self._download_webpage(
580 url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')
581 return self.extract_ytcfg(video_id, webpage) or {}
582
583 @staticmethod
584 def _build_api_continuation_query(continuation, ctp=None):
585 query = {
586 'continuation': continuation
587 }
588 # TODO: Inconsistency with clickTrackingParams.
589 # Currently we have a fixed ctp contained within context (from ytcfg)
590 # and a ctp in root query for continuation.
591 if ctp:
592 query['clickTracking'] = {'clickTrackingParams': ctp}
593 return query
594
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from `renderer`'s next/reload continuation data, or return None."""
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict) or {}
    token = continuation_data.get('continuation')
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))
607
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    Returns None when `continuation_ep` is not a dict or carries no
    ``continuationCommand.token``.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))
617
@classmethod
def _extract_continuation(cls, renderer):
    """Find the continuation query for `renderer`.

    Prefers next/reload continuation data on the renderer itself; otherwise
    scans its 'contents' and 'items' lists for a ``continuationItemRenderer``.
    Returns None when nothing is found.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    children = [
        child
        for key in ('contents', 'items')
        for child in try_get(renderer, lambda x: x[key], list) or []]

    for child in children:
        if not isinstance(child, dict):
            continue
        endpoint = try_get(
            child, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                    lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
            dict)
        found = cls._extract_continuation_ep_data(endpoint)
        if found:
            return found
    return None
638
@classmethod
def _extract_alerts(cls, data):
    """Yield ``(alert_type, message)`` pairs for the alerts in an API response.

    @param data: response dict; alerts are read from its 'alerts' list, where
                 each entry is a dict mapping a renderer name to the alert.
    Entries that are not dicts, and alerts without a type or without any
    text, are skipped rather than raising.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Malformed responses must not crash extraction: .get() below needs a dict
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
651
652 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
653 errors = []
654 warnings = []
655 for alert_type, alert_message in alerts:
656 if alert_type.lower() == 'error' and fatal:
657 errors.append([alert_type, alert_message])
658 else:
659 warnings.append([alert_type, alert_message])
660
661 for alert_type, alert_message in (warnings + errors[:-1]):
662 self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
663 if errors:
664 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
665
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from `data` and pass them to `_report_alerts` with the remaining arguments."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
668
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased ``metadataBadgeRenderer`` labels found in `renderer['badges']`."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}
676
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found in `data` under any of `path_list`.

    A text object is either ``{'simpleText': ...}`` or ``{'runs': [{'text': ...}, ...]}``
    (a bare list of runs is accepted too). With no paths, `data` itself is
    the text object. `max_runs` limits how many runs are joined.
    Returns None when no text is found.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # Only a branching path (`...` or alternatives) already yields a list of objects
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not is_branching:
                candidates = [candidates]

        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text

            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
    return None
698
def _get_count(self, data, *path_list):
    """Parse a count from the text found at `path_list`.

    Tries `parse_count` first; failing that, the leading run of digits and
    commas (after removing whitespace) is converted. Returns None if neither works.
    """
    count_text = self._get_text(data, *path_list) or ''
    count = parse_count(count_text)
    if count is not None:
        return count
    compact_text = re.sub(r'\s', '', count_text)
    return str_to_int(
        self._search_regex(r'^([\d,]+)', compact_text, 'count', default=None))
706
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @returns list of {'url', 'height', 'width'} dicts; entries that are not
             dicts or have no valid URL are skipped
    """
    thumbnails = []
    for path in path_list or [()]:
        for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[]):
            # Malformed entries must not crash extraction: .get() below needs a dict
            if not isinstance(thumbnail, dict):
                continue
            thumbnail_url = url_or_none(thumbnail.get('url'))
            if not thumbnail_url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in thumbnail_url:
                thumbnail_url = thumbnail_url.split('?')[0]
            thumbnails.append({
                'url': thumbnail_url,
                'height': int_or_none(thumbnail.get('height')),
                'width': int_or_none(thumbnail.get('width')),
            })
    return thumbnails
730
731 @staticmethod
732 def extract_relative_time(relative_time_text):
733 """
734 Extracts a relative time from string and converts to dt object
735 e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'
736 """
737 mobj = re.search(r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
738 if mobj:
739 start = mobj.group('start')
740 if start:
741 return datetime_from_str(start)
742 try:
743 return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')))
744 except ValueError:
745 return None
746
def _extract_time_text(self, renderer, *path_list):
    """@returns (timestamp, time_text)

    The text at `path_list` is first read as a relative time ('3 days ago'),
    then as an absolute date. A warning is reported (once) when a non-empty
    text cannot be parsed; `timestamp` is None in that case.
    """
    text = self._get_text(renderer, *path_list) or ''
    dt = self.extract_relative_time(text)
    timestamp = calendar.timegm(dt.timetuple()) if isinstance(dt, datetime.datetime) else None

    if timestamp is None:
        timestamp = unified_timestamp(text)
        if not timestamp:
            # Retry with just the date-looking part of the text
            timestamp = unified_timestamp(
                self._search_regex(
                    (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                    text.lower(), 'time text', default=None))

    if text and timestamp is None:
        self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
    return timestamp, text
765
# Call API endpoint `ep` with `query` and return the JSON response, retrying on
# transient failures through self.RetryManager().
#   ytcfg          - source of the request context and API key (default client values otherwise)
#   check_get_keys - traverse_obj path(s) that must yield a truthy value in the response,
#                    otherwise the response counts as incomplete and the call is retried
#   fatal          - forwarded to _error_or_warning() for non-retried failures
# NOTE(review): what happens once retries are exhausted is decided by RetryManager,
# which is not visible in this chunk.
766 def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
767 ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
768 default_client='web'):
769 for retry in self.RetryManager():
770 try:
# Always fatal=True here so that every failure surfaces as an ExtractorError handled below
771 response = self._call_api(
772 ep=ep, fatal=True, headers=headers,
773 video_id=item_id, query=query, note=note,
774 context=self._extract_context(ytcfg, default_client),
775 api_key=self._extract_api_key(ytcfg, default_client),
776 api_hostname=api_hostname, default_client=default_client)
777 except ExtractorError as e:
# Not a network problem: give up immediately
778 if not isinstance(e.cause, network_exceptions):
779 return self._error_or_warning(e, fatal=fatal)
# Network problem without an HTTP response: retry
780 elif not isinstance(e.cause, urllib.error.HTTPError):
781 retry.error = e
782 continue
783
# HTTP error: if the body is not HTML, try to read the JSON error message and report it (non-fatal)
784 first_bytes = e.cause.read(512)
785 if not is_html(first_bytes):
786 yt_error = try_get(
787 self._parse_json(
788 self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
789 lambda x: x['error']['message'], str)
790 if yt_error:
791 self._report_alerts([('ERROR', yt_error)], fatal=False)
792 # Downloading page may result in intermittent 5xx HTTP error
793 # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
794 # We also want to catch all other network exceptions since errors in later pages can be troublesome
795 # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
# Every HTTP status except 403 and 429 is retried
796 if e.cause.code not in (403, 429):
797 retry.error = e
798 continue
799 return self._error_or_warning(e, fatal=fatal)
800
801 try:
802 self._extract_and_report_alerts(response, only_once=True)
803 except ExtractorError as e:
804 # YouTube servers may return errors we want to retry on in a 200 OK response
805 # See: https://github.com/yt-dlp/yt-dlp/issues/839
806 if 'unknown error' in e.msg.lower():
807 retry.error = e
808 continue
809 return self._error_or_warning(e, fatal=fatal)
810 # Youtube sometimes sends incomplete data
811 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
812 if not traverse_obj(response, *variadic(check_get_keys)):
813 retry.error = ExtractorError('Incomplete data received', expected=True)
814 continue
815
816 return response
817
818 @staticmethod
819 def is_music_url(url):
820 return re.match(r'https?://music\.youtube\.com/', url) is not None
821
def _extract_video(self, renderer):
    """Turn a video renderer dict into a 'url' result handled by YoutubeIE.

    Collects id, title, description, duration, view count, uploader,
    channel id, thumbnails, live status and availability from `renderer`.
    Shorts get a ``/shorts/<id>`` URL, everything else ``/watch?v=<id>``.
    ``upload_date`` is only filled in when the `approximate_date` extractor
    argument of youtubetab is set.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    duration = parse_duration(self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # Fall back to the duration spelled out in the title's accessibility label
        a11y_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            a11y_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')
    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''
    is_short = overlay_style == 'SHORTS' or '/shorts/' in navigation_url
    if is_short:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    upload_date = None
    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(timestamp, '%Y%m%d')

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    availability = self._availability(
        needs_premium='premium' in badges, needs_subscription='members only' in badges)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
    }
876
877
878class YoutubeIE(YoutubeBaseInfoExtractor):
879 IE_DESC = 'YouTube'
# Verbose-mode regex for a YouTube video URL or a bare 11-character video ID.
# Group 1 is the optional scheme/host/path prefix: a YouTube-like hostname
# followed by one of the path or `v=` forms, or a short-link host, or the
# cleanvideosearch wrapper. The video ID is captured in the named group `id`.
# `(?(1).+)?` is a conditional: arbitrary trailing text is only allowed when
# the prefix group matched.
# `%(invidious)s` is substituted (twice) with the '|'-joined entries of
# YoutubeBaseInfoExtractor._INVIDIOUS_SITES.
_VALID_URL = r"""(?x)^
                 (
                     (?:https?://|//)                                    # http(s):// or protocol-independent URL
                     (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
                        (?:www\.)?deturl\.com/www\.youtube\.com|
                        (?:www\.)?pwnyoutube\.com|
                        (?:www\.)?hooktube\.com|
                        (?:www\.)?yourepeat\.com|
                        tube\.majestyc\.net|
                        %(invidious)s|
                        youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
                     (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                     (?:                                                  # the various things that can precede the ID:
                         (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream))  # v/ or embed/ or e/ or shorts/
                         |(?:                                             # or the v= param in all its forms
                             (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                             (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                             (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
                             v=
                         )
                     ))
                     |(?:
                        youtu\.be|                                        # just youtu.be/xxxx
                        vid\.plus|                                        # or vid.plus/xxxx
                        zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
                        %(invidious)s
                     )/
                     |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                     )
                 )?                                                       # all until now is optional -> you can pass the naked ID
                 (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
                 (?(1).+)?                                                # if we found the ID, everything can follow
                 (?:\#|$)""" % {
    'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
}
# Verbose-mode regex(es) locating YouTube player URLs embedded in markup or
# script. The URL must directly follow one of the listed attribute/call
# prefixes and be wrapped in a matching pair of quotes (group 1, closed via
# the \1 back-reference); it is captured in the named group `url`.
#
# The `embedSWF` alternative used to read `embedSWF\(?:\s*`, i.e. an optional
# "(" followed by a mandatory literal ":", so `swfobject.embedSWF("...")`
# calls could never match. It now matches the opening parenthesis followed by
# optional whitespace.
_EMBED_REGEX = [r'''(?x)
    (?:
        <iframe[^>]+?src=|
        data-video-url=|
        <embed[^>]+?src=|
        embedSWF\(\s*|
        <object[^>]+data=|
        new\s+SWFObject\(
    )
    (["\'])
    (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
    (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
    \1''']
# Regexes that each expose a named group `id`.
# NOTE(review): presumably applied to the player JS URL to identify the player
# build; the code that uses them is outside this chunk, so confirm there.
_PLAYER_INFO_RE = (
    # .../s/player/<id>/player...
    r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
    # .../<id>/player_ias.vflset[/xx_YY]/base.js or .../<id>/player-plasma-ias-(phone|tablet)-xx_YY.vflset/base.js
    r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
    # a "vfl..." token somewhere before a trailing ".js"
    r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)
# Static table of known format metadata, keyed by itag as a string (plus the
# special '_rtmp' key). Each value lists whichever of these fields are known
# for that itag: ext, width/height, acodec/vcodec, abr, fps, container,
# format_note and preference. The 3D and HLS entries carry a negative
# 'preference' (-20 and -10 respectively).
_formats = {
    '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
    '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
    '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
    '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
    '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
    '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},


    # 3D videos
    '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
    '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
    '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

    # Apple HTTP Live Streaming
    '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

    # DASH mp4 video
    '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
    '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

    # Dash mp4 audio
    '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
    '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
    '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
    '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
    '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

    # Dash webm
    '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
    '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
    '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

    # Dash webm audio
    '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
    '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

    # Dash webm audio with opus inside
    '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
    '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
    '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

    # RTMP (unnamed)
    '_rtmp': {'protocol': 'rtmp'},

    # av01 video only formats sometimes served with "unknown" codecs
    '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
    '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
    '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
    '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
    '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
    '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
    '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
    '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
}
# Subtitle format names; the code that consumes this tuple is outside this chunk
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

# NOTE(review): presumably switches off the base extractor's geo-bypass handling - confirm against InfoExtractor
_GEO_BYPASS = False

# Name string of this extractor
IE_NAME = 'youtube'
1047 _TESTS = [
1048 {
1049 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1050 'info_dict': {
1051 'id': 'BaW_jenozKc',
1052 'ext': 'mp4',
1053 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1054 'uploader': 'Philipp Hagemeister',
1055 'uploader_id': 'phihag',
1056 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1057 'channel': 'Philipp Hagemeister',
1058 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1059 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1060 'upload_date': '20121002',
1061 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1062 'categories': ['Science & Technology'],
1063 'tags': ['youtube-dl'],
1064 'duration': 10,
1065 'view_count': int,
1066 'like_count': int,
1067 'availability': 'public',
1068 'playable_in_embed': True,
1069 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1070 'live_status': 'not_live',
1071 'age_limit': 0,
1072 'start_time': 1,
1073 'end_time': 9,
1074 'comment_count': int,
1075 'channel_follower_count': int
1076 }
1077 },
1078 {
1079 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1080 'note': 'Embed-only video (#1746)',
1081 'info_dict': {
1082 'id': 'yZIXLfi8CZQ',
1083 'ext': 'mp4',
1084 'upload_date': '20120608',
1085 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1086 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1087 'uploader': 'SET India',
1088 'uploader_id': 'setindia',
1089 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1090 'age_limit': 18,
1091 },
1092 'skip': 'Private video',
1093 },
1094 {
1095 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1096 'note': 'Use the first video ID in the URL',
1097 'info_dict': {
1098 'id': 'BaW_jenozKc',
1099 'ext': 'mp4',
1100 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1101 'uploader': 'Philipp Hagemeister',
1102 'uploader_id': 'phihag',
1103 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1104 'channel': 'Philipp Hagemeister',
1105 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1106 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1107 'upload_date': '20121002',
1108 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1109 'categories': ['Science & Technology'],
1110 'tags': ['youtube-dl'],
1111 'duration': 10,
1112 'view_count': int,
1113 'like_count': int,
1114 'availability': 'public',
1115 'playable_in_embed': True,
1116 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1117 'live_status': 'not_live',
1118 'age_limit': 0,
1119 'comment_count': int,
1120 'channel_follower_count': int
1121 },
1122 'params': {
1123 'skip_download': True,
1124 },
1125 },
1126 {
1127 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1128 'note': '256k DASH audio (format 141) via DASH manifest',
1129 'info_dict': {
1130 'id': 'a9LDPn-MO4I',
1131 'ext': 'm4a',
1132 'upload_date': '20121002',
1133 'uploader_id': '8KVIDEO',
1134 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1135 'description': '',
1136 'uploader': '8KVIDEO',
1137 'title': 'UHDTV TEST 8K VIDEO.mp4'
1138 },
1139 'params': {
1140 'youtube_include_dash_manifest': True,
1141 'format': '141',
1142 },
1143 'skip': 'format 141 not served anymore',
1144 },
1145 # DASH manifest with encrypted signature
1146 {
1147 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1148 'info_dict': {
1149 'id': 'IB3lcPjvWLA',
1150 'ext': 'm4a',
1151 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1152 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1153 'duration': 244,
1154 'uploader': 'AfrojackVEVO',
1155 'uploader_id': 'AfrojackVEVO',
1156 'upload_date': '20131011',
1157 'abr': 129.495,
1158 'like_count': int,
1159 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1160 'playable_in_embed': True,
1161 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1162 'view_count': int,
1163 'track': 'The Spark',
1164 'live_status': 'not_live',
1165 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1166 'channel': 'Afrojack',
1167 'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
1168 'tags': 'count:19',
1169 'availability': 'public',
1170 'categories': ['Music'],
1171 'age_limit': 0,
1172 'alt_title': 'The Spark',
1173 'channel_follower_count': int
1174 },
1175 'params': {
1176 'youtube_include_dash_manifest': True,
1177 'format': '141/bestaudio[ext=m4a]',
1178 },
1179 },
1180 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1181 {
1182 'note': 'Embed allowed age-gate video',
1183 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1184 'info_dict': {
1185 'id': 'HtVdAasjOgU',
1186 'ext': 'mp4',
1187 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1188 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1189 'duration': 142,
1190 'uploader': 'The Witcher',
1191 'uploader_id': 'WitcherGame',
1192 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1193 'upload_date': '20140605',
1194 'age_limit': 18,
1195 'categories': ['Gaming'],
1196 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1197 'availability': 'needs_auth',
1198 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1199 'like_count': int,
1200 'channel': 'The Witcher',
1201 'live_status': 'not_live',
1202 'tags': 'count:17',
1203 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1204 'playable_in_embed': True,
1205 'view_count': int,
1206 'channel_follower_count': int
1207 },
1208 },
1209 {
1210 'note': 'Age-gate video with embed allowed in public site',
1211 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1212 'info_dict': {
1213 'id': 'HsUATh_Nc2U',
1214 'ext': 'mp4',
1215 'title': 'Godzilla 2 (Official Video)',
1216 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1217 'upload_date': '20200408',
1218 'uploader_id': 'FlyingKitty900',
1219 'uploader': 'FlyingKitty',
1220 'age_limit': 18,
1221 'availability': 'needs_auth',
1222 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1223 'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
1224 'channel': 'FlyingKitty',
1225 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1226 'view_count': int,
1227 'categories': ['Entertainment'],
1228 'live_status': 'not_live',
1229 'tags': ['Flyingkitty', 'godzilla 2'],
1230 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1231 'like_count': int,
1232 'duration': 177,
1233 'playable_in_embed': True,
1234 'channel_follower_count': int
1235 },
1236 },
1237 {
1238 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1239 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1240 'info_dict': {
1241 'id': 'Tq92D6wQ1mg',
1242 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1243 'ext': 'mp4',
1244 'upload_date': '20191228',
1245 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1246 'uploader': 'Projekt Melody',
1247 'description': 'md5:17eccca93a786d51bc67646756894066',
1248 'age_limit': 18,
1249 'like_count': int,
1250 'availability': 'needs_auth',
1251 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1252 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1253 'view_count': int,
1254 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1255 'channel': 'Projekt Melody',
1256 'live_status': 'not_live',
1257 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1258 'playable_in_embed': True,
1259 'categories': ['Entertainment'],
1260 'duration': 106,
1261 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1262 'comment_count': int,
1263 'channel_follower_count': int
1264 },
1265 },
1266 {
1267 'note': 'Non-Agegated non-embeddable video',
1268 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1269 'info_dict': {
1270 'id': 'MeJVWBSsPAY',
1271 'ext': 'mp4',
1272 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1273 'uploader': 'Herr Lurik',
1274 'uploader_id': 'st3in234',
1275 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1276 'upload_date': '20130730',
1277 'track': 'Such mich find mich',
1278 'age_limit': 0,
1279 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1280 'like_count': int,
1281 'playable_in_embed': False,
1282 'creator': 'OOMPH!',
1283 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1284 'view_count': int,
1285 'alt_title': 'Such mich find mich',
1286 'duration': 210,
1287 'channel': 'Herr Lurik',
1288 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1289 'categories': ['Music'],
1290 'availability': 'public',
1291 'uploader_url': 'http://www.youtube.com/user/st3in234',
1292 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1293 'live_status': 'not_live',
1294 'artist': 'OOMPH!',
1295 'channel_follower_count': int
1296 },
1297 },
1298 {
1299 'note': 'Non-bypassable age-gated video',
1300 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1301 'only_matching': True,
1302 },
1303 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1304 # YouTube Red ad is not captured for creator
1305 {
1306 'url': '__2ABJjxzNo',
1307 'info_dict': {
1308 'id': '__2ABJjxzNo',
1309 'ext': 'mp4',
1310 'duration': 266,
1311 'upload_date': '20100430',
1312 'uploader_id': 'deadmau5',
1313 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1314 'creator': 'deadmau5',
1315 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1316 'uploader': 'deadmau5',
1317 'title': 'Deadmau5 - Some Chords (HD)',
1318 'alt_title': 'Some Chords',
1319 'availability': 'public',
1320 'tags': 'count:14',
1321 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1322 'view_count': int,
1323 'live_status': 'not_live',
1324 'channel': 'deadmau5',
1325 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1326 'like_count': int,
1327 'track': 'Some Chords',
1328 'artist': 'deadmau5',
1329 'playable_in_embed': True,
1330 'age_limit': 0,
1331 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1332 'categories': ['Music'],
1333 'album': 'Some Chords',
1334 'channel_follower_count': int
1335 },
1336 'expected_warnings': [
1337 'DASH manifest missing',
1338 ]
1339 },
1340 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1341 {
1342 'url': 'lqQg6PlCWgI',
1343 'info_dict': {
1344 'id': 'lqQg6PlCWgI',
1345 'ext': 'mp4',
1346 'duration': 6085,
1347 'upload_date': '20150827',
1348 'uploader_id': 'olympic',
1349 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1350 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
1351 'uploader': 'Olympics',
1352 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1353 'like_count': int,
1354 'release_timestamp': 1343767800,
1355 'playable_in_embed': True,
1356 'categories': ['Sports'],
1357 'release_date': '20120731',
1358 'channel': 'Olympics',
1359 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1360 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1361 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1362 'age_limit': 0,
1363 'availability': 'public',
1364 'live_status': 'was_live',
1365 'view_count': int,
1366 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
1367 'channel_follower_count': int
1368 },
1369 'params': {
1370 'skip_download': 'requires avconv',
1371 }
1372 },
1373 # Non-square pixels
1374 {
1375 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1376 'info_dict': {
1377 'id': '_b-2C3KPAM0',
1378 'ext': 'mp4',
1379 'stretched_ratio': 16 / 9.,
1380 'duration': 85,
1381 'upload_date': '20110310',
1382 'uploader_id': 'AllenMeow',
1383 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1384 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1385 'uploader': '孫ᄋᄅ',
1386 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1387 'playable_in_embed': True,
1388 'channel': '孫ᄋᄅ',
1389 'age_limit': 0,
1390 'tags': 'count:11',
1391 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1392 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1393 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1394 'view_count': int,
1395 'categories': ['People & Blogs'],
1396 'like_count': int,
1397 'live_status': 'not_live',
1398 'availability': 'unlisted',
1399 'comment_count': int,
1400 'channel_follower_count': int
1401 },
1402 },
1403 # url_encoded_fmt_stream_map is empty string
1404 {
1405 'url': 'qEJwOuvDf7I',
1406 'info_dict': {
1407 'id': 'qEJwOuvDf7I',
1408 'ext': 'webm',
1409 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1410 'description': '',
1411 'upload_date': '20150404',
1412 'uploader_id': 'spbelect',
1413 'uploader': 'Наблюдатели Петербурга',
1414 },
1415 'params': {
1416 'skip_download': 'requires avconv',
1417 },
1418 'skip': 'This live event has ended.',
1419 },
1420 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1421 {
1422 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1423 'info_dict': {
1424 'id': 'FIl7x6_3R5Y',
1425 'ext': 'webm',
1426 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1427 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1428 'duration': 220,
1429 'upload_date': '20150625',
1430 'uploader_id': 'dorappi2000',
1431 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1432 'uploader': 'dorappi2000',
1433 'formats': 'mincount:31',
1434 },
1435 'skip': 'not actual anymore',
1436 },
1437 # DASH manifest with segment_list
1438 {
1439 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1440 'md5': '8ce563a1d667b599d21064e982ab9e31',
1441 'info_dict': {
1442 'id': 'CsmdDsKjzN8',
1443 'ext': 'mp4',
1444 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1445 'uploader': 'Airtek',
1446 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1447 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1448 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1449 },
1450 'params': {
1451 'youtube_include_dash_manifest': True,
1452 'format': '135', # bestvideo
1453 },
1454 'skip': 'This live event has ended.',
1455 },
1456 {
1457 # Multifeed videos (multiple cameras), URL is for Main Camera
1458 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1459 'info_dict': {
1460 'id': 'jvGDaLqkpTg',
1461 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1462 'description': 'md5:e03b909557865076822aa169218d6a5d',
1463 },
1464 'playlist': [{
1465 'info_dict': {
1466 'id': 'jvGDaLqkpTg',
1467 'ext': 'mp4',
1468 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1469 'description': 'md5:e03b909557865076822aa169218d6a5d',
1470 'duration': 10643,
1471 'upload_date': '20161111',
1472 'uploader': 'Team PGP',
1473 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1474 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1475 },
1476 }, {
1477 'info_dict': {
1478 'id': '3AKt1R1aDnw',
1479 'ext': 'mp4',
1480 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1481 'description': 'md5:e03b909557865076822aa169218d6a5d',
1482 'duration': 10991,
1483 'upload_date': '20161111',
1484 'uploader': 'Team PGP',
1485 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1486 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1487 },
1488 }, {
1489 'info_dict': {
1490 'id': 'RtAMM00gpVc',
1491 'ext': 'mp4',
1492 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1493 'description': 'md5:e03b909557865076822aa169218d6a5d',
1494 'duration': 10995,
1495 'upload_date': '20161111',
1496 'uploader': 'Team PGP',
1497 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1498 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1499 },
1500 }, {
1501 'info_dict': {
1502 'id': '6N2fdlP3C5U',
1503 'ext': 'mp4',
1504 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1505 'description': 'md5:e03b909557865076822aa169218d6a5d',
1506 'duration': 10990,
1507 'upload_date': '20161111',
1508 'uploader': 'Team PGP',
1509 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1510 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1511 },
1512 }],
1513 'params': {
1514 'skip_download': True,
1515 },
1516 'skip': 'Not multifeed anymore',
1517 },
1518 {
1519 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1520 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1521 'info_dict': {
1522 'id': 'gVfLd0zydlo',
1523 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1524 },
1525 'playlist_count': 2,
1526 'skip': 'Not multifeed anymore',
1527 },
1528 {
1529 'url': 'https://vid.plus/FlRa-iH7PGw',
1530 'only_matching': True,
1531 },
1532 {
1533 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1534 'only_matching': True,
1535 },
1536 {
1537 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1538 # Also tests cut-off URL expansion in video description (see
1539 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1540 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1541 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1542 'info_dict': {
1543 'id': 'lsguqyKfVQg',
1544 'ext': 'mp4',
1545 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1546 'alt_title': 'Dark Walk',
1547 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1548 'duration': 133,
1549 'upload_date': '20151119',
1550 'uploader_id': 'IronSoulElf',
1551 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1552 'uploader': 'IronSoulElf',
1553 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1554 'track': 'Dark Walk',
1555 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1556 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1557 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1558 'categories': ['Film & Animation'],
1559 'view_count': int,
1560 'live_status': 'not_live',
1561 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1562 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1563 'tags': 'count:13',
1564 'availability': 'public',
1565 'channel': 'IronSoulElf',
1566 'playable_in_embed': True,
1567 'like_count': int,
1568 'age_limit': 0,
1569 'channel_follower_count': int
1570 },
1571 'params': {
1572 'skip_download': True,
1573 },
1574 },
1575 {
1576 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1577 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1578 'only_matching': True,
1579 },
1580 {
1581 # Video with yt:stretch=17:0
1582 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1583 'info_dict': {
1584 'id': 'Q39EVAstoRM',
1585 'ext': 'mp4',
1586 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1587 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1588 'upload_date': '20151107',
1589 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1590 'uploader': 'CH GAMER DROID',
1591 },
1592 'params': {
1593 'skip_download': True,
1594 },
1595 'skip': 'This video does not exist.',
1596 },
1597 {
1598 # Video with incomplete 'yt:stretch=16:'
1599 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1600 'only_matching': True,
1601 },
1602 {
1603 # Video licensed under Creative Commons
1604 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1605 'info_dict': {
1606 'id': 'M4gD1WSo5mA',
1607 'ext': 'mp4',
1608 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1609 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1610 'duration': 721,
1611 'upload_date': '20150128',
1612 'uploader_id': 'BerkmanCenter',
1613 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1614 'uploader': 'The Berkman Klein Center for Internet & Society',
1615 'license': 'Creative Commons Attribution license (reuse allowed)',
1616 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1617 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1618 'like_count': int,
1619 'age_limit': 0,
1620 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1621 'channel': 'The Berkman Klein Center for Internet & Society',
1622 'availability': 'public',
1623 'view_count': int,
1624 'categories': ['Education'],
1625 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1626 'live_status': 'not_live',
1627 'playable_in_embed': True,
1628 'comment_count': int,
1629 'channel_follower_count': int
1630 },
1631 'params': {
1632 'skip_download': True,
1633 },
1634 },
1635 {
1636 # Channel-like uploader_url
1637 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1638 'info_dict': {
1639 'id': 'eQcmzGIKrzg',
1640 'ext': 'mp4',
1641 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1642 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1643 'duration': 4060,
1644 'upload_date': '20151120',
1645 'uploader': 'Bernie Sanders',
1646 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1647 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1648 'license': 'Creative Commons Attribution license (reuse allowed)',
1649 'playable_in_embed': True,
1650 'tags': 'count:12',
1651 'like_count': int,
1652 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1653 'age_limit': 0,
1654 'availability': 'public',
1655 'categories': ['News & Politics'],
1656 'channel': 'Bernie Sanders',
1657 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1658 'view_count': int,
1659 'live_status': 'not_live',
1660 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1661 'comment_count': int,
1662 'channel_follower_count': int
1663 },
1664 'params': {
1665 'skip_download': True,
1666 },
1667 },
1668 {
1669 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1670 'only_matching': True,
1671 },
1672 {
1673 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1674 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1675 'only_matching': True,
1676 },
1677 {
1678 # Rental video preview
1679 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1680 'info_dict': {
1681 'id': 'uGpuVWrhIzE',
1682 'ext': 'mp4',
1683 'title': 'Piku - Trailer',
1684 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1685 'upload_date': '20150811',
1686 'uploader': 'FlixMatrix',
1687 'uploader_id': 'FlixMatrixKaravan',
1688 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1689 'license': 'Standard YouTube License',
1690 },
1691 'params': {
1692 'skip_download': True,
1693 },
1694 'skip': 'This video is not available.',
1695 },
1696 {
1697 # YouTube Red video with episode data
1698 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1699 'info_dict': {
1700 'id': 'iqKdEhx-dD4',
1701 'ext': 'mp4',
1702 'title': 'Isolation - Mind Field (Ep 1)',
1703 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1704 'duration': 2085,
1705 'upload_date': '20170118',
1706 'uploader': 'Vsauce',
1707 'uploader_id': 'Vsauce',
1708 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1709 'series': 'Mind Field',
1710 'season_number': 1,
1711 'episode_number': 1,
1712 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1713 'tags': 'count:12',
1714 'view_count': int,
1715 'availability': 'public',
1716 'age_limit': 0,
1717 'channel': 'Vsauce',
1718 'episode': 'Episode 1',
1719 'categories': ['Entertainment'],
1720 'season': 'Season 1',
1721 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1722 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1723 'like_count': int,
1724 'playable_in_embed': True,
1725 'live_status': 'not_live',
1726 'channel_follower_count': int
1727 },
1728 'params': {
1729 'skip_download': True,
1730 },
1731 'expected_warnings': [
1732 'Skipping DASH manifest',
1733 ],
1734 },
1735 {
1736 # The following content has been identified by the YouTube community
1737 # as inappropriate or offensive to some audiences.
1738 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1739 'info_dict': {
1740 'id': '6SJNVb0GnPI',
1741 'ext': 'mp4',
1742 'title': 'Race Differences in Intelligence',
1743 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1744 'duration': 965,
1745 'upload_date': '20140124',
1746 'uploader': 'New Century Foundation',
1747 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1748 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1749 },
1750 'params': {
1751 'skip_download': True,
1752 },
1753 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1754 },
1755 {
1756 # itag 212
1757 'url': '1t24XAntNCY',
1758 'only_matching': True,
1759 },
1760 {
1761 # geo restricted to JP
1762 'url': 'sJL6WA-aGkQ',
1763 'only_matching': True,
1764 },
1765 {
1766 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1767 'only_matching': True,
1768 },
1769 {
1770 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1771 'only_matching': True,
1772 },
1773 {
1774 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1775 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1776 'only_matching': True,
1777 },
1778 {
1779 # DRM protected
1780 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1781 'only_matching': True,
1782 },
1783 {
1784 # Video with unsupported adaptive stream type formats
1785 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1786 'info_dict': {
1787 'id': 'Z4Vy8R84T1U',
1788 'ext': 'mp4',
1789 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1790 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1791 'duration': 433,
1792 'upload_date': '20130923',
1793 'uploader': 'Amelia Putri Harwita',
1794 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1795 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1796 'formats': 'maxcount:10',
1797 },
1798 'params': {
1799 'skip_download': True,
1800 'youtube_include_dash_manifest': False,
1801 },
1802 'skip': 'not actual anymore',
1803 },
1804 {
1805 # Youtube Music Auto-generated description
1806 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1807 'info_dict': {
1808 'id': 'MgNrAu2pzNs',
1809 'ext': 'mp4',
1810 'title': 'Voyeur Girl',
1811 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1812 'upload_date': '20190312',
1813 'uploader': 'Stephen - Topic',
1814 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1815 'artist': 'Stephen',
1816 'track': 'Voyeur Girl',
1817 'album': 'it\'s too much love to know my dear',
1818 'release_date': '20190313',
1819 'release_year': 2019,
1820 'alt_title': 'Voyeur Girl',
1821 'view_count': int,
1822 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1823 'playable_in_embed': True,
1824 'like_count': int,
1825 'categories': ['Music'],
1826 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1827 'channel': 'Stephen',
1828 'availability': 'public',
1829 'creator': 'Stephen',
1830 'duration': 169,
1831 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
1832 'age_limit': 0,
1833 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1834 'tags': 'count:11',
1835 'live_status': 'not_live',
1836 'channel_follower_count': int
1837 },
1838 'params': {
1839 'skip_download': True,
1840 },
1841 },
1842 {
1843 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1844 'only_matching': True,
1845 },
1846 {
1847 # invalid -> valid video id redirection
1848 'url': 'DJztXj2GPfl',
1849 'info_dict': {
1850 'id': 'DJztXj2GPfk',
1851 'ext': 'mp4',
1852 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1853 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1854 'upload_date': '20090125',
1855 'uploader': 'Prochorowka',
1856 'uploader_id': 'Prochorowka',
1857 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1858 'artist': 'Panjabi MC',
1859 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1860 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1861 },
1862 'params': {
1863 'skip_download': True,
1864 },
1865 'skip': 'Video unavailable',
1866 },
1867 {
1868 # empty description results in an empty string
1869 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1870 'info_dict': {
1871 'id': 'x41yOUIvK2k',
1872 'ext': 'mp4',
1873 'title': 'IMG 3456',
1874 'description': '',
1875 'upload_date': '20170613',
1876 'uploader_id': 'ElevageOrVert',
1877 'uploader': 'ElevageOrVert',
1878 'view_count': int,
1879 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
1880 'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
1881 'like_count': int,
1882 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
1883 'tags': [],
1884 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
1885 'availability': 'public',
1886 'age_limit': 0,
1887 'categories': ['Pets & Animals'],
1888 'duration': 7,
1889 'playable_in_embed': True,
1890 'live_status': 'not_live',
1891 'channel': 'ElevageOrVert',
1892 'channel_follower_count': int
1893 },
1894 'params': {
1895 'skip_download': True,
1896 },
1897 },
1898 {
1899 # with '};' inside yt initial data (see [1])
1900 # see [2] for an example with '};' inside ytInitialPlayerResponse
1901 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1902 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1903 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1904 'info_dict': {
1905 'id': 'CHqg6qOn4no',
1906 'ext': 'mp4',
1907 'title': 'Part 77 Sort a list of simple types in c#',
1908 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1909 'upload_date': '20130831',
1910 'uploader_id': 'kudvenkat',
1911 'uploader': 'kudvenkat',
1912 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
1913 'like_count': int,
1914 'uploader_url': 'http://www.youtube.com/user/kudvenkat',
1915 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
1916 'live_status': 'not_live',
1917 'categories': ['Education'],
1918 'availability': 'public',
1919 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
1920 'tags': 'count:12',
1921 'playable_in_embed': True,
1922 'age_limit': 0,
1923 'view_count': int,
1924 'duration': 522,
1925 'channel': 'kudvenkat',
1926 'comment_count': int,
1927 'channel_follower_count': int
1928 },
1929 'params': {
1930 'skip_download': True,
1931 },
1932 },
1933 {
1934 # another example of '};' in ytInitialData
1935 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1936 'only_matching': True,
1937 },
1938 {
1939 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1940 'only_matching': True,
1941 },
1942 {
1943 # https://github.com/ytdl-org/youtube-dl/pull/28094
1944 'url': 'OtqTfy26tG0',
1945 'info_dict': {
1946 'id': 'OtqTfy26tG0',
1947 'ext': 'mp4',
1948 'title': 'Burn Out',
1949 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1950 'upload_date': '20141120',
1951 'uploader': 'The Cinematic Orchestra - Topic',
1952 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1953 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1954 'artist': 'The Cinematic Orchestra',
1955 'track': 'Burn Out',
1956 'album': 'Every Day',
1957 'like_count': int,
1958 'live_status': 'not_live',
1959 'alt_title': 'Burn Out',
1960 'duration': 614,
1961 'age_limit': 0,
1962 'view_count': int,
1963 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1964 'creator': 'The Cinematic Orchestra',
1965 'channel': 'The Cinematic Orchestra',
1966 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
1967 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1968 'availability': 'public',
1969 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
1970 'categories': ['Music'],
1971 'playable_in_embed': True,
1972 'channel_follower_count': int
1973 },
1974 'params': {
1975 'skip_download': True,
1976 },
1977 },
1978 {
1979 # controversial video, only works with bpctr when authenticated with cookies
1980 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1981 'only_matching': True,
1982 },
1983 {
1984 # controversial video, requires bpctr/contentCheckOk
1985 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1986 'info_dict': {
1987 'id': 'SZJvDhaSDnc',
1988 'ext': 'mp4',
1989 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1990 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1991 'uploader': 'CBS Mornings',
1992 'uploader_id': 'CBSThisMorning',
1993 'upload_date': '20140716',
1994 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
1995 'duration': 170,
1996 'categories': ['News & Politics'],
1997 'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
1998 'view_count': int,
1999 'channel': 'CBS Mornings',
2000 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2001 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2002 'age_limit': 18,
2003 'availability': 'needs_auth',
2004 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2005 'like_count': int,
2006 'live_status': 'not_live',
2007 'playable_in_embed': True,
2008 'channel_follower_count': int
2009 }
2010 },
2011 {
2012 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2013 'url': 'cBvYw8_A0vQ',
2014 'info_dict': {
2015 'id': 'cBvYw8_A0vQ',
2016 'ext': 'mp4',
2017 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2018 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2019 'upload_date': '20201120',
2020 'uploader': 'Walk around Japan',
2021 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2022 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2023 'duration': 1456,
2024 'categories': ['Travel & Events'],
2025 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2026 'view_count': int,
2027 'channel': 'Walk around Japan',
2028 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2029 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2030 'age_limit': 0,
2031 'availability': 'public',
2032 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2033 'live_status': 'not_live',
2034 'playable_in_embed': True,
2035 'channel_follower_count': int
2036 },
2037 'params': {
2038 'skip_download': True,
2039 },
2040 }, {
2041 # Has multiple audio streams
2042 'url': 'WaOKSUlf4TM',
2043 'only_matching': True
2044 }, {
2045 # Requires Premium: has format 141 when requested using YTM url
2046 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2047 'only_matching': True
2048 }, {
2049 # multiple subtitles with same lang_code
2050 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2051 'only_matching': True,
2052 }, {
2053 # Force use android client fallback
2054 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2055 'info_dict': {
2056 'id': 'YOelRv7fMxY',
2057 'title': 'DIGGING A SECRET TUNNEL Part 1',
2058 'ext': '3gp',
2059 'upload_date': '20210624',
2060 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2061 'uploader': 'colinfurze',
2062 'uploader_id': 'colinfurze',
2063 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
2064 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2065 'duration': 596,
2066 'categories': ['Entertainment'],
2067 'uploader_url': 'http://www.youtube.com/user/colinfurze',
2068 'view_count': int,
2069 'channel': 'colinfurze',
2070 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2071 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2072 'age_limit': 0,
2073 'availability': 'public',
2074 'like_count': int,
2075 'live_status': 'not_live',
2076 'playable_in_embed': True,
2077 'channel_follower_count': int
2078 },
2079 'params': {
2080 'format': '17', # 3gp format available on android
2081 'extractor_args': {'youtube': {'player_client': ['android']}},
2082 },
2083 },
2084 {
2085 # Skip download of additional client configs (remix client config in this case)
2086 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2087 'only_matching': True,
2088 'params': {
2089 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2090 },
2091 }, {
2092 # shorts
2093 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2094 'only_matching': True,
2095 }, {
2096 'note': 'Storyboards',
2097 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2098 'info_dict': {
2099 'id': '5KLPxDtMqe8',
2100 'ext': 'mhtml',
2101 'format_id': 'sb0',
2102 'title': 'Your Brain is Plastic',
2103 'uploader_id': 'scishow',
2104 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2105 'upload_date': '20140324',
2106 'uploader': 'SciShow',
2107 'like_count': int,
2108 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2109 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2110 'view_count': int,
2111 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2112 'playable_in_embed': True,
2113 'tags': 'count:12',
2114 'uploader_url': 'http://www.youtube.com/user/scishow',
2115 'availability': 'public',
2116 'channel': 'SciShow',
2117 'live_status': 'not_live',
2118 'duration': 248,
2119 'categories': ['Education'],
2120 'age_limit': 0,
2121 'channel_follower_count': int
2122 }, 'params': {'format': 'mhtml', 'skip_download': True}
2123 }, {
2124 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2125 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2126 'info_dict': {
2127 'id': '2NUZ8W2llS4',
2128 'ext': 'mp4',
2129 'title': 'The NP that test your phone performance 🙂',
2130 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2131 'uploader': 'Leon Nguyen',
2132 'uploader_id': 'VNSXIII',
2133 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2134 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2135 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2136 'duration': 21,
2137 'view_count': int,
2138 'age_limit': 0,
2139 'categories': ['Gaming'],
2140 'tags': 'count:23',
2141 'playable_in_embed': True,
2142 'live_status': 'not_live',
2143 'upload_date': '20220103',
2144 'like_count': int,
2145 'availability': 'public',
2146 'channel': 'Leon Nguyen',
2147 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2148 'comment_count': int,
2149 'channel_follower_count': int
2150 }
2151 }, {
2152 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2153 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2154 'info_dict': {
2155 'id': 'mzZzzBU6lrM',
2156 'ext': 'mp4',
2157 'title': 'I Met GeorgeNotFound In Real Life...',
2158 'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
2159 'uploader': 'Quackity',
2160 'uploader_id': 'QuackityHQ',
2161 'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
2162 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2163 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2164 'duration': 955,
2165 'view_count': int,
2166 'age_limit': 0,
2167 'categories': ['Entertainment'],
2168 'tags': 'count:26',
2169 'playable_in_embed': True,
2170 'live_status': 'not_live',
2171 'release_timestamp': 1641172509,
2172 'release_date': '20220103',
2173 'upload_date': '20220103',
2174 'like_count': int,
2175 'availability': 'public',
2176 'channel': 'Quackity',
2177 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2178 'channel_follower_count': int
2179 }
2180 },
2181 { # continuous livestream. Microformat upload date should be preferred.
2182 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2183 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2184 'info_dict': {
2185 'id': 'kgx4WGK0oNU',
2186 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2187 'ext': 'mp4',
2188 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2189 'availability': 'public',
2190 'age_limit': 0,
2191 'release_timestamp': 1637975704,
2192 'upload_date': '20210619',
2193 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2194 'live_status': 'is_live',
2195 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
2196 'uploader': '阿鲍Abao',
2197 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2198 'channel': 'Abao in Tokyo',
2199 'channel_follower_count': int,
2200 'release_date': '20211127',
2201 'tags': 'count:39',
2202 'categories': ['People & Blogs'],
2203 'like_count': int,
2204 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2205 'view_count': int,
2206 'playable_in_embed': True,
2207 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
2208 },
2209 'params': {'skip_download': True}
2210 }, {
2211 # Story. Requires specific player params to work.
2212 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
2213 'info_dict': {
2214 'id': 'vv8qTUWmulI',
2215 'ext': 'mp4',
2216 'availability': 'unlisted',
2217 'view_count': int,
2218 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
2219 'upload_date': '20220526',
2220 'categories': ['Education'],
2221 'title': 'Story',
2222 'channel': 'IT\'S HISTORY',
2223 'description': '',
2224 'uploader_id': 'BlastfromthePast',
2225 'duration': 12,
2226 'uploader': 'IT\'S HISTORY',
2227 'playable_in_embed': True,
2228 'age_limit': 0,
2229 'live_status': 'not_live',
2230 'tags': [],
2231 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
2232 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
2233 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
2234 },
2235 'skip': 'stories get removed after some period of time',
2236 }, {
2237 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2238 'info_dict': {
2239 'id': 'tjjjtzRLHvA',
2240 'ext': 'mp4',
2241 'title': 'ハッシュタグ無し };if window.ytcsi',
2242 'upload_date': '20220323',
2243 'like_count': int,
2244 'availability': 'unlisted',
2245 'channel': 'nao20010128nao',
2246 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
2247 'age_limit': 0,
2248 'uploader': 'nao20010128nao',
2249 'uploader_id': 'nao20010128nao',
2250 'categories': ['Music'],
2251 'view_count': int,
2252 'description': '',
2253 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2254 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2255 'live_status': 'not_live',
2256 'playable_in_embed': True,
2257 'channel_follower_count': int,
2258 'duration': 6,
2259 'tags': [],
2260 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
2261 }
2262 }, {
2263 'note': '6 channel audio',
2264 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2265 'only_matching': True,
2266 }
2267 ]
2268
# Test cases for YouTube videos embedded in third-party web pages.
# Each entry gives the page URL, the expected metadata of the embedded
# video ('info_dict') and the options the test is run with ('params').
_WEBPAGE_TESTS = [
    # YouTube <object> embed
    {
        'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
        # NOTE(review): this value is identical to the description checksum
        # below, and 'skip_download' is set in 'params' - presumably a
        # copy-paste that is never checked against a downloaded file; confirm.
        'md5': '873c81d308b979f0e23ee7e620b312a3',
        'info_dict': {
            'id': 'msN87y-iEx0',
            'ext': 'mp4',
            'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
            'upload_date': '20080526',
            'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
            'uploader': 'Christopher Sykes',
            'uploader_id': 'ChristopherJSykes',
            'age_limit': 0,
            'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
            'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
            # Values given as a type (int) only constrain the type of the
            # extracted field, since the actual counts change over time.
            'like_count': int,
            'comment_count': int,
            'channel': 'Christopher Sykes',
            'live_status': 'not_live',
            'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
            'availability': 'public',
            'duration': 195,
            'view_count': int,
            'categories': ['Science & Technology'],
            'channel_follower_count': int,
            'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
        },
        'params': {
            'skip_download': True,
        }
    },
]
2304
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    URLs carrying a non-empty ``list`` query parameter are rejected here
    (returns False); every other URL is decided by the parent class's
    ``suitable``.
    """
    # parse_qs is already imported at module level from ..utils, so the
    # former function-scope re-import was redundant and has been dropped.
    if parse_qs(url).get('list', [None])[0]:
        return False
    return super().suitable(url)
2313
def __init__(self, *args, **kwargs):
    """Initialise the extractor and create its empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # _code_cache maps a player id to downloaded player JS (see _load_player).
    # _player_cache starts empty as well; its users are outside this section.
    self._code_cache, self._player_cache = {}, {}
2318
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
    """Turn the "from start" live formats into generator-driven DASH formats.

    Every format in *formats* flagged with ``is_from_start`` is modified in
    place: it is marked live, its protocol is set to
    ``http_dash_segments_generator`` and its ``fragments`` entry becomes a
    partial of ``_live_dash_fragments`` bound to that format's id,
    *live_start_time* and a shared manifest feed (``mpd_feed``).

    The remaining parameters are only forwarded to
    ``_download_player_responses`` / ``_list_formats`` when the manifest
    has to be re-fetched.
    """
    # Serialises manifest refreshes across callers of mpd_feed
    lock = threading.Lock()

    # State shared by the closures below; rebound by refetch_manifest
    is_live = True
    start_time = time.time()
    formats = [f for f in formats if f.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        # Re-download the player responses and rebuild the format list,
        # but only once more than `delay` seconds have elapsed since the
        # previous fetch. `format_id` is accepted but not used here.
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(
            prs, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        # Note: the refreshed list is not re-filtered by 'is_from_start'
        _, is_live, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        with lock:
            refetch_manifest(format_id, delay)

        # Look the format up in the (possibly refreshed) list
        f = next((f for f in formats if f['format_id'] == format_id), None)
        if not f:
            if not is_live:
                self.to_screen(f'{video_id}: Video is no longer live')
            else:
                self.report_warning(
                    f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
            return None
        return f['manifest_url'], f['manifest_stream_number'], is_live

    # Formats are mutated in place; nothing is returned
    for f in formats:
        f['is_live'] = True
        f['protocol'] = 'http_dash_segments_generator'
        f['fragments'] = functools.partial(
            self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)
2362
2363 def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
2364 FETCH_SPAN, MAX_DURATION = 5, 432000
2365
2366 mpd_url, stream_number, is_live = None, None, True
2367
2368 begin_index = 0
2369 download_start_time = ctx.get('start') or time.time()
2370
2371 lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
2372 if lack_early_segments:
2373 self.report_warning(bug_reports_message(
2374 'Starting download from the last 120 hours of the live stream since '
2375 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
2376 lack_early_segments = True
2377
2378 known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
2379 fragments, fragment_base_url = None, None
2380
2381 def _extract_sequence_from_mpd(refresh_sequence, immediate):
2382 nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
2383 # Obtain from MPD's maximum seq value
2384 old_mpd_url = mpd_url
2385 last_error = ctx.pop('last_error', None)
2386 expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
2387 mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
2388 or (mpd_url, stream_number, False))
2389 if not refresh_sequence:
2390 if expire_fast and not is_live:
2391 return False, last_seq
2392 elif old_mpd_url == mpd_url:
2393 return True, last_seq
2394 try:
2395 fmts, _ = self._extract_mpd_formats_and_subtitles(
2396 mpd_url, None, note=False, errnote=False, fatal=False)
2397 except ExtractorError:
2398 fmts = None
2399 if not fmts:
2400 no_fragment_score += 2
2401 return False, last_seq
2402 fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
2403 fragments = fmt_info['fragments']
2404 fragment_base_url = fmt_info['fragment_base_url']
2405 assert fragment_base_url
2406
2407 _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
2408 return True, _last_seq
2409
2410 while is_live:
2411 fetch_time = time.time()
2412 if no_fragment_score > 30:
2413 return
2414 if last_segment_url:
2415 # Obtain from "X-Head-Seqnum" header value from each segment
2416 try:
2417 urlh = self._request_webpage(
2418 last_segment_url, None, note=False, errnote=False, fatal=False)
2419 except ExtractorError:
2420 urlh = None
2421 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
2422 if last_seq is None:
2423 no_fragment_score += 2
2424 last_segment_url = None
2425 continue
2426 else:
2427 should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
2428 no_fragment_score += 2
2429 if not should_continue:
2430 continue
2431
2432 if known_idx > last_seq:
2433 last_segment_url = None
2434 continue
2435
2436 last_seq += 1
2437
2438 if begin_index < 0 and known_idx < 0:
2439 # skip from the start when it's negative value
2440 known_idx = last_seq + begin_index
2441 if lack_early_segments:
2442 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
2443 try:
2444 for idx in range(known_idx, last_seq):
2445 # do not update sequence here or you'll get skipped some part of it
2446 should_continue, _ = _extract_sequence_from_mpd(False, False)
2447 if not should_continue:
2448 known_idx = idx - 1
2449 raise ExtractorError('breaking out of outer loop')
2450 last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
2451 yield {
2452 'url': last_segment_url,
2453 'fragment_count': last_seq,
2454 }
2455 if known_idx == last_seq:
2456 no_fragment_score += 5
2457 else:
2458 no_fragment_score = 0
2459 known_idx = last_seq
2460 except ExtractorError:
2461 continue
2462
2463 time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2464
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in the given ytcfgs, or None.

    The first string found under 'PLAYER_JS_URL' or under
    'WEB_PLAYER_CONTEXT_CONFIGS' -> * -> 'jsUrl' is joined onto the
    YouTube origin. `webpage` is accepted but not used.
    """
    js_path = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', js_path) if js_path else None
2472
2473 def _download_player_url(self, video_id, fatal=False):
2474 res = self._download_webpage(
2475 'https://www.youtube.com/iframe_api',
2476 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
2477 if res:
2478 player_version = self._search_regex(
2479 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
2480 if player_version:
2481 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
2482
2483 def _signature_cache_id(self, example_sig):
2484 """ Return a string representation of a signature """
2485 return '.'.join(str(len(part)) for part in example_sig.split('.'))
2486
@classmethod
def _extract_player_info(cls, player_url):
    """Return the player id: the 'id' group of the first matching _PLAYER_INFO_RE pattern.

    Raises ExtractorError when none of the patterns matches *player_url*.
    """
    for pattern in cls._PLAYER_INFO_RE:
        match = re.search(pattern, player_url)
        if match:
            return match.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)
2496
2497 def _load_player(self, video_id, player_url, fatal=True):
2498 player_id = self._extract_player_info(player_url)
2499 if player_id not in self._code_cache:
2500 code = self._download_webpage(
2501 player_url, video_id, fatal=fatal,
2502 note='Downloading player ' + player_id,
2503 errnote='Download of %s failed' % player_url)
2504 if code:
2505 self._code_cache[player_id] = code
2506 return self._code_cache.get(player_id)
2507
2508 def _extract_signature_function(self, video_id, player_url, example_sig):
2509 player_id = self._extract_player_info(player_url)
2510
2511 # Read from filesystem cache
2512 func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
2513 assert os.path.basename(func_id) == func_id
2514
2515 self.write_debug(f'Extracting signature function {func_id}')
2516 cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
2517
2518 if not cache_spec:
2519 code = self._load_player(video_id, player_url)
2520 if code:
2521 res = self._parse_sig_js(code)
2522 test_string = ''.join(map(chr, range(len(example_sig))))
2523 cache_spec = [ord(c) for c in res(test_string)]
2524 self.cache.store('youtube-sigfuncs', func_id, cache_spec)
2525
2526 return lambda s: ''.join(s[i] for i in cache_spec)
2527
2528 def _print_sig_code(self, func, example_sig):
2529 if not self.get_param('youtube_print_sig_code'):
2530 return
2531
2532 def gen_sig_code(idxs):
2533 def _genslice(start, end, step):
2534 starts = '' if start == 0 else str(start)
2535 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
2536 steps = '' if step == 1 else (':%d' % step)
2537 return f's[{starts}{ends}{steps}]'
2538
2539 step = None
2540 # Quelch pyflakes warnings - start will be set when step is set
2541 start = '(Never used)'
2542 for i, prev in zip(idxs[1:], idxs[:-1]):
2543 if step is not None:
2544 if i - prev == step:
2545 continue
2546 yield _genslice(start, prev, step)
2547 step = None
2548 continue
2549 if i - prev in [-1, 1]:
2550 step = i - prev
2551 start = prev
2552 continue
2553 else:
2554 yield 's[%d]' % prev
2555 if step is None:
2556 yield 's[%d]' % i
2557 else:
2558 yield _genslice(start, i, step)
2559
2560 test_string = ''.join(map(chr, range(len(example_sig))))
2561 cache_res = func(test_string)
2562 cache_spec = [ord(c) for c in cache_res]
2563 expr_code = ' + '.join(gen_sig_code(cache_spec))
2564 signature_id_tuple = '(%s)' % (
2565 ', '.join(str(len(p)) for p in example_sig.split('.')))
2566 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
2567 ' return %s\n') % (signature_id_tuple, expr_code)
2568 self.to_screen('Extracted signature function:\n' + code)
2569
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and wrap it for Python use.

    The function name is searched with the patterns below, in order. The
    returned callable takes the signature string and passes it to the
    interpreted JS function as its single argument.
    """
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        sig_name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    sig_function = JSInterpreter(jscode).extract_function(funcname)

    def call_with_signature(s):
        return sig_function([s])

    return call_with_signature
2593
2594 def _cached(self, func, *cache_id):
2595 def inner(*args, **kwargs):
2596 if cache_id not in self._player_cache:
2597 try:
2598 self._player_cache[cache_id] = func(*args, **kwargs)
2599 except ExtractorError as e:
2600 self._player_cache[cache_id] = e
2601 except Exception as e:
2602 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
2603
2604 ret = self._player_cache[cache_id]
2605 if isinstance(ret, Exception):
2606 raise ret
2607 return ret
2608 return inner
2609
2610 def _decrypt_signature(self, s, video_id, player_url):
2611 """Turn the encrypted s field into a working signature"""
2612 extract_sig = self._cached(
2613 self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
2614 func = extract_sig(video_id, player_url, s)
2615 self._print_sig_code(func, s)
2616 return func(s)
2617
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature

    The n function of the player is run with the native JS interpreter
    first. When that raises JSInterpreter.Exception, the same function is
    executed through PhantomJSwrapper instead; if the wrapper cannot be
    created, the native error is raised.
    Raises ExtractorError when `player_url` is None.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        native_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        decrypted = native_nsig(jsi, func_code)(s)
    except JSInterpreter.Exception as native_error:
        try:
            phantom = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            # No fallback interpreter could be set up, so report the native failure
            raise native_error
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(native_error)

        arg_names, func_body = func_code
        js_snippet = f'console.log(function({", ".join(arg_names)}) {{ {func_body} }}({s!r}));'
        decrypted = phantom.execute(
            js_snippet, video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {decrypted}')
    return decrypted
2648
2649 def _extract_n_function_name(self, jscode):
2650 funcname, idx = self._search_regex(
2651 r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
2652 jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
2653 if not idx:
2654 return funcname
2655
2656 return json.loads(js_to_json(self._search_regex(
2657 rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
2658 f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
2659
def _extract_n_function_code(self, video_id, player_url):
    """Return (interpreter, player id, n function code) for the given player.

    The function code is taken from the 'youtube-nsig' cache when available.
    Otherwise the player JS is loaded, the n function is extracted from it
    and the result is stored in the cache.
    """
    player_id = self._extract_player_info(player_url)
    cached_code = self.cache.load('youtube-nsig', player_id, after='2022.08.19')
    if cached_code:
        # The interpreter is built around the cached function code itself
        return JSInterpreter(cached_code), player_id, cached_code

    player_js = self._load_player(video_id, player_url)
    jsi = JSInterpreter(player_js)
    n_function_name = self._extract_n_function_name(player_js)
    func_code = jsi.extract_function_code(n_function_name)
    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
2672
2673 def _extract_n_function_from_code(self, jsi, func_code):
2674 func = jsi.extract_function_from_code(*func_code)
2675
2676 def extract_nsig(s):
2677 try:
2678 ret = func([s])
2679 except JSInterpreter.Exception:
2680 raise
2681 except Exception as e:
2682 raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
2683
2684 if ret.startswith('enhanced_except_'):
2685 raise JSInterpreter.Exception('Signature function returned an exception')
2686 return ret
2687
2688 return extract_nsig
2689
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The 'STS' entry of `ytcfg` is used when it is set. Otherwise the value
    is searched in the player JS, which requires `player_url`. Without a
    player URL this raises ExtractorError if `fatal`, else warns and
    returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
2713
def _mark_watched(self, video_id, player_responses):
    """Request the playback tracking URLs so that the video counts as watched.

    Two URLs from 'playbackTracking' are requested in turn:
    'videostatsPlaybackUrl' and then 'videostatsWatchtimeUrl' (the "fully
    watched" one). Each gets its query string extended with a client
    playback nonce (cpn) and a playback position just before the end of the
    video. If either URL is missing, a warning is issued and the method
    stops. Request failures are non-fatal.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        # Draw each character uniformly. The earlier `randint(0, 256) & 63`
        # had inclusive bounds (257 values), so index 0 was picked more often
        cpn = ''.join(random.choices(CPN_ALPHABET, k=16))

        # more consistent results setting it to right before the end
        video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]

        qs.update({
            'ver': ['2'],
            'cpn': [cpn],
            'cmt': video_length,
            'el': 'detailpage',  # otherwise defaults to "shorts"
        })

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs.update({
                'st': video_length,
                'et': video_length,
            })

        url = urllib.parse.urlunparse(
            parsed_url._replace(query=urllib.parse.urlencode(qs, True)))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
2754
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield url_results for YouTube videos referenced by an arbitrary webpage.

    A `<link rel="alternate">` pointing at a YouTube watch URL takes
    precedence: its result is yielded and extraction is stopped. Otherwise
    the parent class results are yielded, followed by lazyYT embeds and
    embeds of the Wordpress "YouTube Video Importer" plugin.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate_link = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate_link is not None:
        yield cls.url_result(alternate_link.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    lazyyt_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    for raw_id in lazyyt_ids:
        yield cls.url_result(unescapeHTML(raw_id), cls, raw_id)

    # Wordpress "YouTube Video Importer" plugin
    yvii_player_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    for player_div in re.finditer(yvii_player_re, webpage):
        # Group 3 is the unnamed group holding the video id
        embedded_id = player_div.group(3)
        yield cls.url_result(embedded_id, cls, embedded_id)
2778
@classmethod
def extract_id(cls, url):
    """Return the video id obtained from `url` via get_temp_id.

    Raises ExtractorError when no id can be obtained.
    """
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
2785
def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the chaptered player bar found in `data`.

    Start times are read from 'timeRangeStartMillis' and scaled down by
    1000; titles come from the chapter's 'simpleText' title.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters')
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_seconds(chapter):
        start_ms = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_ms, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list, chapter_time=start_seconds, chapter_title=title_of, duration=duration)
2800
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Extract chapters from the macro markers lists of the engagement panels.

    The panels are tried in order and the first non-empty chapter list is
    returned; an empty list is returned when none yields chapters.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
2813
def _extract_chapters_from_description(self, description, duration):
    """Extract chapters from description lines that begin with a timestamp.

    Each matching line contributes a (timestamp, title) pair. The chapters
    are extracted non-strictly, i.e. they are sorted by start time first.
    """
    timestamped_lines = re.findall(
        r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or '')

    def start_of(line):
        return parse_duration(line[0])

    def title_of(line):
        return line[1]

    return self._extract_chapters(
        timestamped_lines, chapter_time=start_of, chapter_title=title_of,
        duration=duration, strict=False)
2819
2820 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
2821 if not duration:
2822 return
2823 chapter_list = [{
2824 'start_time': chapter_time(chapter),
2825 'title': chapter_title(chapter),
2826 } for chapter in chapter_list or []]
2827 if not strict:
2828 chapter_list.sort(key=lambda c: c['start_time'] or 0)
2829
2830 chapters = [{'start_time': 0}]
2831 for idx, chapter in enumerate(chapter_list):
2832 if chapter['start_time'] is None:
2833 self.report_warning(f'Incomplete chapter {idx}')
2834 elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
2835 chapters.append(chapter)
2836 else:
2837 self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')
2838 return chapters[1:]
2839
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer into a comment info dict.

    Returns None when the renderer has no 'commentId'. `parent` is the id
    of the parent comment; top-level comments get the parent 'root'.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return None

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
    author = self._get_text(comment_renderer, 'authorText')

    def lookup(getter, expected_type):
        return try_get(comment_renderer, getter, expected_type)

    vote_text = lookup(
        (lambda x: x['voteCount']['simpleText'], lambda x: x['likeCount']), str)
    action_buttons = lookup(
        lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}

    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': parse_count(vote_text) or 0,
        'is_favorited': 'creatorHeart' in action_buttons,
        'author': author,
        'author_id': lookup(lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str),
        'author_thumbnail': lookup(lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str),
        'author_is_uploader': lookup(lambda x: x['authorIsChannelOwner'], bool),
        'parent': parent or 'root'
    }
2874
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Yield comment dicts by following the continuations in `root_continuation_data`.

    The generator calls itself for the replies of a comment, passing the
    comment id as `parent` and sharing `tracker`, a dict of running counts.
    The 'max_comments' extractor argument supplies up to four limits
    (total, parents, replies, replies per thread); missing ones default to
    sys.maxsize.
    """

    # First value of an extractor argument, or '' when it is not given
    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        # Returns the continuation of the requested sort order, taken from the
        # first header that provides one; None if no header does
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        # Yields the comments of one page, each followed by its replies
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # A bare None tells the consuming loop below to stop entirely
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Cap the replies by the per-thread limit and by what is left of the overall reply limit
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Pad the argument list so that four values can always be unpacked
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        # Only the very first request may skip the key check
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section is only searched for the header's sort continuation
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    # extract_thread signalled that the parent comment limit is reached
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    # Relay the message of the root data when no comment at all was extracted
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
3020
3021 @staticmethod
3022 def _generate_comment_continuation(video_id):
3023 """
3024 Generates initial comment section continuation token from given video id
3025 """
3026 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3027 return base64.b64encode(token.encode()).decode()
3028
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Picks the item section identified as 'comment-item-section' from
    `contents` and returns an iterator over its comments, cut off at the
    first value of the 'max_comments' extractor argument when that is set.
    """
    def iter_comments():
        comment_section = None
        for section in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if section.get('sectionIdentifier') == 'comment-item-section':
                comment_section = section
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(iter_comments(), 0, limit)
3039
3040 @staticmethod
3041 def _get_checkok_params():
3042 return {'contentCheckOk': True, 'racyCheckOk': True}
3043
@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback context part of a player API query.

    `sts` is included as 'signatureTimestamp' unless it is None. The
    check-ok parameters are merged into the top level of the result.
    """
    playback = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback['signatureTimestamp'] = sts

    player_context = {'playbackContext': {'contentPlaybackContext': playback}}
    player_context.update(cls._get_checkok_params())
    return player_context
3057
@staticmethod
def _is_agegated(player_response):
    """Tell whether the playability status of a player response indicates an age gate.

    True if 'desktopLegacyAgeGateReason' is set, or if the status or the
    reason contains one of the known age gate markers.
    """
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )

    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    for expected in AGE_GATE_REASONS:
        for reason in reasons:
            if expected in reason:
                return True
    return False
3069
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of a player response is 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
3073
3074 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
3075
3076 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
3077 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
3078 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
3079 headers = self.generate_api_headers(
3080 ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
3081
3082 yt_query = {
3083 'videoId': video_id,
3084 'params': '8AEB' # enable stories
3085 }
3086 yt_query.update(self._generate_player_context(sts))
3087 return self._extract_response(
3088 item_id=video_id, ep='player', query=yt_query,
3089 ytcfg=player_ytcfg, headers=headers, fatal=True,
3090 default_client=client,
3091 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
3092 ) or None
3093
def _get_requested_clients(self, url, smuggled_data):
    """Work out which innertube clients to query, in order and without duplicates.

    The 'player_client' extractor argument may name individual clients,
    'default' or 'all'; unknown names are skipped with a warning. Without
    any valid request the defaults are used. For music URLs the '_music'
    variant of each requested client is added when such a client exists.
    """
    default = ['android', 'web']
    # Clients whose name starts with an underscore cannot be requested
    public_clients = [name for name in INNERTUBE_CLIENTS.keys() if name[:1] != '_']
    allowed_clients = sorted(
        public_clients, key=lambda name: INNERTUBE_CLIENTS[name]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    wants_music = smuggled_data.get('is_music_url') or self.is_music_url(url)
    if wants_music:
        requested_clients.extend(
            f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)

    return orderedSet(requested_clients)
3117
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect the player responses of the given clients for `video_id`.

    Returns a tuple (prs, player_url): the list of accepted player
    responses and the player URL in effect after the last client. Further
    clients may be queued while iterating, depending on the responses.
    An ExtractorError of a client is only raised if no player response was
    collected at all; otherwise it is reported as a warning.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed so that pop() below takes the clients in their original order
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    clients.append(client_name)
                    all_clients.add(actual_client)
                    return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        # A player URL found for an earlier client is reused
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The iframe API fallback is attempted at most once per call
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            # For the web client the response embedded in the webpage is used when available
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; an earlier one is reported as a warning
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not len(prs):
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
3199
3200 def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, is_live, duration):
3201 itags, stream_ids = {}, []
3202 itag_qualities, res_qualities = {}, {0: None}
3203 q = qualities([
3204 # Normally tiny is the smallest video-only formats. But
3205 # audio-only formats with unknown quality may get tagged as tiny
3206 'tiny',
3207 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
3208 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
3209 ])
3210 streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
3211
3212 for fmt in streaming_formats:
3213 if fmt.get('targetDurationSec'):
3214 continue
3215
3216 itag = str_or_none(fmt.get('itag'))
3217 audio_track = fmt.get('audioTrack') or {}
3218 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
3219 if stream_id in stream_ids:
3220 continue
3221
3222 quality = fmt.get('quality')
3223 height = int_or_none(fmt.get('height'))
3224 if quality == 'tiny' or not quality:
3225 quality = fmt.get('audioQuality', '').lower() or quality
3226 # The 3gp format (17) in android client has a quality of "small",
3227 # but is actually worse than other formats
3228 if itag == '17':
3229 quality = 'tiny'
3230 if quality:
3231 if itag:
3232 itag_qualities[itag] = quality
3233 if height:
3234 res_qualities[height] = quality
3235 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
3236 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
3237 # number of fragment that would subsequently requested with (`&sq=N`)
3238 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
3239 continue
3240
3241 fmt_url = fmt.get('url')
3242 if not fmt_url:
3243 sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
3244 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
3245 encrypted_sig = try_get(sc, lambda x: x['s'][0])
3246 if not all((sc, fmt_url, player_url, encrypted_sig)):
3247 continue
3248 try:
3249 fmt_url += '&%s=%s' % (
3250 traverse_obj(sc, ('sp', -1)) or 'signature',
3251 self._decrypt_signature(encrypted_sig, video_id, player_url)
3252 )
3253 except ExtractorError as e:
3254 self.report_warning('Signature extraction failed: Some formats may be missing',
3255 video_id=video_id, only_once=True)
3256 self.write_debug(e, only_once=True)
3257 continue
3258
3259 query = parse_qs(fmt_url)
3260 throttled = False
3261 if query.get('n'):
3262 try:
3263 decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
3264 fmt_url = update_url_query(fmt_url, {
3265 'n': decrypt_nsig(query['n'][0], video_id, player_url)
3266 })
3267 except ExtractorError as e:
3268 phantomjs_hint = ''
3269 if isinstance(e, JSInterpreter.Exception):
3270 phantomjs_hint = f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} to workaround the issue\n'
3271 self.report_warning(
3272 f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
3273 f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
3274 self.write_debug(e, only_once=True)
3275 throttled = True
3276
3277 if itag:
3278 itags[itag] = 'https'
3279 stream_ids.append(stream_id)
3280
3281 tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
3282 language_preference = (
3283 10 if audio_track.get('audioIsDefault') and 10
3284 else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
3285 else -1)
3286 # Some formats may have much smaller duration than others (possibly damaged during encoding)
3287 # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
3288 # Make sure to avoid false positives with small duration differences.
3289 # E.g. __2ABJjxzNo, ySuUZEjARPY
3290 is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
3291 if is_damaged:
3292 self.report_warning(
3293 f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
3294 dct = {
3295 'asr': int_or_none(fmt.get('audioSampleRate')),
3296 'filesize': int_or_none(fmt.get('contentLength')),
3297 'format_id': itag,
3298 'format_note': join_nonempty(
3299 '%s%s' % (audio_track.get('displayName') or '',
3300 ' (default)' if language_preference > 0 else ''),
3301 fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
3302 try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
3303 try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
3304 throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
3305 # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
3306 'source_preference': -10 if throttled else -5 if itag == '22' else -1,
3307 'fps': int_or_none(fmt.get('fps')) or None,
3308 'audio_channels': fmt.get('audioChannels'),
3309 'height': height,
3310 'quality': q(quality),
3311 'has_drm': bool(fmt.get('drmFamilies')),
3312 'tbr': tbr,
3313 'url': fmt_url,
3314 'width': int_or_none(fmt.get('width')),
3315 'language': join_nonempty(audio_track.get('id', '').split('.')[0],
3316 'desc' if language_preference < -1 else ''),
3317 'language_preference': language_preference,
3318 # Strictly de-prioritize damaged and 3gp formats
3319 'preference': -10 if is_damaged else -2 if itag == '17' else None,
3320 }
3321 mime_mobj = re.match(
3322 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
3323 if mime_mobj:
3324 dct['ext'] = mimetype2ext(mime_mobj.group(1))
3325 dct.update(parse_codecs(mime_mobj.group(2)))
3326 no_audio = dct.get('acodec') == 'none'
3327 no_video = dct.get('vcodec') == 'none'
3328 if no_audio:
3329 dct['vbr'] = tbr
3330 if no_video:
3331 dct['abr'] = tbr
3332 if no_audio or no_video:
3333 dct['downloader_options'] = {
3334 # Youtube throttles chunks >~10M
3335 'http_chunk_size': 10485760,
3336 }
3337 if dct.get('ext'):
3338 dct['container'] = dct['ext'] + '_dash'
3339 yield dct
3340
3341 live_from_start = is_live and self.get_param('live_from_start')
3342 skip_manifests = self._configuration_arg('skip')
3343 if not self.get_param('youtube_include_hls_manifest', True):
3344 skip_manifests.append('hls')
3345 if not self.get_param('youtube_include_dash_manifest', True):
3346 skip_manifests.append('dash')
3347 get_dash = 'dash' not in skip_manifests and (
3348 not is_live or live_from_start or self._configuration_arg('include_live_dash'))
3349 get_hls = not live_from_start and 'hls' not in skip_manifests
3350
3351 def process_manifest_format(f, proto, itag):
3352 if itag in itags:
3353 if itags[itag] == proto or f'{itag}-{proto}' in itags:
3354 return False
3355 itag = f'{itag}-{proto}'
3356 if itag:
3357 f['format_id'] = itag
3358 itags[itag] = proto
3359
3360 f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
3361 if f['quality'] == -1 and f.get('height'):
3362 f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
3363 return True
3364
3365 subtitles = {}
3366 for sd in streaming_data:
3367 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
3368 if hls_manifest_url:
3369 fmts, subs = self._extract_m3u8_formats_and_subtitles(hls_manifest_url, video_id, 'mp4', fatal=False, live=is_live)
3370 subtitles = self._merge_subtitles(subs, subtitles)
3371 for f in fmts:
3372 if process_manifest_format(f, 'hls', self._search_regex(
3373 r'/itag/(\d+)', f['url'], 'itag', default=None)):
3374 yield f
3375
3376 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
3377 if dash_manifest_url:
3378 formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
3379 subtitles = self._merge_subtitles(subs, subtitles) # Prioritize HLS subs over DASH
3380 for f in formats:
3381 if process_manifest_format(f, 'dash', f['format_id']):
3382 f['filesize'] = int_or_none(self._search_regex(
3383 r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
3384 if live_from_start:
3385 f['is_from_start'] = True
3386
3387 yield f
3388 yield subtitles
3389
3390 def _extract_storyboard(self, player_responses, duration):
3391 spec = get_first(
3392 player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
3393 base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
3394 if not base_url:
3395 return
3396 L = len(spec) - 1
3397 for i, args in enumerate(spec):
3398 args = args.split('#')
3399 counts = list(map(int_or_none, args[:5]))
3400 if len(args) != 8 or not all(counts):
3401 self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
3402 continue
3403 width, height, frame_count, cols, rows = counts
3404 N, sigh = args[6:]
3405
3406 url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
3407 fragment_count = frame_count / (cols * rows)
3408 fragment_duration = duration / fragment_count
3409 yield {
3410 'format_id': f'sb{i}',
3411 'format_note': 'storyboard',
3412 'ext': 'mhtml',
3413 'protocol': 'mhtml',
3414 'acodec': 'none',
3415 'vcodec': 'none',
3416 'url': url,
3417 'width': width,
3418 'height': height,
3419 'fps': frame_count / duration,
3420 'rows': rows,
3421 'columns': cols,
3422 'fragments': [{
3423 'url': url.replace('$M', str(j)),
3424 'duration': min(fragment_duration, duration - (j * fragment_duration)),
3425 } for j in range(math.ceil(fragment_count))],
3426 }
3427
3428 def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
3429 webpage = None
3430 if 'webpage' not in self._configuration_arg('player_skip'):
3431 webpage = self._download_webpage(
3432 webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False)
3433
3434 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
3435
3436 player_responses, player_url = self._extract_player_responses(
3437 self._get_requested_clients(url, smuggled_data),
3438 video_id, webpage, master_ytcfg)
3439
3440 return webpage, master_ytcfg, player_responses, player_url
3441
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Collect live details, streaming data, formats and manifest subtitles.

    @returns  (live_broadcast_details, is_live, streaming_data, formats, subtitles)
    """
    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    # Prefer the flag from videoDetails; fall back to the microformat's live state
    live_now = get_first(video_details, 'isLive')
    if live_now is None:
        live_now = get_first(broadcast_details, 'isLiveNow')

    # The last item produced is the subtitles dict; everything before it is a format
    *format_list, manifest_subs = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_now, duration)

    return broadcast_details, live_now, streaming_data, format_list, manifest_subs
3452
def _real_extract(self, url):
    """Extract the info dict for a single video URL.

    Steps, in order: download the webpage/player responses, redirect to a
    trailer or expand a multifeed video into a playlist where applicable,
    list the formats, then assemble metadata (thumbnails, live state,
    subtitles, music metadata parsed from the description, data from the
    "next" endpoint / initial data, upload date and availability).

    @returns  the info dict, a `url_result` for a trailer, or a
              `playlist_result` for multifeed videos
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id

    webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Meta-tag lookups are only possible when the webpage was downloaded
    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])
    video_title = (
        get_first(video_details, 'title')
        or self._get_text(microformats, (..., 'title'))
        or search_meta(['og:title', 'twitter:title', 'title']))
    video_description = get_first(video_details, 'shortDescription')

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = urllib.parse.parse_qs(
                    urllib.parse.unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += ' (%s)' % feed_title
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    duration = int_or_none(
        get_first(video_details, 'lengthSeconds')
        or get_first(microformats, 'lengthSeconds')
        or parse_duration(search_meta('duration'))) or None

    # The duration must be forwarded explicitly: `_list_formats` defaults it to
    # None and passes it on to the format extraction
    live_broadcast_details, is_live, streaming_data, formats, automatic_captions = \
        self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # A subreason may be present without a main reason; avoid `None += str`
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break
    thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
        # in resolution, these are not the custom thumbnail. So de-prioritize them
        'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
        'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Rank by position in thumbnail_names; unknown names rank last
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    if is_live is None:
        if is_upcoming or live_content is False:
            is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False
    live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_end_time and live_start_time:
        duration = live_end_time - live_start_time

    if is_live and self.get_param('live_from_start'):
        self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

    formats.extend(self._extract_storyboard(player_responses, duration))

    # source_preference is lower for throttled/potentially damaged formats
    self._sort_formats(formats, (
        'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))

    info = {
        'id': video_id,
        'title': video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if user don't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'uploader': get_first(video_details, 'author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'is_live': is_live,
        'was_live': (False if is_live or is_upcoming or live_content is False
                     else None if is_live is None or is_upcoming is None
                     else live_content),
        'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
        'release_timestamp': live_start_time,
    }

    if get_first(video_details, 'isPostLiveDvr'):
        self.write_debug('Video is in Post-Live Manifestless mode')
        info['live_status'] = 'post_live'
        if (duration or 0) > 4 * 3600:
            self.report_warning(
                'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '
                'This is a known issue and patches are welcome')

    subtitles = {}
    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

        def process_language(container, base_url, lang_code, sub_name, query):
            # Adds one entry per supported subtitle format to container[lang_code]
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
                    'name': sub_name,
                })

        # NB: Constructing the full subtitle dictionary is slow
        get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
            self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
        for lang_code, caption_track in captions.items():
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            # Parse the URL only once we know there is one
            orig_lang = parse_qs(base_url).get('lang', [None])[-1]
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                orig_trans_code = trans_code
                if caption_track.get('kind') != 'asr':
                    if not get_translated_subs:
                        continue
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, None, ' from %s')
                # Add an "-orig" label to the original language so that it can be distinguished.
                # The subs are returned without "-orig" as well for compatibility
                if lang_code == f'a-{orig_trans_code}':
                    process_language(
                        automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
                # Setting tlang=lang returns damaged subtitles.
                process_language(automatic_captions, base_url, trans_code, trans_name,
                                 {} if orig_lang == orig_trans_code else {'tlang': trans_code})

    info['automatic_captions'] = automatic_captions
    info['subtitles'] = subtitles

    # Start/end times given in the URL fragment take precedence over the query
    parsed_url = urllib.parse.urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = urllib.parse.parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(query[k][0])

    # Youtube Music Auto-generated description
    if video_description:
        # NB: The pattern is in verbose mode, so literal spaces must be escaped
        mobj = re.search(
            r'''(?xs)
                (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
                (?P<album>[^\n]+)
                (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
                (?:.+?Released\ on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
                (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
                .+\nAuto-generated\ by\ YouTube\.\s*$
            ''', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
    if not initial_data:
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query,
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    info['comment_count'] = traverse_obj(initial_data, (
        'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
        'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'
    ), (
        'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
        'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'
    ), expected_type=int_or_none, get_all=False)

    try:  # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
    except (KeyError, IndexError, TypeError):
        pass
    else:
        info.setdefault('subtitles', {})['live_chat'] = [{
            # url is needed to set cookies
            'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]

    # Both names are used unconditionally further down (upload date, comments),
    # so they must exist even when no initial data could be obtained
    contents = []
    vpir = None
    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or self._extract_chapters_from_description(video_description, duration)
            or None)

        contents = traverse_obj(
            initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
            expected_type=list, default=[])

        vpir = get_first(contents, 'videoPrimaryInfoRenderer')
        if vpir:
            stl = vpir.get('superTitleLink')
            if stl:
                stl = self._get_text(stl)
                if try_get(
                        vpir,
                        lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                    info['location'] = stl
                else:
                    mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
                    if mobj:
                        info.update({
                            'series': mobj.group(1),
                            'season_number': int(mobj.group(2)),
                            'episode_number': int(mobj.group(3)),
                        })
            for tlb in (try_get(
                    vpir,
                    lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                    list) or []):
                tbr = tlb.get('toggleButtonRenderer') or {}
                for getter, regex in [(
                        lambda x: x['defaultText']['accessibility']['accessibilityData'],
                        r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                            lambda x: x['accessibility'],
                            lambda x: x['accessibilityData']['accessibilityData'],
                        ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                    label = (try_get(tbr, getter, dict) or {}).get('label')
                    if label:
                        mobj = re.match(regex, label)
                        if mobj:
                            info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                            break
            sbr_tooltip = try_get(
                vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
            if sbr_tooltip:
                like_count, dislike_count = sbr_tooltip.split(' / ')
                info.update({
                    'like_count': str_to_int(like_count),
                    'dislike_count': str_to_int(dislike_count),
                })
        vsir = get_first(contents, 'videoSecondaryInfoRenderer')
        if vsir:
            vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
            info.update({
                'channel': self._get_text(vor, 'title'),
                'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

            rows = try_get(
                vsir,
                lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                list) or []
            # A divider line between rows means more than one song is listed;
            # in that case the per-song fields below are ambiguous and skipped
            multiple_songs = False
            for row in rows:
                if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                    multiple_songs = True
                    break
            for row in rows:
                mrr = row.get('metadataRowRenderer') or {}
                mrr_title = mrr.get('title')
                if not mrr_title:
                    continue
                mrr_title = self._get_text(mrr, 'title')
                mrr_contents_text = self._get_text(mrr, ('contents', 0))
                if mrr_title == 'License':
                    info['license'] = mrr_contents_text
                elif not multiple_songs:
                    if mrr_title == 'Album':
                        info['album'] = mrr_contents_text
                    elif mrr_title == 'Artist':
                        info['artist'] = mrr_contents_text
                    elif mrr_title == 'Song':
                        info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }

    # The upload date for scheduled, live and past live streams / premieres in microformats
    # may be different from the stream date. Although not in UTC, we will prefer it in this case.
    # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
    upload_date = (
        unified_strdate(get_first(microformats, 'uploadDate'))
        or unified_strdate(search_meta('uploadDate')))
    if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):
        upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date
    info['upload_date'] = upload_date

    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = get_first(video_details, 'isPrivate', expected_type=bool)
    is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
3950
3951
3952class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
3953
@staticmethod
def passthrough_smuggled_data(func):
    """Decorator for extraction methods that need the smuggled data.

    The decorated method is called as `func(self, url, smuggled_data)` with
    the data unsmuggled from the URL (plus `is_music_url` for music URLs).
    Any smuggled data is then re-smuggled into the URL of every entry of
    the returned info dict.
    """
    def _resmuggle_entries(entries, smuggled_data):
        # Lazy on purpose: entries are only touched as they are consumed
        for item in entries:
            # TODO: Convert URL to music.youtube instead.
            # Do we need to passthrough any other smuggled_data?
            item['url'] = smuggle_url(item['url'], smuggled_data)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        result = func(self, url, smuggled_data)
        if not smuggled_data:
            return result
        if result.get('entries'):
            result['entries'] = _resmuggle_entries(result['entries'], smuggled_data)
        return result

    return wrapper
3973
3974 def _extract_channel_id(self, webpage):
3975 channel_id = self._html_search_meta(
3976 'channelId', webpage, 'channel id', default=None)
3977 if channel_id:
3978 return channel_id
3979 channel_url = self._html_search_meta(
3980 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3981 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3982 'twitter:app:url:googleplay'), webpage, 'channel url')
3983 return self._search_regex(
3984 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3985 channel_url, 'channel id')
3986
3987 @staticmethod
3988 def _extract_basic_item_renderer(item):
3989 # Modified from _extract_grid_item_renderer
3990 known_basic_renderers = (
3991 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
3992 )
3993 for key, renderer in item.items():
3994 if not isinstance(renderer, dict):
3995 continue
3996 elif key in known_basic_renderers:
3997 return renderer
3998 elif key.startswith('grid') and key.endswith('Renderer'):
3999 return renderer
4000
4001 def _grid_entries(self, grid_renderer):
4002 for item in grid_renderer['items']:
4003 if not isinstance(item, dict):
4004 continue
4005 renderer = self._extract_basic_item_renderer(item)
4006 if not isinstance(renderer, dict):
4007 continue
4008 title = self._get_text(renderer, 'title')
4009
4010 # playlist
4011 playlist_id = renderer.get('playlistId')
4012 if playlist_id:
4013 yield self.url_result(
4014 'https://www.youtube.com/playlist?list=%s' % playlist_id,
4015 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4016 video_title=title)
4017 continue
4018 # video
4019 video_id = renderer.get('videoId')
4020 if video_id:
4021 yield self._extract_video(renderer)
4022 continue
4023 # channel
4024 channel_id = renderer.get('channelId')
4025 if channel_id:
4026 yield self.url_result(
4027 'https://www.youtube.com/channel/%s' % channel_id,
4028 ie=YoutubeTabIE.ie_key(), video_title=title)
4029 continue
4030 # generic endpoint URL support
4031 ep_url = urljoin('https://www.youtube.com/', try_get(
4032 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
4033 str))
4034 if ep_url:
4035 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
4036 if ie.suitable(ep_url):
4037 yield self.url_result(
4038 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
4039 break
4040
def _music_reponsive_list_entry(self, renderer):
    """Convert a music list item renderer into a url_result.

    Precedence: a direct video, then a playlist (via the watch endpoint,
    including the video when one is given), then a browse page.
    Returns None when none of these can be found.
    """
    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(f'https://music.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    watch_endpoint = ('navigationEndpoint', 'watchEndpoint')
    playlist_id = traverse_obj(renderer, (*watch_endpoint, 'playlistId'))
    if not playlist_id:
        browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
        if not browse_id:
            return None
        return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
                               ie=YoutubeTabIE.ie_key(), video_id=browse_id)

    video_id = traverse_obj(renderer, (*watch_endpoint, 'videoId'))
    if video_id:
        playlist_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
    else:
        playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
    return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4058
4059 def _shelf_entries_from_content(self, shelf_renderer):
4060 content = shelf_renderer.get('content')
4061 if not isinstance(content, dict):
4062 return
4063 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
4064 if renderer:
4065 # TODO: add support for nested playlists so each shelf is processed
4066 # as separate playlist
4067 # TODO: this includes only first N items
4068 yield from self._grid_entries(renderer)
4069 renderer = content.get('horizontalListRenderer')
4070 if renderer:
4071 # TODO
4072 pass
4073
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """
    Yield the entries of a shelf: a url_result for the shelf's own URL (if it
    has one), followed by the entries embedded in the shelf content.
    @param skip_channels: yield nothing for shelves whose URL contains '/channels?'
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to other channels; note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # The embedded content is extracted as well; for a shelf without
    # a shelf URL it is the only source of entries
    yield from self._shelf_entries_from_content(shelf_renderer)
4089
4090 def _playlist_entries(self, video_list_renderer):
4091 for content in video_list_renderer['contents']:
4092 if not isinstance(content, dict):
4093 continue
4094 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
4095 if not isinstance(renderer, dict):
4096 continue
4097 video_id = renderer.get('videoId')
4098 if not video_id:
4099 continue
4100 yield self._extract_video(renderer)
4101
def _rich_entries(self, rich_grid_renderer):
    """Yield the extracted video of the renderer's 'content' -> 'videoRenderer', if it has a 'videoId'"""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
4109
4110 def _video_entry(self, video_renderer):
4111 video_id = video_renderer.get('videoId')
4112 if video_id:
4113 return self._extract_video(video_renderer)
4114
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a url_result for the tile's on-tap URL, titled with its hashtag text; None if there is no URL"""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4121
def _post_thread_entries(self, post_thread_renderer):
    """
    Yield the entries referenced by a post ('backstagePostRenderer'): its
    video attachment, its playlist attachment and every link in the post
    text that YoutubeIE accepts (except a link to the attached video).
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # The attached video has already been yielded above
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
4157
4158 def _post_thread_continuation_entries(self, post_thread_continuation):
4159 contents = post_thread_continuation.get('contents')
4160 if not isinstance(contents, list):
4161 return
4162 for content in contents:
4163 renderer = content.get('backstagePostThreadRenderer')
4164 if isinstance(renderer, dict):
4165 yield from self._post_thread_entries(renderer)
4166 continue
4167 renderer = content.get('videoRenderer')
4168 if isinstance(renderer, dict):
4169 yield self._video_entry(renderer)
4170
4171 r''' # unused
4172 def _rich_grid_entries(self, contents):
4173 for content in contents:
4174 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4175 if video_renderer:
4176 entry = self._video_entry(video_renderer)
4177 if entry:
4178 yield entry
4179 '''
4180
def _extract_entries(self, parent_renderer, continuation_list):
    """
    Yield the entries found in `parent_renderer['contents']`.

    @param continuation_list: one-element list used as an output parameter;
                              it is reset to [None] and then filled with the
                              continuation extracted along the way
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Maps a renderer key to a callable returning an iterable of entries
    known_renderers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        is_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for isr_content in try_get(is_renderer, lambda x: x['contents'], list) or []:
            if not isinstance(isr_content, dict):
                continue
            # Only the first known renderer of each item is processed
            for key, renderer in isr_content.items():
                handler = known_renderers.get(key)
                if handler is None:
                    continue
                # Handlers may produce None for unusable items
                yield from filter(None, handler(renderer))
                continuation_list[0] = self._extract_continuation(renderer)
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
4229
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """
    Yield the entries of a tab: first those embedded in `tab` itself, then
    those of each continuation page, for as long as a continuation is found
    and the response contains a known renderer.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # `continuation_list` is looked up at call time, so this always fills
        # the list currently bound to that name (it is rebound below)
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Handlers for the values of response['continuationContents']
    known_continuation_renderers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # (handler, key under which the handler expects the continuation items),
    # selected by the renderer key of the first appended continuation item
    known_renderers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            handler = known_continuation_renderers.get(key)
            if handler is None:
                continue
            continuation_renderer = value
            continuation_list = [None]
            yield from handler(continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key in continuation_item:
            if key not in known_renderers:
                continue
            handler, items_key = known_renderers[key]
            video_items_renderer = {items_key: continuation_items}
            continuation_list = [None]
            yield from handler(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        # Stop when the response contained nothing that could be processed
        if not video_items_renderer:
            break
4304
4305 @staticmethod
4306 def _extract_selected_tab(tabs, fatal=True):
4307 for tab in tabs:
4308 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
4309 if renderer.get('selected') is True:
4310 return renderer
4311 else:
4312 if fatal:
4313 raise ExtractorError('Unable to find selected tab')
4314
def _extract_uploader(self, data):
    """
    Return the 'uploader', 'uploader_id' and 'uploader_url' found in the video
    owner of the playlist's secondary sidebar info; fields without a value are
    left out, and the result is empty when there is no owner.
    """
    renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_text = owner.get('text')
    uploader = {
        # Reduce a text of the form 'by <name> and <N> others' to '<name>'
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], str)),
    }
    return {field: value for field, value in uploader.items() if value is not None}
4330
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Build the playlist result for the selected tab: title, description,
    uploader/channel fields, thumbnails, tags, availability and counts are
    collected from `data` and combined with the entries of the selected tab.
    """
    playlist_id = title = description = None
    channel_name = channel_url = channel_id = None
    tags = []

    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    # Channel metadata is preferred; playlist metadata is the fallback
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        return url_or_none((url or '').split('=')[0] + '=s0')

    def _append_uncropped(thumbnails, thumbnail_id, preference):
        # Derived from the first thumbnail; nothing is added for an empty
        # list or when no valid URL results
        if not thumbnails:
            return
        uncropped_url = _get_uncropped(thumbnails[0]['url'])
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        # No channel metadata: take the uploader from the playlist sidebar
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror the uploader fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)
4422
4423 def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
4424 first_id = last_id = response = None
4425 for page_num in itertools.count(1):
4426 videos = list(self._playlist_entries(playlist))
4427 if not videos:
4428 return
4429 start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
4430 if start >= len(videos):
4431 return
4432 yield from videos[start:]
4433 first_id = first_id or videos[0]['id']
4434 last_id = videos[-1]['id']
4435 watch_endpoint = try_get(
4436 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
4437 headers = self.generate_api_headers(
4438 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
4439 visitor_data=self._extract_visitor_data(response, data, ytcfg))
4440 query = {
4441 'playlistId': playlist_id,
4442 'videoId': watch_endpoint.get('videoId') or last_id,
4443 'index': watch_endpoint.get('index') or len(videos),
4444 'params': watch_endpoint.get('params') or 'OAE%3D'
4445 }
4446 response = self._extract_response(
4447 item_id='%s page %d' % (playlist_id, page_num),
4448 query=query, ep='next', headers=headers, ytcfg=ytcfg,
4449 check_get_keys='contents'
4450 )
4451 playlist = try_get(
4452 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4453
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Return the result for a playlist shown next to a video: a url_result
    for the playlist's own page when it links to one, otherwise a playlist
    built from the inline entries.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    playlist_url = urljoin(url, endpoint_url)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    delegate = bool(playlist_url) and playlist_url != url and not is_known_unviewable
    if not delegate:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
4476
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    is_private = is_unlisted = None
    renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(renderer)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_dropdown_entries = try_get(
        renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for dropdown_entry in privacy_dropdown_entries:
        if not try_get(dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
        if label:
            # The label of the selected entry is treated like a badge
            badge_labels.add(label.lower())
            break

    for badge_label in badge_labels:
        if badge_label == 'public':
            is_unlisted = is_private = False
        elif badge_label == 'private':
            is_private = True
        elif badge_label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)
4508
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first non-empty value of type `expected_type` stored under
    `info_renderer` in the items of the playlist sidebar; None if there is none.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type) for item in sidebar_items)
    return next(filter(None, candidates), None)
4517
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again, this time including unavailable videos.

    The browse id and params are taken from the 'show unavailable videos'
    menu item of the primary sidebar; default values are used when that item
    is not found. Returns None without a request if there is no primary
    sidebar info, otherwise the (non-fatal) API response.
    """
    renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not renderer:
        return

    browse_id = params = None
    menu_items = try_get(
        renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
        text = try_get(
            nav_item_renderer, lambda x: x['text']['simpleText'], str)
        if text and text.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id = browse_endpoint.get('browseId')
            params = browse_endpoint.get('params')
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
4553
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the 'skip' configuration argument of YoutubeTabIE"""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped
4557
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its initial data, retrying on network
    errors (other than HTTP 403/429) and on incomplete initial data.
    @returns: (webpage, data); either may be None if extraction failed
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            # HTTP 403 and 429 are reported right away instead of being retried
            is_unretriable_http = isinstance(cause, urllib.error.HTTPError) and cause.code in (403, 429)
            if isinstance(cause, network_exceptions) and not is_unretriable_http:
                retry.error = e
                continue
            self._error_or_warning(e, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
            retry.error = ExtractorError('Incomplete yt initial data received')
            continue

    return webpage, data
4585
4586 def _report_playlist_authcheck(self, ytcfg, fatal=True):
4587 """Use if failed to extract ytcfg (and data) from initial webpage"""
4588 if not ytcfg and self.is_authenticated:
4589 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
4590 if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal:
4591 raise ExtractorError(
4592 f'{msg}. If you are not downloading private content, or '
4593 'your cookies are only for the first account and channel,'
4594 ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
4595 expected=True)
4596 self.report_warning(msg, only_once=True)
4597
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Return (data, ytcfg) for `url`: the initial data of the webpage unless
    the webpage is skipped, or the response of the API endpoint the URL
    resolves to when no webpage data was obtained.
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)
    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
4617
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the 'navigation/resolve_url' API and return the
    response of the endpoint it resolves to ('browse' or 'next').
    Raises ExtractorError if the URL resolves to neither and `fatal` is set;
    otherwise a warning is reported and None is returned.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # (key in the resolved endpoint, API endpoint to query with its value)
    for ep_key, ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return None
    raise ExtractorError(err_note, expected=True)
4635
# Default for the `params` argument of _search_results
_SEARCH_PARAMS = None

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of a search for `query`, requesting further pages for
    as long as a continuation is found.
    @param params: value of the request's 'params' field; `_SEARCH_PARAMS`
                   is used when not given, and the field is omitted when falsy
    """
    search_params = self._SEARCH_PARAMS if params is NO_DEFAULT else params
    data = {'query': query}
    if search_params:
        data['params'] = search_params

    # Locations of the result list in the different kinds of responses
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    check_get_keys = tuple({keys[0] for keys in content_keys})
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        # The continuation of the previous page is merged into the request
        data.update(continuation_list[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        parent_renderer = {'contents': list(variadic(slr_contents))}
        yield from self._extract_entries(parent_renderer, continuation_list)
        if not continuation_list[0]:
            break
4670
4671
4672class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
4673 IE_DESC = 'YouTube Tabs'
4674 _VALID_URL = r'''(?x:
4675 https?://
4676 (?:\w+\.)?
4677 (?:
4678 youtube(?:kids)?\.com|
4679 %(invidious)s
4680 )/
4681 (?:
4682 (?P<channel_type>channel|c|user|browse)/|
4683 (?P<not_channel>
4684 feed/|hashtag/|
4685 (?:playlist|watch)\?.*?\blist=
4686 )|
4687 (?!(?:%(reserved_names)s)\b) # Direct URLs
4688 )
4689 (?P<id>[^/?\#&]+)
4690 )''' % {
4691 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
4692 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
4693 }
4694 IE_NAME = 'youtube:tab'
4695
4696 _TESTS = [{
4697 'note': 'playlists, multipage',
4698 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
4699 'playlist_mincount': 94,
4700 'info_dict': {
4701 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
4702 'title': 'Igor Kleiner - Playlists',
4703 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
4704 'uploader': 'Igor Kleiner',
4705 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4706 'channel': 'Igor Kleiner',
4707 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4708 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4709 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4710 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4711 'channel_follower_count': int
4712 },
4713 }, {
4714 'note': 'playlists, multipage, different order',
4715 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
4716 'playlist_mincount': 94,
4717 'info_dict': {
4718 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
4719 'title': 'Igor Kleiner - Playlists',
4720 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
4721 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4722 'uploader': 'Igor Kleiner',
4723 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4724 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4725 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4726 'channel': 'Igor Kleiner',
4727 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4728 'channel_follower_count': int
4729 },
4730 }, {
4731 'note': 'playlists, series',
4732 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
4733 'playlist_mincount': 5,
4734 'info_dict': {
4735 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4736 'title': '3Blue1Brown - Playlists',
4737 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4738 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4739 'uploader': '3Blue1Brown',
4740 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4741 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4742 'channel': '3Blue1Brown',
4743 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
4744 'tags': ['Mathematics'],
4745 'channel_follower_count': int
4746 },
4747 }, {
4748 'note': 'playlists, singlepage',
4749 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
4750 'playlist_mincount': 4,
4751 'info_dict': {
4752 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4753 'title': 'ThirstForScience - Playlists',
4754 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
4755 'uploader': 'ThirstForScience',
4756 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4757 'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4758 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4759 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4760 'tags': 'count:13',
4761 'channel': 'ThirstForScience',
4762 'channel_follower_count': int
4763 }
4764 }, {
4765 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
4766 'only_matching': True,
4767 }, {
4768 'note': 'basic, single video playlist',
4769 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4770 'info_dict': {
4771 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4772 'uploader': 'Sergey M.',
4773 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4774 'title': 'youtube-dl public playlist',
4775 'description': '',
4776 'tags': [],
4777 'view_count': int,
4778 'modified_date': '20201130',
4779 'channel': 'Sergey M.',
4780 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4781 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4782 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4783 },
4784 'playlist_count': 1,
4785 }, {
4786 'note': 'empty playlist',
4787 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4788 'info_dict': {
4789 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4790 'uploader': 'Sergey M.',
4791 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4792 'title': 'youtube-dl empty playlist',
4793 'tags': [],
4794 'channel': 'Sergey M.',
4795 'description': '',
4796 'modified_date': '20160902',
4797 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4798 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4799 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4800 },
4801 'playlist_count': 0,
4802 }, {
4803 'note': 'Home tab',
4804 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
4805 'info_dict': {
4806 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4807 'title': 'lex will - Home',
4808 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4809 'uploader': 'lex will',
4810 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4811 'channel': 'lex will',
4812 'tags': ['bible', 'history', 'prophesy'],
4813 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4814 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4815 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4816 'channel_follower_count': int
4817 },
4818 'playlist_mincount': 2,
4819 }, {
4820 'note': 'Videos tab',
4821 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
4822 'info_dict': {
4823 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4824 'title': 'lex will - Videos',
4825 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4826 'uploader': 'lex will',
4827 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4828 'tags': ['bible', 'history', 'prophesy'],
4829 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4830 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4831 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4832 'channel': 'lex will',
4833 'channel_follower_count': int
4834 },
4835 'playlist_mincount': 975,
4836 }, {
4837 'note': 'Videos tab, sorted by popular',
4838 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
4839 'info_dict': {
4840 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4841 'title': 'lex will - Videos',
4842 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4843 'uploader': 'lex will',
4844 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4845 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4846 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4847 'channel': 'lex will',
4848 'tags': ['bible', 'history', 'prophesy'],
4849 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4850 'channel_follower_count': int
4851 },
4852 'playlist_mincount': 199,
4853 }, {
4854 'note': 'Playlists tab',
4855 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
4856 'info_dict': {
4857 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4858 'title': 'lex will - Playlists',
4859 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4860 'uploader': 'lex will',
4861 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4862 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4863 'channel': 'lex will',
4864 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4865 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4866 'tags': ['bible', 'history', 'prophesy'],
4867 'channel_follower_count': int
4868 },
4869 'playlist_mincount': 17,
4870 }, {
4871 'note': 'Community tab',
4872 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
4873 'info_dict': {
4874 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4875 'title': 'lex will - Community',
4876 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4877 'uploader': 'lex will',
4878 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4879 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4880 'channel': 'lex will',
4881 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4882 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4883 'tags': ['bible', 'history', 'prophesy'],
4884 'channel_follower_count': int
4885 },
4886 'playlist_mincount': 18,
4887 }, {
4888 'note': 'Channels tab',
4889 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
4890 'info_dict': {
4891 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4892 'title': 'lex will - Channels',
4893 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4894 'uploader': 'lex will',
4895 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4896 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4897 'channel': 'lex will',
4898 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4899 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4900 'tags': ['bible', 'history', 'prophesy'],
4901 'channel_follower_count': int
4902 },
4903 'playlist_mincount': 12,
4904 }, {
4905 'note': 'Search tab',
4906 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
4907 'playlist_mincount': 40,
4908 'info_dict': {
4909 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4910 'title': '3Blue1Brown - Search - linear algebra',
4911 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4912 'uploader': '3Blue1Brown',
4913 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4914 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4915 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4916 'tags': ['Mathematics'],
4917 'channel': '3Blue1Brown',
4918 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
4919 'channel_follower_count': int
4920 },
4921 }, {
4922 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4923 'only_matching': True,
4924 }, {
4925 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4926 'only_matching': True,
4927 }, {
4928 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4929 'only_matching': True,
4930 }, {
4931 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
4932 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4933 'info_dict': {
4934 'title': '29C3: Not my department',
4935 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4936 'uploader': 'Christiaan008',
4937 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4938 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
4939 'tags': [],
4940 'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
4941 'view_count': int,
4942 'modified_date': '20150605',
4943 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4944 'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
4945 'channel': 'Christiaan008',
4946 },
4947 'playlist_count': 96,
4948 }, {
4949 'note': 'Large playlist',
4950 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
4951 'info_dict': {
4952 'title': 'Uploads from Cauchemar',
4953 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
4954 'uploader': 'Cauchemar',
4955 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
4956 'channel_url': 'https://www.youtube.com/c/Cauchemar89',
4957 'tags': [],
4958 'modified_date': r're:\d{8}',
4959 'channel': 'Cauchemar',
4960 'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
4961 'view_count': int,
4962 'description': '',
4963 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
4964 },
4965 'playlist_mincount': 1123,
4966 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
4967 }, {
4968 'note': 'even larger playlist, 8832 videos',
4969 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
4970 'only_matching': True,
4971 }, {
4972 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
4973 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
4974 'info_dict': {
4975 'title': 'Uploads from Interstellar Movie',
4976 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
4977 'uploader': 'Interstellar Movie',
4978 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4979 'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
4980 'tags': [],
4981 'view_count': int,
4982 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4983 'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
4984 'channel': 'Interstellar Movie',
4985 'description': '',
4986 'modified_date': r're:\d{8}',
4987 },
4988 'playlist_mincount': 21,
4989 }, {
4990 'note': 'Playlist with "show unavailable videos" button',
4991 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
4992 'info_dict': {
4993 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
4994 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
4995 'uploader': 'Phim Siêu Nhân Nhật Bản',
4996 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
4997 'view_count': int,
4998 'channel': 'Phim Siêu Nhân Nhật Bản',
4999 'tags': [],
5000 'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5001 'description': '',
5002 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5003 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5004 'modified_date': r're:\d{8}',
5005 },
5006 'playlist_mincount': 200,
5007 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5008 }, {
5009 'note': 'Playlist with unavailable videos in page 7',
5010 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5011 'info_dict': {
5012 'title': 'Uploads from BlankTV',
5013 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
5014 'uploader': 'BlankTV',
5015 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5016 'channel': 'BlankTV',
5017 'channel_url': 'https://www.youtube.com/c/blanktv',
5018 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5019 'view_count': int,
5020 'tags': [],
5021 'uploader_url': 'https://www.youtube.com/c/blanktv',
5022 'modified_date': r're:\d{8}',
5023 'description': '',
5024 },
5025 'playlist_mincount': 1000,
5026 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5027 }, {
5028 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5029 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5030 'info_dict': {
5031 'title': 'Data Analysis with Dr Mike Pound',
5032 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5033 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5034 'uploader': 'Computerphile',
5035 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
5036 'uploader_url': 'https://www.youtube.com/user/Computerphile',
5037 'tags': [],
5038 'view_count': int,
5039 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5040 'channel_url': 'https://www.youtube.com/user/Computerphile',
5041 'channel': 'Computerphile',
5042 },
5043 'playlist_mincount': 11,
5044 }, {
5045 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5046 'only_matching': True,
5047 }, {
5048 'note': 'Playlist URL that does not actually serve a playlist',
5049 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5050 'info_dict': {
5051 'id': 'FqZTN594JQw',
5052 'ext': 'webm',
5053 'title': "Smiley's People 01 detective, Adventure Series, Action",
5054 'uploader': 'STREEM',
5055 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
5056 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
5057 'upload_date': '20150526',
5058 'license': 'Standard YouTube License',
5059 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5060 'categories': ['People & Blogs'],
5061 'tags': list,
5062 'view_count': int,
5063 'like_count': int,
5064 },
5065 'params': {
5066 'skip_download': True,
5067 },
5068 'skip': 'This video is not available.',
5069 'add_ie': [YoutubeIE.ie_key()],
5070 }, {
5071 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5072 'only_matching': True,
5073 }, {
5074 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5075 'only_matching': True,
5076 }, {
5077 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5078 'info_dict': {
5079 'id': 'Wq15eF5vCbI', # This will keep changing
5080 'ext': 'mp4',
5081 'title': str,
5082 'uploader': 'Sky News',
5083 'uploader_id': 'skynews',
5084 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
5085 'upload_date': r're:\d{8}',
5086 'description': str,
5087 'categories': ['News & Politics'],
5088 'tags': list,
5089 'like_count': int,
5090 'release_timestamp': 1642502819,
5091 'channel': 'Sky News',
5092 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5093 'age_limit': 0,
5094 'view_count': int,
5095 'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',
5096 'playable_in_embed': True,
5097 'release_date': '20220118',
5098 'availability': 'public',
5099 'live_status': 'is_live',
5100 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
5101 'channel_follower_count': int
5102 },
5103 'params': {
5104 'skip_download': True,
5105 },
5106 'expected_warnings': ['Ignoring subtitle tracks found in '],
5107 }, {
5108 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5109 'info_dict': {
5110 'id': 'a48o2S1cPoo',
5111 'ext': 'mp4',
5112 'title': 'The Young Turks - Live Main Show',
5113 'uploader': 'The Young Turks',
5114 'uploader_id': 'TheYoungTurks',
5115 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
5116 'upload_date': '20150715',
5117 'license': 'Standard YouTube License',
5118 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5119 'categories': ['News & Politics'],
5120 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5121 'like_count': int,
5122 },
5123 'params': {
5124 'skip_download': True,
5125 },
5126 'only_matching': True,
5127 }, {
5128 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5129 'only_matching': True,
5130 }, {
5131 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5132 'only_matching': True,
5133 }, {
5134 'note': 'A channel that is not live. Should raise error',
5135 'url': 'https://www.youtube.com/user/numberphile/live',
5136 'only_matching': True,
5137 }, {
5138 'url': 'https://www.youtube.com/feed/trending',
5139 'only_matching': True,
5140 }, {
5141 'url': 'https://www.youtube.com/feed/library',
5142 'only_matching': True,
5143 }, {
5144 'url': 'https://www.youtube.com/feed/history',
5145 'only_matching': True,
5146 }, {
5147 'url': 'https://www.youtube.com/feed/subscriptions',
5148 'only_matching': True,
5149 }, {
5150 'url': 'https://www.youtube.com/feed/watch_later',
5151 'only_matching': True,
5152 }, {
5153 'note': 'Recommended - redirects to home page.',
5154 'url': 'https://www.youtube.com/feed/recommended',
5155 'only_matching': True,
5156 }, {
5157 'note': 'inline playlist with not always working continuations',
5158 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5159 'only_matching': True,
5160 }, {
5161 'url': 'https://www.youtube.com/course',
5162 'only_matching': True,
5163 }, {
5164 'url': 'https://www.youtube.com/zsecurity',
5165 'only_matching': True,
5166 }, {
5167 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5168 'only_matching': True,
5169 }, {
5170 'url': 'https://www.youtube.com/TheYoungTurks/live',
5171 'only_matching': True,
5172 }, {
5173 'url': 'https://www.youtube.com/hashtag/cctv9',
5174 'info_dict': {
5175 'id': 'cctv9',
5176 'title': '#cctv9',
5177 'tags': [],
5178 },
5179 'playlist_mincount': 350,
5180 }, {
5181 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
5182 'only_matching': True,
5183 }, {
5184 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
5185 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5186 'only_matching': True
5187 }, {
5188 'note': '/browse/ should redirect to /channel/',
5189 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
5190 'only_matching': True
5191 }, {
5192 'note': 'VLPL, should redirect to playlist?list=PL...',
5193 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5194 'info_dict': {
5195 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5196 'uploader': 'NoCopyrightSounds',
5197 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
5198 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5199 'title': 'NCS : All Releases 💿',
5200 'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5201 'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5202 'modified_date': r're:\d{8}',
5203 'view_count': int,
5204 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5205 'tags': [],
5206 'channel': 'NoCopyrightSounds',
5207 },
5208 'playlist_mincount': 166,
5209 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5210 }, {
5211 'note': 'Topic, should redirect to playlist?list=UU...',
5212 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5213 'info_dict': {
5214 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5215 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5216 'title': 'Uploads from Royalty Free Music - Topic',
5217 'uploader': 'Royalty Free Music - Topic',
5218 'tags': [],
5219 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5220 'channel': 'Royalty Free Music - Topic',
5221 'view_count': int,
5222 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5223 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5224 'modified_date': r're:\d{8}',
5225 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5226 'description': '',
5227 },
5228 'expected_warnings': [
5229 'The URL does not have a videos tab',
5230 r'[Uu]navailable videos (are|will be) hidden',
5231 ],
5232 'playlist_mincount': 101,
5233 }, {
5234 'note': 'Topic without a UU playlist',
5235 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
5236 'info_dict': {
5237 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
5238 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
5239 'tags': [],
5240 },
5241 'expected_warnings': [
5242 'the playlist redirect gave error',
5243 ],
5244 'playlist_mincount': 9,
5245 }, {
5246 'note': 'Youtube music Album',
5247 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
5248 'info_dict': {
5249 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
5250 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
5251 'tags': [],
5252 'view_count': int,
5253 'description': '',
5254 'availability': 'unlisted',
5255 'modified_date': r're:\d{8}',
5256 },
5257 'playlist_count': 50,
5258 }, {
5259 'note': 'unlisted single video playlist',
5260 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5261 'info_dict': {
5262 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5263 'uploader': 'colethedj',
5264 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5265 'title': 'yt-dlp unlisted playlist test',
5266 'availability': 'unlisted',
5267 'tags': [],
5268 'modified_date': '20220418',
5269 'channel': 'colethedj',
5270 'view_count': int,
5271 'description': '',
5272 'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5273 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5274 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5275 },
5276 'playlist_count': 1,
5277 }, {
5278 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
5279 'url': 'https://www.youtube.com/feed/recommended',
5280 'info_dict': {
5281 'id': 'recommended',
5282 'title': 'recommended',
5283 'tags': [],
5284 },
5285 'playlist_mincount': 50,
5286 'params': {
5287 'skip_download': True,
5288 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5289 },
5290 }, {
5291 'note': 'API Fallback: /videos tab, sorted by oldest first',
5292 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
5293 'info_dict': {
5294 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5295 'title': 'Cody\'sLab - Videos',
5296 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
5297 'uploader': 'Cody\'sLab',
5298 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5299 'channel': 'Cody\'sLab',
5300 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5301 'tags': [],
5302 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5303 'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5304 'channel_follower_count': int
5305 },
5306 'playlist_mincount': 650,
5307 'params': {
5308 'skip_download': True,
5309 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5310 },
5311 }, {
5312 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
5313 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5314 'info_dict': {
5315 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5316 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5317 'title': 'Uploads from Royalty Free Music - Topic',
5318 'uploader': 'Royalty Free Music - Topic',
5319 'modified_date': r're:\d{8}',
5320 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5321 'description': '',
5322 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5323 'tags': [],
5324 'channel': 'Royalty Free Music - Topic',
5325 'view_count': int,
5326 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5327 },
5328 'expected_warnings': [
5329 'does not have a videos tab',
5330 r'[Uu]navailable videos (are|will be) hidden',
5331 ],
5332 'playlist_mincount': 101,
5333 'params': {
5334 'skip_download': True,
5335 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5336 },
5337 }, {
5338 'note': 'non-standard redirect to regional channel',
5339 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
5340 'only_matching': True
5341 }, {
5342 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
5343 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5344 'info_dict': {
5345 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5346 'modified_date': '20220407',
5347 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5348 'tags': [],
5349 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5350 'uploader': 'pukkandan',
5351 'availability': 'unlisted',
5352 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5353 'channel': 'pukkandan',
5354 'description': 'Test for collaborative playlist',
5355 'title': 'yt-dlp test - collaborative playlist',
5356 'view_count': int,
5357 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5358 },
5359 'playlist_mincount': 2
5360 }]
5361
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL that ``YoutubeIE`` reports as suitable is declined here;
    every other URL is judged by the parent class's ``suitable``.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
5365
# Splits a URL into three named parts:
#   pre  - whatever _VALID_URL matches
#   tab  - an optional "/word" path segment; the conditional group only
#          attempts it when the `not_channel` group did not participate
#          (presumably `not_channel` is defined inside _VALID_URL - verify)
#   post - the rest of the URL
_URL_RE = re.compile(
    rf'(?P<pre>{_VALID_URL})'
    r'(?(not_channel)|(?P<tab>/\w+))?'
    r'(?P<post>.*)$')
5367
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Extract a channel tab, playlist or feed URL.

    The URL is first normalised (host forced to www.youtube.com, tab name
    lower-cased, some YouTube Music ids rewritten), then the page data is
    fetched and dispatched on its shape.

    Parameters:
        url:            the URL to extract
        smuggled_data:  mapping passed in alongside the URL; only its
                        'is_music_url' key is read here

    Returns the value of `_extract_from_tabs` or `_extract_from_playlist`,
    or a `url_result` that hands over to another URL/extractor.

    Raises:
        UserNotLive:     a "live" tab was requested but a tab page came back
                         (unless 'no-youtube-channel-redirect' is set)
        ExtractorError:  an album could not be resolved to a playlist, an
                         explicitly requested tab does not exist, or the
                         page could not be recognised at all
    """
    item_id = self._match_id(url)
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Groups that did not participate in the match come back as ''
        # rather than None, so the values can be used as strings directly
        return self._URL_RE.match(url).groupdict(default='')

    mobj, redirect_warning = get_mobj(url), None
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
            elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                mdata = self._extract_tab_endpoint(
                    f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
                murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                    get_all=False, expected_type=str)
                if not murl:
                    raise ExtractorError('Failed to resolve album to playlist')
                return self.url_result(murl, ie=YoutubeTabIE.ie_key())
            elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                pre = f'https://www.youtube.com/channel/{item_id}'

    # Remember what the user asked for before a default tab is substituted
    original_tab_name = tab
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = (qs.get(key, [None])[0] for key in ('v', 'list'))

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                   ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Keep the requested tab and trailing part when following the redirect
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        # The tab titled "home" corresponds to the "featured" URL segment
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]  # drop the leading "/"
        if 'no-youtube-channel-redirect' not in compat_opts:
            if requested_tab_name == 'live':  # Live tab should have redirected to the video
                raise UserNotLive(video_id=mobj['id'])
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if not original_tab_name:
                    # The tab was substituted above, not typed by the user,
                    # so fall back instead of failing
                    if item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        pl_id = f'UU{item_id[2:]}'
                        pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                        try:
                            data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                        except ExtractorError:
                            redirect_warning += ' and the playlist redirect gave error'
                        else:
                            item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
                            redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                    if selected_tab_name and selected_tab_name != requested_tab_name:
                        redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'
                else:
                    # The user explicitly asked for a tab that is not there
                    raise ExtractorError(redirect_warning, expected=True)

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)
    # `data` may have been replaced above, so look the tabs up again
    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Neither tabs nor a playlist: fall back to a single video, preferring
    # the id reported by the page over the one taken from the URL
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
5496
5497
5498class YoutubePlaylistIE(InfoExtractor):
5499 IE_DESC = 'YouTube playlists'
5500 _VALID_URL = r'''(?x)(?:
5501 (?:https?://)?
5502 (?:\w+\.)?
5503 (?:
5504 (?:
5505 youtube(?:kids)?\.com|
5506 %(invidious)s
5507 )
5508 /.*?\?.*?\blist=
5509 )?
5510 (?P<id>%(playlist_id)s)
5511 )''' % {
5512 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
5513 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
5514 }
5515 IE_NAME = 'youtube:playlist'
5516 _TESTS = [{
5517 'note': 'issue #673',
5518 'url': 'PLBB231211A4F62143',
5519 'info_dict': {
5520 'title': '[OLD]Team Fortress 2 (Class-based LP)',
5521 'id': 'PLBB231211A4F62143',
5522 'uploader': 'Wickman',
5523 'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
5524 'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
5525 'view_count': int,
5526 'uploader_url': 'https://www.youtube.com/user/Wickydoo',
5527 'modified_date': r're:\d{8}',
5528 'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
5529 'channel': 'Wickman',
5530 'tags': [],
5531 'channel_url': 'https://www.youtube.com/user/Wickydoo',
5532 },
5533 'playlist_mincount': 29,
5534 }, {
5535 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
5536 'info_dict': {
5537 'title': 'YDL_safe_search',
5538 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
5539 },
5540 'playlist_count': 2,
5541 'skip': 'This playlist is private',
5542 }, {
5543 'note': 'embedded',
5544 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
5545 'playlist_count': 4,
5546 'info_dict': {
5547 'title': 'JODA15',
5548 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
5549 'uploader': 'milan',
5550 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
5551 'description': '',
5552 'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
5553 'tags': [],
5554 'modified_date': '20140919',
5555 'view_count': int,
5556 'channel': 'milan',
5557 'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
5558 'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
5559 },
5560 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5561 }, {
5562 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
5563 'playlist_mincount': 455,
5564 'info_dict': {
5565 'title': '2018 Chinese New Singles (11/6 updated)',
5566 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
5567 'uploader': 'LBK',
5568 'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
5569 'description': 'md5:da521864744d60a198e3a88af4db0d9d',
5570 'channel': 'LBK',
5571 'view_count': int,
5572 'channel_url': 'https://www.youtube.com/c/愛低音的國王',
5573 'tags': [],
5574 'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
5575 'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
5576 'modified_date': r're:\d{8}',
5577 },
5578 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5579 }, {
5580 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
5581 'only_matching': True,
5582 }, {
5583 # music album playlist
5584 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
5585 'only_matching': True,
5586 }]
5587
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Returns False when YoutubeTabIE already accepts the URL, or when the URL
    carries a non-empty ``v`` query parameter; otherwise the decision is
    delegated to the parent class's ``suitable``.
    """
    if YoutubeTabIE.suitable(url):
        return False
    # Uses the module-level parse_qs import; no function-scope import needed.
    if parse_qs(url).get('v', [None])[0]:
        return False
    return super().suitable(url)
5597
def _real_extract(self, url):
    """Rewrite *url* as a ``/playlist`` URL and delegate it to YoutubeTabIE.

    The query string of the incoming URL is carried over; when it has none,
    ``list=<playlist_id>`` is used instead. If the incoming URL is a music
    URL, that fact is smuggled into the resulting URL.
    """
    playlist_id = self._match_id(url)
    # Evaluated on the incoming URL, before it is replaced below.
    from_music = YoutubeBaseInfoExtractor.is_music_url(url)

    query = parse_qs(url)
    if not query:
        query = {'list': playlist_id}
    playlist_url = update_url_query('https://www.youtube.com/playlist', query)

    if from_music:
        playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
5607
5608
class YoutubeYtBeIE(InfoExtractor):
    """Handles ``youtu.be`` short links that also carry a ``list=`` parameter.

    The link is rewritten to a regular ``/watch`` URL with both the video and
    playlist IDs and passed on to YoutubeTabIE.
    """
    IE_DESC = 'youtu.be'
    _VALID_URL = (
        r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%s)'
        % (YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,))
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent ``/watch`` URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        # The result is identified by the playlist ID, not the video ID.
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
5658
5659
class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Redirects ``/embed/live_stream?channel=...`` URLs to the channel's ``/live`` page."""
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel ID taken from the URL."""
        channel_id = self._match_id(url)
        live_url = 'https://www.youtube.com/channel/%s/live' % channel_id
        return self.url_result(
            live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
5673
5674
class YoutubeYtUserIE(InfoExtractor):
    """Expands the ``ytuser:<name>`` shorthand to the user's ``/videos`` page."""
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the user's videos URL."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(
            videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
5689
5690
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Maps the ``:ytfav`` keyword family to the ``LL`` playlist."""
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    IE_NAME = 'youtube:favorites'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the matched keyword itself is not used."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
5708
5709
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Lists the entries of the notification menu (``:ytnotif`` keyword)."""
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield an entry for each usable notification in *response*.

        ``continuation_list`` is used as an output parameter: slot 0 is reset
        to None, then set to the ``continuationItemRenderer`` of any item that
        has one, so the caller can tell whether another page exists.
        """
        menu_items = traverse_obj(
            response,
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list)
        continuation_list[0] = None
        for menu_item in menu_items or []:
            result = self._extract_notification_renderer(menu_item.get('notificationRenderer'))
            if result:
                yield result
            next_page = menu_item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Convert one notification renderer into a ``url`` result dict.

        Notifications with a watch endpoint point at the video. Otherwise the
        browse endpoint must provide both a channel ID and a ``/post/`` ID;
        if either is missing, None is returned.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            target_url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            endpoint = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(endpoint, 'browseId', expected_type=str)
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(endpoint, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            target_url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        message = self._get_text(notification, 'shortMessage')
        if message:
            message = message.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', message,
            'video title', default=None)

        # The date is only filled in when the 'approximate_date' extractor argument is set
        upload_date = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()):
            upload_date = strftime_or_none(
                self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': target_url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield notification entries page by page until no continuation is left."""
        continuation_list = [None]
        response = None
        page = 0
        while True:
            page += 1
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data is taken from the previous page's response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                return

    def _real_extract(self, url):
        """Return the notifications as a playlist with id and title 'notifications'."""
        display_id = 'notifications'
        if self.skip_webpage:
            ytcfg = {}
        else:
            ytcfg = self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)
5799
5800
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearch`` key."""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    # Videos only
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]
5814
5815
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearchdate`` key."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Videos only, sorted by date
    _SEARCH_PARAMS = 'CAISAhAB'
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }]
5829
5830
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handles ``/results`` and ``/search`` URLs on www.youtube.com."""
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Run the search named by the URL and return it as a playlist.

        The query comes from ``search_query`` (or ``q`` as a fallback); the
        optional ``sp`` parameter is forwarded to ``_search_results``.
        """
        qs = parse_qs(url)
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        search_params = qs.get('sp', (None,))[0]
        results = self._search_results(query, search_params)
        return self.playlist_result(results, query, query)
5870
5871
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handles ``music.youtube.com/search`` URLs.

    A result section can be selected either with an ``sp`` query parameter or
    with a URL fragment naming one of the keys of ``_SECTIONS``.
    """
    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        }
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'}
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'}
    }]

    # Section name (lowercase) -> value of the ``sp`` search parameter
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Run the music search named by the URL and return it as a playlist.

        The playlist id/title is the query, followed by `` - <section>`` when
        a section could be determined.
        """
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]

        if params:
            # Label with the known section name; fall back to the raw token
            section = params
            for name, token in self._SECTIONS.items():
                if token == params:
                    section = name
                    break
        else:
            # No explicit ``sp``: interpret the text after the first '#' as a section name
            pieces = url.split('#')
            fragment = pieces[1] if len(pieces) > 1 else ''
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None

        title = join_nonempty(query, section, delim=' - ')
        results = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(results, title, title)
5923
5924
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed keyword extractors.
    Each subclass is expected to override _FEED_NAME, which determines both
    the extractor name and the /feed/ URL that is delegated to.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    @classproperty
    def IE_NAME(cls):
        return 'youtube:%s' % cls._FEED_NAME

    def _real_initialize(self):
        # Reuses the login check implemented on YoutubeBaseInfoExtractor
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with this feed's URL."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
5943
5944
class YoutubeWatchLaterIE(InfoExtractor):
    """Maps the ``:ytwatchlater`` keyword to the ``WL`` playlist."""
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the matched keyword itself is not used."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
5957
5958
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``recommended``; matches ``:ytrec`` and the bare site root."""
    _FEED_NAME = 'recommended'
    # Overrides the base class value of True
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
5974
5975
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``subscriptions``; matches the ``:ytsubs`` keyword family."""
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
5987
5988
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``history``; matches ``:ythis`` and ``:ythistory``."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
5997
5998
class YoutubeStoriesIE(InfoExtractor):
    """Expands the ``ytstories:<channel id>`` shorthand to a playlist URL.

    The part of the channel ID after the leading ``UC`` is prefixed with
    ``RLTD`` to form the playlist ID that is handed to YoutubeTabIE.
    """
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    IE_NAME = 'youtube:stories'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the stories playlist URL."""
        playlist_id = f'RLTD{self._match_id(url)}'
        # Pass the extractor key, as the other redirecting extractors in this file do
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1',
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
6013
6014
class YoutubeTruncatedURLIE(InfoExtractor):
    """Matches watch/attribution URLs that lack a video ID and reports them.

    Extraction always fails with a hint that the URL was probably cut off at
    an unquoted ``&`` by the shell.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining the likely cause."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)
6062
6063
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Handles ``/clip/<id>`` URLs.

    The clip page is used to find the underlying video and the clip's start
    and end times; the actual extraction is delegated to YoutubeIE.
    """
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int
        }
    }]

    def _real_extract(self, url):
        """Return a ``url_transparent`` result for the clip's base video.

        Raises ExtractorError when either the video ID or the clip's loop
        command (which holds the start/end times) cannot be found in the
        page data.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)
        # Without this guard a failed lookup would be subscripted below and
        # surface as a bare TypeError instead of an extractor error.
        if not clip_data:
            raise ExtractorError('Unable to find clip section data')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            # Times are given in milliseconds; the result fields are in seconds
            'section_start': int(clip_data['startTimeMs']) / 1000,
            'section_end': int(clip_data['endTimeMs']) / 1000,
        }
6120
6121
class YoutubeTruncatedIDIE(InfoExtractor):
    """Matches watch URLs whose ``v=`` value is only 1-10 characters long.

    Extraction always fails with a message saying the URL looks truncated.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)