]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/youtube.py
[cleanup] Misc
[yt-dlp.git] / yt_dlp / extractor / youtube.py
1 import base64
2 import calendar
3 import copy
4 import datetime
5 import hashlib
6 import itertools
7 import json
8 import math
9 import os.path
10 import random
11 import re
12 import sys
13 import threading
14 import time
15 import traceback
16 import urllib.error
17 import urllib.parse
18
19 from .common import InfoExtractor, SearchInfoExtractor
20 from .openload import PhantomJSwrapper
21 from ..compat import functools
22 from ..jsinterp import JSInterpreter
23 from ..utils import (
24 NO_DEFAULT,
25 ExtractorError,
26 UserNotLive,
27 bug_reports_message,
28 classproperty,
29 clean_html,
30 datetime_from_str,
31 dict_get,
32 float_or_none,
33 format_field,
34 get_first,
35 int_or_none,
36 is_html,
37 join_nonempty,
38 js_to_json,
39 mimetype2ext,
40 network_exceptions,
41 orderedSet,
42 parse_codecs,
43 parse_count,
44 parse_duration,
45 parse_iso8601,
46 parse_qs,
47 qualities,
48 remove_start,
49 smuggle_url,
50 str_or_none,
51 str_to_int,
52 strftime_or_none,
53 traverse_obj,
54 try_get,
55 unescapeHTML,
56 unified_strdate,
57 unified_timestamp,
58 unsmuggle_url,
59 update_url_query,
60 url_or_none,
61 urljoin,
62 variadic,
63 )
64
# Per-client InnerTube configuration, keyed by client name.
# Each entry holds:
#   INNERTUBE_CONTEXT              - the context payload (with its 'client' dict) sent with API requests
#   INNERTUBE_CONTEXT_CLIENT_NAME  - numeric client id, sent as the X-YouTube-Client-Name header
#   INNERTUBE_API_KEY              - (optional) API key passed as the `key` query parameter
#   INNERTUBE_HOST                 - (optional) API hostname
#   REQUIRE_JS_PLAYER              - (optional) explicitly False for the android/ios clients
# Entries that omit INNERTUBE_API_KEY, INNERTUBE_HOST or REQUIRE_JS_PLAYER get defaults
# from build_innertube_clients(), which also adds a 'priority' value to every entry.
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20220731.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20220727.01.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyBUPetSUmoZL-OhlxA7wSac5XinrygCqMo',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20220726.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '17.29.34',
                'androidSdkVersion': 30
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '17.29.34',
                'androidSdkVersion': 30
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '5.16.51',
                'androidSdkVersion': 30
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    'android_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '22.28.100',
                'androidSdkVersion': 30
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '17.30.1',
                'deviceModel': 'iPhone14,3',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    # No INNERTUBE_API_KEY here: the default key is filled in by build_innertube_clients()
    'ios_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '17.30.1',
                'deviceModel': 'iPhone14,3',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '5.18',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    # No INNERTUBE_API_KEY here: the default key is filled in by build_innertube_clients()
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '22.29.101',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20220801.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85
    },
}
227
228
229 def _split_innertube_client(client_name):
230 variant, *base = client_name.rsplit('.', 1)
231 if base:
232 return variant, base[0], variant
233 base, *variant = client_name.split('_', 1)
234 return client_name, base, variant[0] if variant else None
235
236
def build_innertube_clients():
    """Fill in defaults and priorities for every entry of INNERTUBE_CLIENTS.

    Mutates INNERTUBE_CLIENTS in place: sets the default API key, host,
    REQUIRE_JS_PLAYER flag and ``hl``, computes a ``priority`` from the base
    client, and registers an extra ``<client>_embedscreen`` entry for every
    client that has no variant suffix.
    """
    third_party = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'tv', 'ios', 'mweb')
    rank = qualities(base_clients[::-1])
    defaults = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot because new entries are inserted inside the loop
    for name, cfg in list(INNERTUBE_CLIENTS.items()):
        for key, fallback in defaults:
            cfg.setdefault(key, fallback)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        _, base_client, variant = _split_innertube_client(name)
        cfg['priority'] = 10 * rank(base_client)

        if variant == 'embedded':
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            cfg['priority'] -= 2
        elif variant:
            cfg['priority'] -= 3
        else:
            # Base client: derive an embed-screen copy of it
            embedscreen = copy.deepcopy(cfg)
            INNERTUBE_CLIENTS[f'{name}_embedscreen'] = embedscreen
            embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = third_party
            embedscreen['priority'] -= 3
263
264
# Run once at import time so INNERTUBE_CLIENTS is fully populated before any extractor uses it
build_innertube_clients()
266
267
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Regex alternation of reserved names.
    # NOTE(review): presumably these are YouTube URL path components that
    # must not be treated as channel/user names - confirm against its users.
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
        r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
        r'browse|oembed|get_video_info|iframe_api|s/player|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    # Matches a playlist ID: either one of the listed prefixes followed by at
    # least 10 ID characters, or one of the fixed IDs RDMM / WL / LL / LM
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    # _NETRC_MACHINE = 'youtube'

    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Hostname regexes of alternative front-ends (Invidious and Piped).
    # NOTE(review): presumably substituted for the `%(invidious)s` placeholder
    # in YoutubeIE._VALID_URL - confirm where the interpolation happens.
    # NOTE(review): piped.kavin.rocks is matched twice (by the combined
    # kavin.rocks entry and again in the piped section).
    _INVIDIOUS_SITES = (
        # invidious-redirect websites
        r'(?:www\.)?redirect\.invidious\.io',
        r'(?:(?:www|dev)\.)?invidio\.us',
        # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
        r'(?:www\.)?invidious\.pussthecat\.org',
        r'(?:www\.)?invidious\.zee\.li',
        r'(?:www\.)?invidious\.ethibox\.fr',
        r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
        r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
        r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
        # youtube-dl invidious instances list
        r'(?:(?:www|no)\.)?invidiou\.sh',
        r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
        r'(?:www\.)?invidious\.kabi\.tk',
        r'(?:www\.)?invidious\.mastodon\.host',
        r'(?:www\.)?invidious\.zapashcanon\.fr',
        r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
        r'(?:www\.)?invidious\.tinfoil-hat\.net',
        r'(?:www\.)?invidious\.himiko\.cloud',
        r'(?:www\.)?invidious\.reallyancient\.tech',
        r'(?:www\.)?invidious\.tube',
        r'(?:www\.)?invidiou\.site',
        r'(?:www\.)?invidious\.site',
        r'(?:www\.)?invidious\.xyz',
        r'(?:www\.)?invidious\.nixnet\.xyz',
        r'(?:www\.)?invidious\.048596\.xyz',
        r'(?:www\.)?invidious\.drycat\.fr',
        r'(?:www\.)?inv\.skyn3t\.in',
        r'(?:www\.)?tube\.poal\.co',
        r'(?:www\.)?tube\.connect\.cafe',
        r'(?:www\.)?vid\.wxzm\.sx',
        r'(?:www\.)?vid\.mint\.lgbt',
        r'(?:www\.)?vid\.puffyan\.us',
        r'(?:www\.)?yewtu\.be',
        r'(?:www\.)?yt\.elukerio\.org',
        r'(?:www\.)?yt\.lelux\.fi',
        r'(?:www\.)?invidious\.ggc-project\.de',
        r'(?:www\.)?yt\.maisputain\.ovh',
        r'(?:www\.)?ytprivate\.com',
        r'(?:www\.)?invidious\.13ad\.de',
        r'(?:www\.)?invidious\.toot\.koeln',
        r'(?:www\.)?invidious\.fdn\.fr',
        r'(?:www\.)?watch\.nettohikari\.com',
        r'(?:www\.)?invidious\.namazso\.eu',
        r'(?:www\.)?invidious\.silkky\.cloud',
        r'(?:www\.)?invidious\.exonip\.de',
        r'(?:www\.)?invidious\.riverside\.rocks',
        r'(?:www\.)?invidious\.blamefran\.net',
        r'(?:www\.)?invidious\.moomoo\.de',
        r'(?:www\.)?ytb\.trom\.tf',
        r'(?:www\.)?yt\.cyberhost\.uk',
        r'(?:www\.)?kgg2m7yk5aybusll\.onion',
        r'(?:www\.)?qklhadlycap4cnod\.onion',
        r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
        r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
        r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
        r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
        r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
        r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
        r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
        r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
        r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
        r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
        # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
        r'(?:www\.)?piped\.kavin\.rocks',
        r'(?:www\.)?piped\.silkky\.cloud',
        r'(?:www\.)?piped\.tokhmi\.xyz',
        r'(?:www\.)?piped\.moomoo\.me',
        r'(?:www\.)?il\.ax',
        r'(?:www\.)?piped\.syncpundit\.com',
        r'(?:www\.)?piped\.mha\.fi',
        r'(?:www\.)?piped\.mint\.lgbt',
        r'(?:www\.)?piped\.privacy\.com\.de',
    )
359
def _initialize_consent(self):
    """Set an accepted CONSENT cookie for .youtube.com unless one is not needed.

    Nothing is done when a ``__Secure-3PSID`` cookie exists or the CONSENT
    cookie already contains ``YES``. A pending consent id is reused when
    present; otherwise a random three-digit id is generated.
    """
    yt_cookies = self._get_cookies('https://www.youtube.com/')
    if yt_cookies.get('__Secure-3PSID'):
        return

    pending_id = None
    consent_cookie = yt_cookies.get('CONSENT')
    if consent_cookie:
        if 'YES' in consent_cookie.value:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', consent_cookie.value, 'consent', default=None)

    consent_id = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
374
def _initialize_pref(self):
    """Force ``hl=en`` and ``tz=UTC`` in the PREF cookie, keeping its other values.

    An existing PREF cookie that cannot be parsed is reported with a warning
    and then replaced.
    """
    pref = {}
    pref_cookie = self._get_cookies('https://www.youtube.com/').get('PREF')
    if pref_cookie:
        try:
            pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
    pref['hl'] = 'en'
    pref['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
386
def _real_initialize(self):
    """Run the extractor's start-up steps: PREF cookie, CONSENT cookie, login check."""
    for step in (self._initialize_pref, self._initialize_consent, self._check_login_required):
        step()
391
def _check_login_required(self):
    """Raise a login-required error when this extractor needs login and no cookies were passed."""
    if not self._LOGIN_REQUIRED or self._cookies_passed:
        return
    self.raise_login_required('Login details are needed to download this content', method='cookies')
395
# Prefix regexes locating the `ytInitialData = ...` / `ytInitialPlayerResponse = ...`
# assignments in a webpage; each ends right after the `=` so the JSON value follows the match
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
398
def _get_default_ytcfg(self, client='web'):
    """Return an independent deep copy of the built-in config for `client`."""
    template = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(template)
401
def _get_innertube_host(self, client='web'):
    """Return the built-in API hostname configured for `client`."""
    client_config = INNERTUBE_CLIENTS[client]
    return client_config['INNERTUBE_HOST']
404
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """try_get on `ytcfg`, falling back to the default config of `default_client`.

    The fallback is used whenever the value from `ytcfg` is missing or falsy.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
409
def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name from `ytcfg`, or from the default client config."""
    getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)
414
def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from `ytcfg`, or from the default client config."""
    getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, getters, str, default_client)
419
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """Pick the API hostname.

    Precedence: the `innertube_host` extractor argument, then
    `req_api_hostname`, then the built-in host of `default_client`
    ('web' when no client is given).
    """
    configured = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured:
        return configured
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
423
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return INNERTUBE_API_KEY from `ytcfg`, or from the default client config."""
    def api_key_of(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, api_key_of, str, default_client)
426
def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the INNERTUBE_CONTEXT dict to send with API requests.

    Taken from `ytcfg` when it has one, otherwise from the default config of
    `default_client`. The language and time zone of its `client` dict are
    overridden in place.
    """
    sources = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(sources, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    client_context.update(hl='en', timeZone='UTC', utcOffsetMinutes=0)
    return context
434
# Cache for the SAPISID cookie value used by _generate_sapisidhash_header():
# None = not looked up yet, False = looked up but no cookie found, str = the cookie value
_SAPISID = None
436
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """Build the ``SAPISIDHASH`` Authorization header value for `origin`.

    The SAPISID value is looked up from the YouTube cookies on first use and
    cached on the instance. Returns None when no such cookie is available.
    """
    time_now = round(time.time())
    if self._SAPISID is None:
        yt_cookies = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        sapisid_cookie = dict_get(yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
        if not (sapisid_cookie and sapisid_cookie.value):
            # Remember the failed lookup so it is not repeated
            self._SAPISID = False
        else:
            self._SAPISID = sapisid_cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not yt_cookies.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)

    if not self._SAPISID:
        return None

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    payload = f'{time_now} {self._SAPISID} {origin}'
    sapisidhash = hashlib.sha1(payload.encode()).hexdigest()
    return f'SAPISIDHASH {time_now}_{sapisidhash}'
461
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST `query` to the ``/youtubei/v1/<ep>`` endpoint and return the parsed JSON.

    `context`, `api_key` and `api_hostname` fall back to the values of
    `default_client`; the `innertube_key` extractor argument, when given,
    overrides any API key. `headers` are applied on top of the generated
    API headers.
    """
    data = {'context': context or self._extract_context(default_client=default_client)}
    data.update(query)

    real_headers = self.generate_api_headers(default_client=default_client)
    real_headers['content-type'] = 'application/json'
    if headers:
        real_headers.update(headers)

    configured_key = self._configuration_arg(
        'innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    api_key = configured_key or api_key or self._extract_api_key(default_client=default_client)

    url = f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}'
    return self._download_json(
        url, video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(data).encode('utf8'), headers=real_headers,
        query={'key': api_key, 'prettyPrint': 'false'})
479
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Find and parse the JSON assigned to ``ytInitialData`` in `webpage`."""
    pattern = self._YT_INITIAL_DATA_RE
    return self._search_json(pattern, webpage, 'yt initial data', item_id, fatal=fatal)
482
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first SESSION_INDEX that converts to an int among the given
    ytcfg dicts, or None when there is none.
    """
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)
493
# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Return the identity token from `ytcfg`'s ID_TOKEN, else scraped from `webpage`, else None."""
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
504
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    @returns the channel sync id of the first argument that provides one, else None
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for data in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated_sid:
            return delegated_sid

        datasync_id = try_get(data, datasync_getters, str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) > 1 and parts[1]:
            return parts[0]
523
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = [(
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )]
    return get_first(args, visitor_data_paths, expected_type=str)
533
@functools.cached_property
def is_authenticated(self):
    """Whether a SAPISIDHASH authorization header can be generated (cached after first access)."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
537
def extract_ytcfg(self, video_id, webpage):
    """Parse the first ``ytcfg.set({...});`` object in `webpage`.

    Returns an empty dict when there is no webpage, no match, or the JSON
    cannot be parsed.
    """
    if not webpage:
        return {}
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', default='{}')
    return self._parse_json(raw_ytcfg, video_id, fatal=False) or {}
545
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an API request.

    Explicit arguments take precedence over values extracted from `ytcfg`.
    Authorization headers are added when a SAPISIDHASH can be generated.
    Headers whose value is None are left out of the result.
    """
    origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if session_index is not None:
        headers['X-Goog-AuthUser'] = session_index
    elif account_syncid:
        # An account sync id without a known session index uses index 0
        headers['X-Goog-AuthUser'] = 0

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin

    return {name: value for name, value in headers.items() if value is not None}
570
def _download_ytcfg(self, client, video_id):
    """Download the config page of `client` and return its ytcfg.

    Only 'web', 'web_music' and 'web_embedded' have a config page; an empty
    dict is returned for any other client or when nothing could be extracted.
    """
    config_pages = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
    }
    url = config_pages.get(client)
    if not url:
        return {}
    note = f'Downloading {client.replace("_", " ").strip()} client config'
    webpage = self._download_webpage(url, video_id, fatal=False, note=note)
    return self.extract_ytcfg(video_id, webpage) or {}
582
583 @staticmethod
584 def _build_api_continuation_query(continuation, ctp=None):
585 query = {
586 'continuation': continuation
587 }
588 # TODO: Inconsistency with clickTrackingParams.
589 # Currently we have a fixed ctp contained within context (from ytcfg)
590 # and a ctp in root query for continuation.
591 if ctp:
592 query['clickTracking'] = {'clickTrackingParams': ctp}
593 return query
594
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from the renderer's next/reload continuation data, if any."""
    getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    continuation_data = try_get(renderer, getters, dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))
607
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    Returns None when `continuation_ep` is not a dict or carries no token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))
617
@classmethod
def _extract_continuation(cls, renderer):
    """Find the continuation query for `renderer`.

    The renderer's own continuation data is preferred; otherwise the first
    usable ``continuationItemRenderer`` among its ``contents`` and ``items``
    is used. Returns None when no continuation is found.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in try_get(renderer, lambda x: x[key], list) or []
    ]
    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        endpoint = try_get(entry, endpoint_getters, dict)
        query = cls._extract_continuation_ep_data(endpoint)
        if query:
            return query
638
@classmethod
def _extract_alerts(cls, data):
    """Yield ``(alert_type, message)`` pairs for the alerts in an API response.

    Reads the ``alerts`` list of `data`. Entries of that list, and the
    renderers inside them, that are not dicts are skipped instead of raising,
    as are alerts without a type or without any text.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Malformed responses may hold non-dict values here; calling
            # .get() on them would raise AttributeError
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
651
def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """Report `alerts`, an iterable of ``(type, message)`` pairs.

    With `fatal`, alerts of type 'error' (case-insensitive) are errors: the
    last one is raised as ExtractorError and the earlier ones are downgraded
    to warnings. Everything else is reported as a warning.
    """
    errors, warnings = [], []
    for alert_type, alert_message in alerts:
        target = errors if (alert_type.lower() == 'error' and fatal) else warnings
        target.append([alert_type, alert_message])

    for alert_type, alert_message in itertools.chain(warnings, errors[:-1]):
        self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)

    if not errors:
        return
    raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
665
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from `data` and report them; extra arguments go to _report_alerts."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
668
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased badge labels found in the renderer's ``badges`` list."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}
676
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Get the text of the first text object found at any of the given paths

    A text object is either a dict with a 'simpleText' string, a dict with a
    'runs' list, or a bare list of runs; the 'text' values of the runs are
    concatenated.
    @param path_list: traverse_obj paths into data, tried in order.
                      When no path is given, data itself is used
    @param max_runs:  when truthy, only the first max_runs runs are joined
    @returns          the first non-empty text found, otherwise None
    """
    for path in path_list or [None]:
        if path is None:
            obj = [data]
        else:
            obj = traverse_obj(data, path, default=[])
            # A path without `...` or a list/tuple key gives a single result;
            # wrap it so the loop below always iterates over candidates
            if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                obj = [obj]
        for item in obj:
            text = try_get(item, lambda x: x['simpleText'], str)
            if text:
                return text
            runs = try_get(item, lambda x: x['runs'], list) or []
            # The item may itself be the list of runs
            if not runs and isinstance(item, list):
                runs = item

            # `max_runs or len(runs)` keeps all runs when max_runs is None or 0
            runs = runs[:min(len(runs), max_runs or len(runs))]
            text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
            if text:
                return text
698
def _get_count(self, data, *path_list):
    """Parse a count from the text found at `path_list`.

    parse_count is tried first; if it fails, the digits (and commas) at the
    start of the whitespace-stripped text are converted instead.
    """
    count_text = self._get_text(data, *path_list) or ''
    count = parse_count(count_text)
    if count is not None:
        return count
    leading_number = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', count_text), 'count', default=None)
    return str_to_int(leading_number)
706
@staticmethod
def _extract_thumbnails(data, *path_list):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @returns list of dicts with 'url', 'height' and 'width'; entries without a valid URL are dropped
    """
    results = []
    for path in path_list or [()]:
        candidates = traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[])
        for candidate in candidates:
            url = url_or_none(candidate.get('url'))
            if url:
                # Sometimes youtube gives a wrong thumbnail URL. See:
                # https://github.com/yt-dlp/yt-dlp/issues/233
                # https://github.com/ytdl-org/youtube-dl/issues/28023
                if 'maxresdefault' in url:
                    url = url.split('?')[0]
                results.append({
                    'url': url,
                    'height': int_or_none(candidate.get('height')),
                    'width': int_or_none(candidate.get('width')),
                })
    return results
730
@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'
    Returns None when the text holds no relative time or it cannot be converted
    """
    mobj = re.search(r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
    if not mobj:
        return None

    start, amount, unit = mobj.group('start', 'time', 'unit')
    if start:
        return datetime_from_str(start)
    try:
        return datetime_from_str('now-%s%s' % (amount, unit))
    except ValueError:
        return None
746
def _extract_time_text(self, renderer, *path_list):
    """@returns (timestamp, time_text)

    The text is first read as a relative time ('3 days ago'); failing that
    it is parsed as a date, either as a whole or from the date-like part of
    it. A warning is issued when non-empty text cannot be parsed.
    """
    text = self._get_text(renderer, *path_list) or ''
    dt = self.extract_relative_time(text)

    if isinstance(dt, datetime.datetime):
        timestamp = calendar.timegm(dt.timetuple())
    else:
        timestamp = (
            unified_timestamp(text) or unified_timestamp(
                self._search_regex(
                    (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                    text.lower(), 'time text', default=None)))

    if timestamp is None and text:
        self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
    return timestamp, text
765
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` with retries and return the response

    Attempts are driven by self.RetryManager(); setting retry.error requests another attempt.
    Retried: network errors that are not HTTP errors, HTTP errors other than 403 and 429,
    alerts containing 'unknown error', and responses lacking `check_get_keys`.
    Everything else is passed to self._error_or_warning together with `fatal`.
    @param check_get_keys: traverse_obj path(s); the response is considered incomplete
                           when traversing them yields nothing truthy
    """
    for retry in self.RetryManager():
        try:
            # Always fatal here so that failures surface as ExtractorError and are triaged below
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            if not isinstance(e.cause, network_exceptions):
                return self._error_or_warning(e, fatal=fatal)
            elif not isinstance(e.cause, urllib.error.HTTPError):
                retry.error = e
                continue

            # From here on e.cause is an HTTPError. Peek at the body: when it is not HTML,
            # try to read YouTube's JSON error message and show it as a warning
            first_bytes = e.cause.read(512)
            if not is_html(first_bytes):
                yt_error = try_get(
                    self._parse_json(
                        self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if e.cause.code not in (403, 429):
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                retry.error = e
                continue
            return self._error_or_warning(e, fatal=fatal)
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            retry.error = ExtractorError('Incomplete data received', expected=True)
            continue

        return response
817
@staticmethod
def is_music_url(url):
    """Return True when `url` starts with ``http(s)://music.youtube.com/``."""
    return bool(re.match(r'https?://music\.youtube\.com/', url))
821
def _extract_video(self, renderer):
    """Build a ``url``-type result for YoutubeIE from a video renderer dict.

    The result points at the /shorts/ URL when the renderer looks like a
    short and at the regular watch URL otherwise. ``upload_date`` is filled
    in only when the `approximate_date` extractor argument is set.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    duration = parse_duration(self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # Fall back to the duration contained in the title's accessibility label
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    view_count = self._get_count(renderer, 'viewCountText')
    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    relative_url = traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)
    navigation_url = urljoin('https://www.youtube.com/', relative_url) or ''
    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    if self._configuration_arg('approximate_date', ie_key='youtubetab'):
        upload_date = strftime_or_none(timestamp, '%Y%m%d')
    else:
        upload_date = None

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'thumbnails': thumbnails,
        'upload_date': upload_date,
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
    }
876
877
878 class YoutubeIE(YoutubeBaseInfoExtractor):
879 IE_DESC = 'YouTube'
880 _VALID_URL = r"""(?x)^
881 (
882 (?:https?://|//) # http(s):// or protocol-independent URL
883 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
884 (?:www\.)?deturl\.com/www\.youtube\.com|
885 (?:www\.)?pwnyoutube\.com|
886 (?:www\.)?hooktube\.com|
887 (?:www\.)?yourepeat\.com|
888 tube\.majestyc\.net|
889 %(invidious)s|
890 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
891 (?:.*?\#/)? # handle anchor (#/) redirect urls
892 (?: # the various things that can precede the ID:
893 (?:(?:v|embed|e|shorts)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
894 |(?: # or the v= param in all its forms
895 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
896 (?:\?|\#!?) # the params delimiter ? or # or #!
897 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
898 v=
899 )
900 ))
901 |(?:
902 youtu\.be| # just youtu.be/xxxx
903 vid\.plus| # or vid.plus/xxxx
904 zwearz\.com/watch| # or zwearz.com/watch/xxxx
905 %(invidious)s
906 )/
907 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
908 )
909 )? # all until now is optional -> you can pass the naked ID
910 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
911 (?(1).+)? # if we found the ID, everything can follow
912 (?:\#|$)""" % {
913 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
914 }
915 _EMBED_REGEX = [r'''(?x)
916 (?:
917 <iframe[^>]+?src=|
918 data-video-url=|
919 <embed[^>]+?src=|
920 embedSWF\(?:\s*|
921 <object[^>]+data=|
922 new\s+SWFObject\(
923 )
924 (["\'])
925 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
926 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
927 \1''']
928 _PLAYER_INFO_RE = (
929 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
930 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
931 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
932 )
933 _formats = {
934 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
935 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
936 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
937 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
938 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
939 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
940 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
941 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
942 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
943 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
944 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
945 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
946 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
947 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
948 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
949 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
950 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
951 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
952
953
954 # 3D videos
955 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
956 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
957 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
958 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
959 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
960 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
961 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
962
963 # Apple HTTP Live Streaming
964 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
965 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
966 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
967 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
968 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
969 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
970 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
971 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
972
973 # DASH mp4 video
974 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
975 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
976 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
977 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
978 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
979 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
980 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
981 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
982 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
983 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
984 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
985 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
986
987 # Dash mp4 audio
988 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
989 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
990 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
991 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
992 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
993 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
994 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
995
996 # Dash webm
997 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
998 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
999 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1000 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1001 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1002 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1003 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1004 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1005 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1006 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1007 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1008 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1009 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1010 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1011 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1012 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1013 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1014 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1015 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1016 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1017 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1018 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1019
1020 # Dash webm audio
1021 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1022 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1023
1024 # Dash webm audio with opus inside
1025 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1026 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1027 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1028
1029 # RTMP (unnamed)
1030 '_rtmp': {'protocol': 'rtmp'},
1031
1032 # av01 video only formats sometimes served with "unknown" codecs
1033 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1034 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1035 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1036 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1037 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1038 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1039 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1040 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1041 }
1042 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
1043
1044 _GEO_BYPASS = False
1045
1046 IE_NAME = 'youtube'
1047 _TESTS = [
1048 {
1049 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1050 'info_dict': {
1051 'id': 'BaW_jenozKc',
1052 'ext': 'mp4',
1053 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1054 'uploader': 'Philipp Hagemeister',
1055 'uploader_id': 'phihag',
1056 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1057 'channel': 'Philipp Hagemeister',
1058 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1059 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1060 'upload_date': '20121002',
1061 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1062 'categories': ['Science & Technology'],
1063 'tags': ['youtube-dl'],
1064 'duration': 10,
1065 'view_count': int,
1066 'like_count': int,
1067 'availability': 'public',
1068 'playable_in_embed': True,
1069 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1070 'live_status': 'not_live',
1071 'age_limit': 0,
1072 'start_time': 1,
1073 'end_time': 9,
1074 'comment_count': int,
1075 'channel_follower_count': int
1076 }
1077 },
1078 {
1079 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1080 'note': 'Embed-only video (#1746)',
1081 'info_dict': {
1082 'id': 'yZIXLfi8CZQ',
1083 'ext': 'mp4',
1084 'upload_date': '20120608',
1085 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1086 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1087 'uploader': 'SET India',
1088 'uploader_id': 'setindia',
1089 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1090 'age_limit': 18,
1091 },
1092 'skip': 'Private video',
1093 },
1094 {
1095 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1096 'note': 'Use the first video ID in the URL',
1097 'info_dict': {
1098 'id': 'BaW_jenozKc',
1099 'ext': 'mp4',
1100 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1101 'uploader': 'Philipp Hagemeister',
1102 'uploader_id': 'phihag',
1103 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1104 'channel': 'Philipp Hagemeister',
1105 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1106 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1107 'upload_date': '20121002',
1108 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1109 'categories': ['Science & Technology'],
1110 'tags': ['youtube-dl'],
1111 'duration': 10,
1112 'view_count': int,
1113 'like_count': int,
1114 'availability': 'public',
1115 'playable_in_embed': True,
1116 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1117 'live_status': 'not_live',
1118 'age_limit': 0,
1119 'comment_count': int,
1120 'channel_follower_count': int
1121 },
1122 'params': {
1123 'skip_download': True,
1124 },
1125 },
1126 {
1127 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1128 'note': '256k DASH audio (format 141) via DASH manifest',
1129 'info_dict': {
1130 'id': 'a9LDPn-MO4I',
1131 'ext': 'm4a',
1132 'upload_date': '20121002',
1133 'uploader_id': '8KVIDEO',
1134 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1135 'description': '',
1136 'uploader': '8KVIDEO',
1137 'title': 'UHDTV TEST 8K VIDEO.mp4'
1138 },
1139 'params': {
1140 'youtube_include_dash_manifest': True,
1141 'format': '141',
1142 },
1143 'skip': 'format 141 not served anymore',
1144 },
1145 # DASH manifest with encrypted signature
1146 {
1147 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1148 'info_dict': {
1149 'id': 'IB3lcPjvWLA',
1150 'ext': 'm4a',
1151 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1152 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1153 'duration': 244,
1154 'uploader': 'AfrojackVEVO',
1155 'uploader_id': 'AfrojackVEVO',
1156 'upload_date': '20131011',
1157 'abr': 129.495,
1158 'like_count': int,
1159 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1160 'playable_in_embed': True,
1161 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1162 'view_count': int,
1163 'track': 'The Spark',
1164 'live_status': 'not_live',
1165 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1166 'channel': 'Afrojack',
1167 'uploader_url': 'http://www.youtube.com/user/AfrojackVEVO',
1168 'tags': 'count:19',
1169 'availability': 'public',
1170 'categories': ['Music'],
1171 'age_limit': 0,
1172 'alt_title': 'The Spark',
1173 'channel_follower_count': int
1174 },
1175 'params': {
1176 'youtube_include_dash_manifest': True,
1177 'format': '141/bestaudio[ext=m4a]',
1178 },
1179 },
1180 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1181 {
1182 'note': 'Embed allowed age-gate video',
1183 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1184 'info_dict': {
1185 'id': 'HtVdAasjOgU',
1186 'ext': 'mp4',
1187 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1188 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1189 'duration': 142,
1190 'uploader': 'The Witcher',
1191 'uploader_id': 'WitcherGame',
1192 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1193 'upload_date': '20140605',
1194 'age_limit': 18,
1195 'categories': ['Gaming'],
1196 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1197 'availability': 'needs_auth',
1198 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1199 'like_count': int,
1200 'channel': 'The Witcher',
1201 'live_status': 'not_live',
1202 'tags': 'count:17',
1203 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1204 'playable_in_embed': True,
1205 'view_count': int,
1206 'channel_follower_count': int
1207 },
1208 },
1209 {
1210 'note': 'Age-gate video with embed allowed in public site',
1211 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1212 'info_dict': {
1213 'id': 'HsUATh_Nc2U',
1214 'ext': 'mp4',
1215 'title': 'Godzilla 2 (Official Video)',
1216 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1217 'upload_date': '20200408',
1218 'uploader_id': 'FlyingKitty900',
1219 'uploader': 'FlyingKitty',
1220 'age_limit': 18,
1221 'availability': 'needs_auth',
1222 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1223 'uploader_url': 'http://www.youtube.com/user/FlyingKitty900',
1224 'channel': 'FlyingKitty',
1225 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1226 'view_count': int,
1227 'categories': ['Entertainment'],
1228 'live_status': 'not_live',
1229 'tags': ['Flyingkitty', 'godzilla 2'],
1230 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1231 'like_count': int,
1232 'duration': 177,
1233 'playable_in_embed': True,
1234 'channel_follower_count': int
1235 },
1236 },
1237 {
1238 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1239 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1240 'info_dict': {
1241 'id': 'Tq92D6wQ1mg',
1242 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1243 'ext': 'mp4',
1244 'upload_date': '20191228',
1245 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1246 'uploader': 'Projekt Melody',
1247 'description': 'md5:17eccca93a786d51bc67646756894066',
1248 'age_limit': 18,
1249 'like_count': int,
1250 'availability': 'needs_auth',
1251 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1252 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1253 'view_count': int,
1254 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1255 'channel': 'Projekt Melody',
1256 'live_status': 'not_live',
1257 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1258 'playable_in_embed': True,
1259 'categories': ['Entertainment'],
1260 'duration': 106,
1261 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1262 'comment_count': int,
1263 'channel_follower_count': int
1264 },
1265 },
1266 {
1267 'note': 'Non-Agegated non-embeddable video',
1268 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1269 'info_dict': {
1270 'id': 'MeJVWBSsPAY',
1271 'ext': 'mp4',
1272 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1273 'uploader': 'Herr Lurik',
1274 'uploader_id': 'st3in234',
1275 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1276 'upload_date': '20130730',
1277 'track': 'Such mich find mich',
1278 'age_limit': 0,
1279 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1280 'like_count': int,
1281 'playable_in_embed': False,
1282 'creator': 'OOMPH!',
1283 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1284 'view_count': int,
1285 'alt_title': 'Such mich find mich',
1286 'duration': 210,
1287 'channel': 'Herr Lurik',
1288 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1289 'categories': ['Music'],
1290 'availability': 'public',
1291 'uploader_url': 'http://www.youtube.com/user/st3in234',
1292 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1293 'live_status': 'not_live',
1294 'artist': 'OOMPH!',
1295 'channel_follower_count': int
1296 },
1297 },
1298 {
1299 'note': 'Non-bypassable age-gated video',
1300 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1301 'only_matching': True,
1302 },
1303 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1304 # YouTube Red ad is not captured for creator
1305 {
1306 'url': '__2ABJjxzNo',
1307 'info_dict': {
1308 'id': '__2ABJjxzNo',
1309 'ext': 'mp4',
1310 'duration': 266,
1311 'upload_date': '20100430',
1312 'uploader_id': 'deadmau5',
1313 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1314 'creator': 'deadmau5',
1315 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1316 'uploader': 'deadmau5',
1317 'title': 'Deadmau5 - Some Chords (HD)',
1318 'alt_title': 'Some Chords',
1319 'availability': 'public',
1320 'tags': 'count:14',
1321 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1322 'view_count': int,
1323 'live_status': 'not_live',
1324 'channel': 'deadmau5',
1325 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1326 'like_count': int,
1327 'track': 'Some Chords',
1328 'artist': 'deadmau5',
1329 'playable_in_embed': True,
1330 'age_limit': 0,
1331 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1332 'categories': ['Music'],
1333 'album': 'Some Chords',
1334 'channel_follower_count': int
1335 },
1336 'expected_warnings': [
1337 'DASH manifest missing',
1338 ]
1339 },
1340 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1341 {
1342 'url': 'lqQg6PlCWgI',
1343 'info_dict': {
1344 'id': 'lqQg6PlCWgI',
1345 'ext': 'mp4',
1346 'duration': 6085,
1347 'upload_date': '20150827',
1348 'uploader_id': 'olympic',
1349 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1350 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
1351 'uploader': 'Olympics',
1352 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1353 'like_count': int,
1354 'release_timestamp': 1343767800,
1355 'playable_in_embed': True,
1356 'categories': ['Sports'],
1357 'release_date': '20120731',
1358 'channel': 'Olympics',
1359 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1360 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1361 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1362 'age_limit': 0,
1363 'availability': 'public',
1364 'live_status': 'was_live',
1365 'view_count': int,
1366 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
1367 'channel_follower_count': int
1368 },
1369 'params': {
1370 'skip_download': 'requires avconv',
1371 }
1372 },
1373 # Non-square pixels
1374 {
1375 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1376 'info_dict': {
1377 'id': '_b-2C3KPAM0',
1378 'ext': 'mp4',
1379 'stretched_ratio': 16 / 9.,
1380 'duration': 85,
1381 'upload_date': '20110310',
1382 'uploader_id': 'AllenMeow',
1383 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1384 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1385 'uploader': '孫ᄋᄅ',
1386 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1387 'playable_in_embed': True,
1388 'channel': '孫ᄋᄅ',
1389 'age_limit': 0,
1390 'tags': 'count:11',
1391 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1392 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1393 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1394 'view_count': int,
1395 'categories': ['People & Blogs'],
1396 'like_count': int,
1397 'live_status': 'not_live',
1398 'availability': 'unlisted',
1399 'comment_count': int,
1400 'channel_follower_count': int
1401 },
1402 },
1403 # url_encoded_fmt_stream_map is empty string
1404 {
1405 'url': 'qEJwOuvDf7I',
1406 'info_dict': {
1407 'id': 'qEJwOuvDf7I',
1408 'ext': 'webm',
1409 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1410 'description': '',
1411 'upload_date': '20150404',
1412 'uploader_id': 'spbelect',
1413 'uploader': 'Наблюдатели Петербурга',
1414 },
1415 'params': {
1416 'skip_download': 'requires avconv',
1417 },
1418 'skip': 'This live event has ended.',
1419 },
1420 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1421 {
1422 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1423 'info_dict': {
1424 'id': 'FIl7x6_3R5Y',
1425 'ext': 'webm',
1426 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1427 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1428 'duration': 220,
1429 'upload_date': '20150625',
1430 'uploader_id': 'dorappi2000',
1431 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1432 'uploader': 'dorappi2000',
1433 'formats': 'mincount:31',
1434 },
1435 'skip': 'not actual anymore',
1436 },
1437 # DASH manifest with segment_list
1438 {
1439 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1440 'md5': '8ce563a1d667b599d21064e982ab9e31',
1441 'info_dict': {
1442 'id': 'CsmdDsKjzN8',
1443 'ext': 'mp4',
1444 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1445 'uploader': 'Airtek',
1446 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1447 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1448 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1449 },
1450 'params': {
1451 'youtube_include_dash_manifest': True,
1452 'format': '135', # bestvideo
1453 },
1454 'skip': 'This live event has ended.',
1455 },
1456 {
1457 # Multifeed videos (multiple cameras), URL is for Main Camera
1458 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1459 'info_dict': {
1460 'id': 'jvGDaLqkpTg',
1461 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1462 'description': 'md5:e03b909557865076822aa169218d6a5d',
1463 },
1464 'playlist': [{
1465 'info_dict': {
1466 'id': 'jvGDaLqkpTg',
1467 'ext': 'mp4',
1468 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1469 'description': 'md5:e03b909557865076822aa169218d6a5d',
1470 'duration': 10643,
1471 'upload_date': '20161111',
1472 'uploader': 'Team PGP',
1473 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1474 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1475 },
1476 }, {
1477 'info_dict': {
1478 'id': '3AKt1R1aDnw',
1479 'ext': 'mp4',
1480 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1481 'description': 'md5:e03b909557865076822aa169218d6a5d',
1482 'duration': 10991,
1483 'upload_date': '20161111',
1484 'uploader': 'Team PGP',
1485 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1486 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1487 },
1488 }, {
1489 'info_dict': {
1490 'id': 'RtAMM00gpVc',
1491 'ext': 'mp4',
1492 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1493 'description': 'md5:e03b909557865076822aa169218d6a5d',
1494 'duration': 10995,
1495 'upload_date': '20161111',
1496 'uploader': 'Team PGP',
1497 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1498 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1499 },
1500 }, {
1501 'info_dict': {
1502 'id': '6N2fdlP3C5U',
1503 'ext': 'mp4',
1504 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1505 'description': 'md5:e03b909557865076822aa169218d6a5d',
1506 'duration': 10990,
1507 'upload_date': '20161111',
1508 'uploader': 'Team PGP',
1509 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1510 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1511 },
1512 }],
1513 'params': {
1514 'skip_download': True,
1515 },
1516 'skip': 'Not multifeed anymore',
1517 },
1518 {
1519 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1520 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1521 'info_dict': {
1522 'id': 'gVfLd0zydlo',
1523 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1524 },
1525 'playlist_count': 2,
1526 'skip': 'Not multifeed anymore',
1527 },
1528 {
1529 'url': 'https://vid.plus/FlRa-iH7PGw',
1530 'only_matching': True,
1531 },
1532 {
1533 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1534 'only_matching': True,
1535 },
1536 {
1537 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1538 # Also tests cut-off URL expansion in video description (see
1539 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1540 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1541 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1542 'info_dict': {
1543 'id': 'lsguqyKfVQg',
1544 'ext': 'mp4',
1545 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1546 'alt_title': 'Dark Walk',
1547 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1548 'duration': 133,
1549 'upload_date': '20151119',
1550 'uploader_id': 'IronSoulElf',
1551 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1552 'uploader': 'IronSoulElf',
1553 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1554 'track': 'Dark Walk',
1555 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1556 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1557 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1558 'categories': ['Film & Animation'],
1559 'view_count': int,
1560 'live_status': 'not_live',
1561 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1562 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1563 'tags': 'count:13',
1564 'availability': 'public',
1565 'channel': 'IronSoulElf',
1566 'playable_in_embed': True,
1567 'like_count': int,
1568 'age_limit': 0,
1569 'channel_follower_count': int
1570 },
1571 'params': {
1572 'skip_download': True,
1573 },
1574 },
1575 {
1576 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1577 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1578 'only_matching': True,
1579 },
1580 {
1581 # Video with yt:stretch=17:0
1582 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1583 'info_dict': {
1584 'id': 'Q39EVAstoRM',
1585 'ext': 'mp4',
1586 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1587 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1588 'upload_date': '20151107',
1589 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1590 'uploader': 'CH GAMER DROID',
1591 },
1592 'params': {
1593 'skip_download': True,
1594 },
1595 'skip': 'This video does not exist.',
1596 },
1597 {
1598 # Video with incomplete 'yt:stretch=16:'
1599 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1600 'only_matching': True,
1601 },
1602 {
1603 # Video licensed under Creative Commons
1604 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1605 'info_dict': {
1606 'id': 'M4gD1WSo5mA',
1607 'ext': 'mp4',
1608 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1609 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1610 'duration': 721,
1611 'upload_date': '20150128',
1612 'uploader_id': 'BerkmanCenter',
1613 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1614 'uploader': 'The Berkman Klein Center for Internet & Society',
1615 'license': 'Creative Commons Attribution license (reuse allowed)',
1616 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1617 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1618 'like_count': int,
1619 'age_limit': 0,
1620 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1621 'channel': 'The Berkman Klein Center for Internet & Society',
1622 'availability': 'public',
1623 'view_count': int,
1624 'categories': ['Education'],
1625 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1626 'live_status': 'not_live',
1627 'playable_in_embed': True,
1628 'comment_count': int,
1629 'channel_follower_count': int
1630 },
1631 'params': {
1632 'skip_download': True,
1633 },
1634 },
1635 {
1636 # Channel-like uploader_url
1637 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1638 'info_dict': {
1639 'id': 'eQcmzGIKrzg',
1640 'ext': 'mp4',
1641 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1642 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1643 'duration': 4060,
1644 'upload_date': '20151120',
1645 'uploader': 'Bernie Sanders',
1646 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1647 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1648 'license': 'Creative Commons Attribution license (reuse allowed)',
1649 'playable_in_embed': True,
1650 'tags': 'count:12',
1651 'like_count': int,
1652 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1653 'age_limit': 0,
1654 'availability': 'public',
1655 'categories': ['News & Politics'],
1656 'channel': 'Bernie Sanders',
1657 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1658 'view_count': int,
1659 'live_status': 'not_live',
1660 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1661 'comment_count': int,
1662 'channel_follower_count': int
1663 },
1664 'params': {
1665 'skip_download': True,
1666 },
1667 },
1668 {
1669 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1670 'only_matching': True,
1671 },
1672 {
1673 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1674 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1675 'only_matching': True,
1676 },
1677 {
1678 # Rental video preview
1679 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1680 'info_dict': {
1681 'id': 'uGpuVWrhIzE',
1682 'ext': 'mp4',
1683 'title': 'Piku - Trailer',
1684 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1685 'upload_date': '20150811',
1686 'uploader': 'FlixMatrix',
1687 'uploader_id': 'FlixMatrixKaravan',
1688 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1689 'license': 'Standard YouTube License',
1690 },
1691 'params': {
1692 'skip_download': True,
1693 },
1694 'skip': 'This video is not available.',
1695 },
1696 {
1697 # YouTube Red video with episode data
1698 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1699 'info_dict': {
1700 'id': 'iqKdEhx-dD4',
1701 'ext': 'mp4',
1702 'title': 'Isolation - Mind Field (Ep 1)',
1703 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1704 'duration': 2085,
1705 'upload_date': '20170118',
1706 'uploader': 'Vsauce',
1707 'uploader_id': 'Vsauce',
1708 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1709 'series': 'Mind Field',
1710 'season_number': 1,
1711 'episode_number': 1,
1712 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
1713 'tags': 'count:12',
1714 'view_count': int,
1715 'availability': 'public',
1716 'age_limit': 0,
1717 'channel': 'Vsauce',
1718 'episode': 'Episode 1',
1719 'categories': ['Entertainment'],
1720 'season': 'Season 1',
1721 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
1722 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
1723 'like_count': int,
1724 'playable_in_embed': True,
1725 'live_status': 'not_live',
1726 'channel_follower_count': int
1727 },
1728 'params': {
1729 'skip_download': True,
1730 },
1731 'expected_warnings': [
1732 'Skipping DASH manifest',
1733 ],
1734 },
1735 {
1736 # The following content has been identified by the YouTube community
1737 # as inappropriate or offensive to some audiences.
1738 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1739 'info_dict': {
1740 'id': '6SJNVb0GnPI',
1741 'ext': 'mp4',
1742 'title': 'Race Differences in Intelligence',
1743 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1744 'duration': 965,
1745 'upload_date': '20140124',
1746 'uploader': 'New Century Foundation',
1747 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1748 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1749 },
1750 'params': {
1751 'skip_download': True,
1752 },
1753 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1754 },
1755 {
1756 # itag 212
1757 'url': '1t24XAntNCY',
1758 'only_matching': True,
1759 },
1760 {
1761 # geo restricted to JP
1762 'url': 'sJL6WA-aGkQ',
1763 'only_matching': True,
1764 },
1765 {
1766 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1767 'only_matching': True,
1768 },
1769 {
1770 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1771 'only_matching': True,
1772 },
1773 {
1774 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1775 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1776 'only_matching': True,
1777 },
1778 {
1779 # DRM protected
1780 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1781 'only_matching': True,
1782 },
1783 {
1784 # Video with unsupported adaptive stream type formats
1785 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1786 'info_dict': {
1787 'id': 'Z4Vy8R84T1U',
1788 'ext': 'mp4',
1789 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1790 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1791 'duration': 433,
1792 'upload_date': '20130923',
1793 'uploader': 'Amelia Putri Harwita',
1794 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1795 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1796 'formats': 'maxcount:10',
1797 },
1798 'params': {
1799 'skip_download': True,
1800 'youtube_include_dash_manifest': False,
1801 },
1802 'skip': 'not actual anymore',
1803 },
1804 {
1805 # Youtube Music Auto-generated description
1806 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1807 'info_dict': {
1808 'id': 'MgNrAu2pzNs',
1809 'ext': 'mp4',
1810 'title': 'Voyeur Girl',
1811 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1812 'upload_date': '20190312',
1813 'uploader': 'Stephen - Topic',
1814 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1815 'artist': 'Stephen',
1816 'track': 'Voyeur Girl',
1817 'album': 'it\'s too much love to know my dear',
1818 'release_date': '20190313',
1819 'release_year': 2019,
1820 'alt_title': 'Voyeur Girl',
1821 'view_count': int,
1822 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1823 'playable_in_embed': True,
1824 'like_count': int,
1825 'categories': ['Music'],
1826 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
1827 'channel': 'Stephen',
1828 'availability': 'public',
1829 'creator': 'Stephen',
1830 'duration': 169,
1831 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
1832 'age_limit': 0,
1833 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1834 'tags': 'count:11',
1835 'live_status': 'not_live',
1836 'channel_follower_count': int
1837 },
1838 'params': {
1839 'skip_download': True,
1840 },
1841 },
1842 {
1843 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1844 'only_matching': True,
1845 },
1846 {
1847 # invalid -> valid video id redirection
1848 'url': 'DJztXj2GPfl',
1849 'info_dict': {
1850 'id': 'DJztXj2GPfk',
1851 'ext': 'mp4',
1852 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1853 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1854 'upload_date': '20090125',
1855 'uploader': 'Prochorowka',
1856 'uploader_id': 'Prochorowka',
1857 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1858 'artist': 'Panjabi MC',
1859 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1860 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1861 },
1862 'params': {
1863 'skip_download': True,
1864 },
1865 'skip': 'Video unavailable',
1866 },
1867 {
1868 # empty description results in an empty string
1869 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1870 'info_dict': {
1871 'id': 'x41yOUIvK2k',
1872 'ext': 'mp4',
1873 'title': 'IMG 3456',
1874 'description': '',
1875 'upload_date': '20170613',
1876 'uploader_id': 'ElevageOrVert',
1877 'uploader': 'ElevageOrVert',
1878 'view_count': int,
1879 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
1880 'uploader_url': 'http://www.youtube.com/user/ElevageOrVert',
1881 'like_count': int,
1882 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
1883 'tags': [],
1884 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
1885 'availability': 'public',
1886 'age_limit': 0,
1887 'categories': ['Pets & Animals'],
1888 'duration': 7,
1889 'playable_in_embed': True,
1890 'live_status': 'not_live',
1891 'channel': 'ElevageOrVert',
1892 'channel_follower_count': int
1893 },
1894 'params': {
1895 'skip_download': True,
1896 },
1897 },
1898 {
1899 # with '};' inside yt initial data (see [1])
1900 # see [2] for an example with '};' inside ytInitialPlayerResponse
1901 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1902 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1903 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1904 'info_dict': {
1905 'id': 'CHqg6qOn4no',
1906 'ext': 'mp4',
1907 'title': 'Part 77 Sort a list of simple types in c#',
1908 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1909 'upload_date': '20130831',
1910 'uploader_id': 'kudvenkat',
1911 'uploader': 'kudvenkat',
1912 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
1913 'like_count': int,
1914 'uploader_url': 'http://www.youtube.com/user/kudvenkat',
1915 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
1916 'live_status': 'not_live',
1917 'categories': ['Education'],
1918 'availability': 'public',
1919 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
1920 'tags': 'count:12',
1921 'playable_in_embed': True,
1922 'age_limit': 0,
1923 'view_count': int,
1924 'duration': 522,
1925 'channel': 'kudvenkat',
1926 'comment_count': int,
1927 'channel_follower_count': int
1928 },
1929 'params': {
1930 'skip_download': True,
1931 },
1932 },
1933 {
1934 # another example of '};' in ytInitialData
1935 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1936 'only_matching': True,
1937 },
1938 {
1939 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1940 'only_matching': True,
1941 },
1942 {
1943 # https://github.com/ytdl-org/youtube-dl/pull/28094
1944 'url': 'OtqTfy26tG0',
1945 'info_dict': {
1946 'id': 'OtqTfy26tG0',
1947 'ext': 'mp4',
1948 'title': 'Burn Out',
1949 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1950 'upload_date': '20141120',
1951 'uploader': 'The Cinematic Orchestra - Topic',
1952 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1953 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1954 'artist': 'The Cinematic Orchestra',
1955 'track': 'Burn Out',
1956 'album': 'Every Day',
1957 'like_count': int,
1958 'live_status': 'not_live',
1959 'alt_title': 'Burn Out',
1960 'duration': 614,
1961 'age_limit': 0,
1962 'view_count': int,
1963 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1964 'creator': 'The Cinematic Orchestra',
1965 'channel': 'The Cinematic Orchestra',
1966 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
1967 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1968 'availability': 'public',
1969 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
1970 'categories': ['Music'],
1971 'playable_in_embed': True,
1972 'channel_follower_count': int
1973 },
1974 'params': {
1975 'skip_download': True,
1976 },
1977 },
1978 {
1979 # controversial video, only works with bpctr when authenticated with cookies
1980 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1981 'only_matching': True,
1982 },
1983 {
1984 # controversial video, requires bpctr/contentCheckOk
1985 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1986 'info_dict': {
1987 'id': 'SZJvDhaSDnc',
1988 'ext': 'mp4',
1989 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1990 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1991 'uploader': 'CBS Mornings',
1992 'uploader_id': 'CBSThisMorning',
1993 'upload_date': '20140716',
1994 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
1995 'duration': 170,
1996 'categories': ['News & Politics'],
1997 'uploader_url': 'http://www.youtube.com/user/CBSThisMorning',
1998 'view_count': int,
1999 'channel': 'CBS Mornings',
2000 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2001 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2002 'age_limit': 18,
2003 'availability': 'needs_auth',
2004 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2005 'like_count': int,
2006 'live_status': 'not_live',
2007 'playable_in_embed': True,
2008 'channel_follower_count': int
2009 }
2010 },
2011 {
2012 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2013 'url': 'cBvYw8_A0vQ',
2014 'info_dict': {
2015 'id': 'cBvYw8_A0vQ',
2016 'ext': 'mp4',
2017 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2018 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2019 'upload_date': '20201120',
2020 'uploader': 'Walk around Japan',
2021 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2022 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2023 'duration': 1456,
2024 'categories': ['Travel & Events'],
2025 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2026 'view_count': int,
2027 'channel': 'Walk around Japan',
2028 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2029 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp',
2030 'age_limit': 0,
2031 'availability': 'public',
2032 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2033 'live_status': 'not_live',
2034 'playable_in_embed': True,
2035 'channel_follower_count': int
2036 },
2037 'params': {
2038 'skip_download': True,
2039 },
2040 }, {
2041 # Has multiple audio streams
2042 'url': 'WaOKSUlf4TM',
2043 'only_matching': True
2044 }, {
2045 # Requires Premium: has format 141 when requested using YTM url
2046 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2047 'only_matching': True
2048 }, {
2049 # multiple subtitles with same lang_code
2050 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2051 'only_matching': True,
2052 }, {
2053 # Force use android client fallback
2054 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2055 'info_dict': {
2056 'id': 'YOelRv7fMxY',
2057 'title': 'DIGGING A SECRET TUNNEL Part 1',
2058 'ext': '3gp',
2059 'upload_date': '20210624',
2060 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2061 'uploader': 'colinfurze',
2062 'uploader_id': 'colinfurze',
2063 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
2064 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2065 'duration': 596,
2066 'categories': ['Entertainment'],
2067 'uploader_url': 'http://www.youtube.com/user/colinfurze',
2068 'view_count': int,
2069 'channel': 'colinfurze',
2070 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2071 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2072 'age_limit': 0,
2073 'availability': 'public',
2074 'like_count': int,
2075 'live_status': 'not_live',
2076 'playable_in_embed': True,
2077 'channel_follower_count': int
2078 },
2079 'params': {
2080 'format': '17', # 3gp format available on android
2081 'extractor_args': {'youtube': {'player_client': ['android']}},
2082 },
2083 },
2084 {
2085 # Skip download of additional client configs (remix client config in this case)
2086 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2087 'only_matching': True,
2088 'params': {
2089 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2090 },
2091 }, {
2092 # shorts
2093 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2094 'only_matching': True,
2095 }, {
2096 'note': 'Storyboards',
2097 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2098 'info_dict': {
2099 'id': '5KLPxDtMqe8',
2100 'ext': 'mhtml',
2101 'format_id': 'sb0',
2102 'title': 'Your Brain is Plastic',
2103 'uploader_id': 'scishow',
2104 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2105 'upload_date': '20140324',
2106 'uploader': 'SciShow',
2107 'like_count': int,
2108 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2109 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2110 'view_count': int,
2111 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2112 'playable_in_embed': True,
2113 'tags': 'count:12',
2114 'uploader_url': 'http://www.youtube.com/user/scishow',
2115 'availability': 'public',
2116 'channel': 'SciShow',
2117 'live_status': 'not_live',
2118 'duration': 248,
2119 'categories': ['Education'],
2120 'age_limit': 0,
2121 'channel_follower_count': int
2122 }, 'params': {'format': 'mhtml', 'skip_download': True}
2123 }, {
2124 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2125 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2126 'info_dict': {
2127 'id': '2NUZ8W2llS4',
2128 'ext': 'mp4',
2129 'title': 'The NP that test your phone performance 🙂',
2130 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2131 'uploader': 'Leon Nguyen',
2132 'uploader_id': 'VNSXIII',
2133 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
2134 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2135 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2136 'duration': 21,
2137 'view_count': int,
2138 'age_limit': 0,
2139 'categories': ['Gaming'],
2140 'tags': 'count:23',
2141 'playable_in_embed': True,
2142 'live_status': 'not_live',
2143 'upload_date': '20220103',
2144 'like_count': int,
2145 'availability': 'public',
2146 'channel': 'Leon Nguyen',
2147 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2148 'comment_count': int,
2149 'channel_follower_count': int
2150 }
2151 }, {
2152 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2153 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2154 'info_dict': {
2155 'id': 'mzZzzBU6lrM',
2156 'ext': 'mp4',
2157 'title': 'I Met GeorgeNotFound In Real Life...',
2158 'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
2159 'uploader': 'Quackity',
2160 'uploader_id': 'QuackityHQ',
2161 'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
2162 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2163 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2164 'duration': 955,
2165 'view_count': int,
2166 'age_limit': 0,
2167 'categories': ['Entertainment'],
2168 'tags': 'count:26',
2169 'playable_in_embed': True,
2170 'live_status': 'not_live',
2171 'release_timestamp': 1641172509,
2172 'release_date': '20220103',
2173 'upload_date': '20220103',
2174 'like_count': int,
2175 'availability': 'public',
2176 'channel': 'Quackity',
2177 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2178 'channel_follower_count': int
2179 }
2180 },
2181 { # continuous livestream. Microformat upload date should be preferred.
2182 # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
2183 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
2184 'info_dict': {
2185 'id': 'kgx4WGK0oNU',
2186 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
2187 'ext': 'mp4',
2188 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2189 'availability': 'public',
2190 'age_limit': 0,
2191 'release_timestamp': 1637975704,
2192 'upload_date': '20210619',
2193 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2194 'live_status': 'is_live',
2195 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
2196 'uploader': '阿鲍Abao',
2197 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
2198 'channel': 'Abao in Tokyo',
2199 'channel_follower_count': int,
2200 'release_date': '20211127',
2201 'tags': 'count:39',
2202 'categories': ['People & Blogs'],
2203 'like_count': int,
2204 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
2205 'view_count': int,
2206 'playable_in_embed': True,
2207 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
2208 },
2209 'params': {'skip_download': True}
2210 }, {
2211 # Story. Requires specific player params to work.
2212 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
2213 'info_dict': {
2214 'id': 'vv8qTUWmulI',
2215 'ext': 'mp4',
2216 'availability': 'unlisted',
2217 'view_count': int,
2218 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
2219 'upload_date': '20220526',
2220 'categories': ['Education'],
2221 'title': 'Story',
2222 'channel': 'IT\'S HISTORY',
2223 'description': '',
2224 'uploader_id': 'BlastfromthePast',
2225 'duration': 12,
2226 'uploader': 'IT\'S HISTORY',
2227 'playable_in_embed': True,
2228 'age_limit': 0,
2229 'live_status': 'not_live',
2230 'tags': [],
2231 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
2232 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
2233 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
2234 },
2235 'skip': 'stories get removed after some period of time',
2236 }, {
2237 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2238 'info_dict': {
2239 'id': 'tjjjtzRLHvA',
2240 'ext': 'mp4',
2241 'title': 'ハッシュタグ無し };if window.ytcsi',
2242 'upload_date': '20220323',
2243 'like_count': int,
2244 'availability': 'unlisted',
2245 'channel': 'nao20010128nao',
2246 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
2247 'age_limit': 0,
2248 'uploader': 'nao20010128nao',
2249 'uploader_id': 'nao20010128nao',
2250 'categories': ['Music'],
2251 'view_count': int,
2252 'description': '',
2253 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2254 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2255 'live_status': 'not_live',
2256 'playable_in_embed': True,
2257 'channel_follower_count': int,
2258 'duration': 6,
2259 'tags': [],
2260 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
2261 }
2262 }, {
2263 'note': '6 channel audio',
2264 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2265 'only_matching': True,
2266 }
2267 ]
2268
2269 _WEBPAGE_TESTS = [
2270 # YouTube <object> embed
2271 {
2272 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
2273 'md5': '873c81d308b979f0e23ee7e620b312a3',
2274 'info_dict': {
2275 'id': 'msN87y-iEx0',
2276 'ext': 'mp4',
2277 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
2278 'upload_date': '20080526',
2279 'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
2280 'uploader': 'Christopher Sykes',
2281 'uploader_id': 'ChristopherJSykes',
2282 'age_limit': 0,
2283 'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
2284 'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
2285 'playable_in_embed': True,
2286 'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
2287 'like_count': int,
2288 'comment_count': int,
2289 'channel': 'Christopher Sykes',
2290 'live_status': 'not_live',
2291 'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
2292 'availability': 'public',
2293 'duration': 195,
2294 'view_count': int,
2295 'categories': ['Science & Technology'],
2296 'channel_follower_count': int,
2297 'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes',
2298 },
2299 'params': {
2300 'skip_download': True,
2301 }
2302 },
2303 ]
2304
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected outright; any other URL is decided by the parent class.
    """
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    return False if list_values[0] else super().suitable(url)
2313
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor, then create empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # _code_cache is filled by _load_player (player id -> downloaded player JS).
    self._code_cache, self._player_cache = {}, {}
2318
2319 def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
2320 lock = threading.Lock()
2321
2322 is_live = True
2323 start_time = time.time()
2324 formats = [f for f in formats if f.get('is_from_start')]
2325
2326 def refetch_manifest(format_id, delay):
2327 nonlocal formats, start_time, is_live
2328 if time.time() <= start_time + delay:
2329 return
2330
2331 _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
2332 video_details = traverse_obj(
2333 prs, (..., 'videoDetails'), expected_type=dict, default=[])
2334 microformats = traverse_obj(
2335 prs, (..., 'microformat', 'playerMicroformatRenderer'),
2336 expected_type=dict, default=[])
2337 _, is_live, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
2338 start_time = time.time()
2339
2340 def mpd_feed(format_id, delay):
2341 """
2342 @returns (manifest_url, manifest_stream_number, is_live) or None
2343 """
2344 with lock:
2345 refetch_manifest(format_id, delay)
2346
2347 f = next((f for f in formats if f['format_id'] == format_id), None)
2348 if not f:
2349 if not is_live:
2350 self.to_screen(f'{video_id}: Video is no longer live')
2351 else:
2352 self.report_warning(
2353 f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
2354 return None
2355 return f['manifest_url'], f['manifest_stream_number'], is_live
2356
2357 for f in formats:
2358 f['is_live'] = True
2359 f['protocol'] = 'http_dash_segments_generator'
2360 f['fragments'] = functools.partial(
2361 self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)
2362
2363 def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
2364 FETCH_SPAN, MAX_DURATION = 5, 432000
2365
2366 mpd_url, stream_number, is_live = None, None, True
2367
2368 begin_index = 0
2369 download_start_time = ctx.get('start') or time.time()
2370
2371 lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
2372 if lack_early_segments:
2373 self.report_warning(bug_reports_message(
2374 'Starting download from the last 120 hours of the live stream since '
2375 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
2376 lack_early_segments = True
2377
2378 known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
2379 fragments, fragment_base_url = None, None
2380
2381 def _extract_sequence_from_mpd(refresh_sequence, immediate):
2382 nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
2383 # Obtain from MPD's maximum seq value
2384 old_mpd_url = mpd_url
2385 last_error = ctx.pop('last_error', None)
2386 expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
2387 mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
2388 or (mpd_url, stream_number, False))
2389 if not refresh_sequence:
2390 if expire_fast and not is_live:
2391 return False, last_seq
2392 elif old_mpd_url == mpd_url:
2393 return True, last_seq
2394 try:
2395 fmts, _ = self._extract_mpd_formats_and_subtitles(
2396 mpd_url, None, note=False, errnote=False, fatal=False)
2397 except ExtractorError:
2398 fmts = None
2399 if not fmts:
2400 no_fragment_score += 2
2401 return False, last_seq
2402 fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
2403 fragments = fmt_info['fragments']
2404 fragment_base_url = fmt_info['fragment_base_url']
2405 assert fragment_base_url
2406
2407 _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
2408 return True, _last_seq
2409
2410 while is_live:
2411 fetch_time = time.time()
2412 if no_fragment_score > 30:
2413 return
2414 if last_segment_url:
2415 # Obtain from "X-Head-Seqnum" header value from each segment
2416 try:
2417 urlh = self._request_webpage(
2418 last_segment_url, None, note=False, errnote=False, fatal=False)
2419 except ExtractorError:
2420 urlh = None
2421 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
2422 if last_seq is None:
2423 no_fragment_score += 2
2424 last_segment_url = None
2425 continue
2426 else:
2427 should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
2428 no_fragment_score += 2
2429 if not should_continue:
2430 continue
2431
2432 if known_idx > last_seq:
2433 last_segment_url = None
2434 continue
2435
2436 last_seq += 1
2437
2438 if begin_index < 0 and known_idx < 0:
2439 # skip from the start when it's negative value
2440 known_idx = last_seq + begin_index
2441 if lack_early_segments:
2442 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
2443 try:
2444 for idx in range(known_idx, last_seq):
2445 # do not update sequence here or you'll get skipped some part of it
2446 should_continue, _ = _extract_sequence_from_mpd(False, False)
2447 if not should_continue:
2448 known_idx = idx - 1
2449 raise ExtractorError('breaking out of outer loop')
2450 last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
2451 yield {
2452 'url': last_segment_url,
2453 'fragment_count': last_seq,
2454 }
2455 if known_idx == last_seq:
2456 no_fragment_score += 5
2457 else:
2458 no_fragment_score = 0
2459 known_idx = last_seq
2460 except ExtractorError:
2461 continue
2462
2463 time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
2464
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in the given ytcfgs, or None.

    The first string found under ``PLAYER_JS_URL`` or under
    ``WEB_PLAYER_CONTEXT_CONFIGS -> * -> jsUrl`` is joined onto
    ``https://www.youtube.com``. ``webpage`` is accepted but not used here.
    """
    candidate_paths = (
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
    )
    js_path = traverse_obj(ytcfgs, *candidate_paths, get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', js_path) if js_path else None
2472
def _download_player_url(self, video_id, fatal=False):
    """Build the player JS URL from the version found in the iframe API script.

    Returns None when the iframe API page could not be downloaded or no
    player version could be found in it (both lookups honour ``fatal``).
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None

    version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not version:
        return None
    return f'https://www.youtube.com/s/player/{version}/player_ias.vflset/en_US/base.js'
2482
2483 def _signature_cache_id(self, example_sig):
2484 """ Return a string representation of a signature """
2485 return '.'.join(str(len(part)) for part in example_sig.split('.'))
2486
@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first ``_PLAYER_INFO_RE`` pattern matching *player_url*.

    Raises ExtractorError when none of the patterns matches.
    """
    for pattern in cls._PLAYER_INFO_RE:
        match = re.search(pattern, player_url)
        if match:
            return match.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)
2496
def _load_player(self, video_id, player_url, fatal=True):
    """Return the player JS for player_url, keeping it in self._code_cache by player id.

    Returns None when the download produced nothing (possible with fatal=False).
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache.get(player_id)

    player_js = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote='Download of %s failed' % player_url)
    # Only non-empty results are remembered, so a failed download is retried next time
    if player_js:
        self._code_cache[player_id] = player_js
    return self._code_cache.get(player_id)
2507
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that rearranges a signature shaped like example_sig.

    The rearrangement is stored as a list of source indices under the
    'youtube-sigfuncs' cache section. On a cache miss it is derived by
    running the player's signature function on a probe string of
    distinct characters.
    """
    player_id = self._extract_player_info(player_url)
    sig_shape = self._signature_cache_id(example_sig)

    # Key used for the 'youtube-sigfuncs' cache entry; it must be a plain name
    func_id = f'js_{player_id}_{sig_shape}'
    assert os.path.basename(func_id) == func_id

    self.write_debug(f'Extracting signature function {func_id}')
    index_order = self.cache.load('youtube-sigfuncs', func_id)

    if not index_order:
        player_js = self._load_player(video_id, player_url)
        if player_js:
            scramble = self._parse_sig_js(player_js)
            probe = ''.join(chr(code_point) for code_point in range(len(example_sig)))
            index_order = list(map(ord, scramble(probe)))
            self.cache.store('youtube-sigfuncs', func_id, index_order)

    def apply_order(s):
        return ''.join(s[i] for i in index_order)

    return apply_order
2527
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the given signature function.

    Does nothing unless the 'youtube_print_sig_code' param is set.
    `func` is probed with a string of distinct characters (one per
    character of `example_sig`); the resulting index order is rendered
    as a ' + '-joined list of `s[...]` index/slice expressions.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        # Yields source snippets for `idxs`; runs of indices that step by
        # +1 or -1 are collapsed into one slice expression
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            # A negative end bound would wrap around, so it is left open instead
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return f's[{starts}{ends}{steps}]'

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                # Inside a run: keep going while the step is unchanged
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # Start of a new run
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # NOTE(review): `i` is only bound by the loop above, so this tail
        # assumes idxs has at least two entries
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
2569
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and return a Python callable for it.

    The function name is found with the first matching pattern below
    (tried in order), then extracted with JSInterpreter. The returned
    callable takes the signature string and passes it as the single
    argument of the JS function.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         # This pattern used to be listed twice in a row; the repeat could
         # never match where the first copy had not, so it was dropped
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    return lambda s: initial_function([s])
2593
def _cached(self, func, *cache_id):
    """Wrap func so that its outcome is remembered in self._player_cache under cache_id.

    The key is cache_id alone; the call arguments are not part of it.
    Failures are remembered too: an ExtractorError is stored as-is, any
    other Exception is stored wrapped in an ExtractorError carrying the
    traceback text, and the stored exception is raised on every call.
    """
    def inner(*args, **kwargs):
        if cache_id not in self._player_cache:
            try:
                outcome = func(*args, **kwargs)
            except ExtractorError as err:
                outcome = err
            except Exception as err:
                outcome = ExtractorError(traceback.format_exc(), cause=err)
            self._player_cache[cache_id] = outcome

        remembered = self._player_cache[cache_id]
        if isinstance(remembered, Exception):
            raise remembered
        return remembered

    return inner
2609
def _decrypt_signature(self, s, video_id, player_url):
    """Return the working signature for the encrypted s field.

    The extracted function is cached per player URL and signature shape.
    """
    sig_shape = self._signature_cache_id(s)
    cached_extractor = self._cached(
        self._extract_signature_function, 'sig', player_url, sig_shape)
    descramble = cached_extractor(video_id, player_url, s)
    self._print_sig_code(descramble, s)
    return descramble(s)
2617
def _decrypt_nsig(self, s, video_id, player_url):
    """Return the decrypted value of the n field `s`.

    The native JS interpreter is tried first. If it raises
    JSInterpreter.Exception, the function is run through PhantomJS
    instead; when the PhantomJS wrapper cannot be created, the native
    error is re-raised. Raises ExtractorError if player_url is None.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        build_nsig_func = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        decrypted = build_nsig_func(jsi, func_code)(s)
    except JSInterpreter.Exception as native_error:
        try:
            phantom = PhantomJSwrapper(self)
        except ExtractorError:
            raise native_error
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(native_error)

        arg_names, body = func_code
        script = f'console.log(function({", ".join(arg_names)}) {{ {body} }}({s!r}));'
        decrypted = phantom.execute(
            script, video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {decrypted}')
    return decrypted
2648
def _extract_n_function_code(self, video_id, player_url):
    """Return (JSInterpreter, player_id, func_code) for the player's n function.

    func_code is taken from the 'youtube-nsig' cache when present.
    Otherwise the function name is located in the player JS, its code is
    extracted and then stored in that cache.
    """
    player_id = self._extract_player_info(player_url)
    cached_code = self.cache.load('youtube-nsig', player_id)
    if cached_code:
        # On a cache hit the interpreter is constructed from the cached value itself
        return JSInterpreter(cached_code), player_id, cached_code

    jscode = cached_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    name, list_index = self._search_regex(
        r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
        jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
    if list_index:
        # The match was an indexed array variable: look the real name up in that array
        array_literal = self._search_regex(
            rf'var {re.escape(name)}\s*=\s*(\[.+?\]);', jscode,
            f'Initial JS player n function list ({name}.{list_index})')
        name = json.loads(js_to_json(array_literal))[int(list_index)]

    extracted_code = jsi.extract_function_code(name)
    self.cache.store('youtube-nsig', player_id, extracted_code)
    return jsi, player_id, extracted_code
2669
def _extract_n_function_from_code(self, jsi, func_code):
    """Return a callable that runs the n function described by func_code.

    The callable raises JSInterpreter.Exception for any failure: errors
    of that type pass through unchanged, other exceptions are wrapped
    with the traceback text, and a result starting with
    'enhanced_except_' is treated as a failure as well.
    """
    js_func = jsi.extract_function_from_code(*func_code)

    def extract_nsig(s):
        try:
            result = js_func([s])
        except Exception as err:
            if isinstance(err, JSInterpreter.Exception):
                raise
            raise JSInterpreter.Exception(traceback.format_exc(), cause=err)

        if not result.startswith('enhanced_except_'):
            return result
        raise JSInterpreter.Exception('Signature function returned an exception')

    return extract_nsig
2686
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The 'STS' entry of ytcfg is preferred; otherwise the value is searched
    for in the player JS. Without a player_url this warns and returns None,
    or raises ExtractorError when fatal is set.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    player_js = self._load_player(video_id, player_url, fatal=fatal)
    if not player_js:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', player_js,
        'JS player signature timestamp', group='sts', fatal=fatal))
2710
def _mark_watched(self, video_id, player_responses):
    """Request the playback and watchtime tracking URLs found in player_responses.

    Stops with a warning at the first tracking URL that is missing.
    The requests themselves are non-fatal.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    tracking_keys = ('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')

    for is_full, key in enumerate(tracking_keys):
        label = 'fully ' if is_full else ''
        base_url = get_first(
            player_responses, ('playbackTracking', key, 'baseUrl'), expected_type=url_or_none)
        if not base_url:
            self.report_warning(f'Unable to mark {label}watched')
            return

        url_parts = urllib.parse.urlparse(base_url)
        query = urllib.parse.parse_qs(url_parts.query)

        cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)]
        cpn = ''.join(cpn_chars)

        # more consistent results setting it to right before the end
        reported_len = (query.get('len') or ['1.5'])[0]
        video_length = [str(float(reported_len) - 1)]

        query['ver'] = ['2']
        query['cpn'] = [cpn]
        query['cmt'] = video_length
        query['el'] = 'detailpage'  # otherwise defaults to "shorts"

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            query['st'] = video_length
            query['et'] = video_length

        ping_url = urllib.parse.urlunparse(
            url_parts._replace(query=urllib.parse.urlencode(query, True)))

        self._download_webpage(
            ping_url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
2751
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield url_results for YouTube videos referenced by a third-party webpage.

    An Invidious-style alternate link short-circuits everything else via
    StopExtraction. Otherwise the parent class results come first,
    followed by lazyYT embeds and "YouTube Video Importer" players.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate_link = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate_link:
        yield cls.url_result(alternate_link.group('url'), cls)
        raise cls.StopExtraction()

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    for lazy_match in re.finditer(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        raw_id = lazy_match.group(1)
        yield cls.url_result(unescapeHTML(raw_id), cls, raw_id)

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    for importer_match in re.finditer(importer_re, webpage):
        # The id is the last (third) capture group, after the two quote groups
        importer_id = importer_match.group(3)
        yield cls.url_result(importer_id, cls, importer_id)
2775
@classmethod
def extract_id(cls, url):
    """Return the id obtained from get_temp_id(url); raise ExtractorError if there is none."""
    temp_id = cls.get_temp_id(url)
    if temp_id:
        return temp_id
    raise ExtractorError(f'Invalid URL: {url}')
2782
def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chapteredPlayerBarRenderer 'chapters' list in data."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )

    def start_of(chapter):
        # timeRangeStartMillis is divided by 1000 via scale
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    raw_chapters = traverse_obj(data, chapters_path, expected_type=list)
    return self._extract_chapters(
        raw_chapters, chapter_time=start_of, chapter_title=title_of, duration=duration)
2797
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return chapters from the first macroMarkersListRenderer that produces any, else []."""
    def marker_time(marker):
        return parse_duration(self._get_text(marker, 'timeDescription'))

    def marker_title(marker):
        return self._get_text(marker, 'title')

    marker_lists = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    for contents in marker_lists:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters(markers, marker_time, marker_title, duration)
        if chapters:
            return chapters
    return []
2810
def _extract_chapters_from_description(self, description, duration):
    """Build chapters from '<timestamp> <title>' lines of the description (non-strict ordering)."""
    def stamp_seconds(stamp_and_title):
        return parse_duration(stamp_and_title[0])

    def title_text(stamp_and_title):
        return stamp_and_title[1]

    timestamp_lines = re.findall(
        r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or '')
    return self._extract_chapters(
        timestamp_lines, chapter_time=stamp_seconds, chapter_title=title_text,
        duration=duration, strict=False)
2816
2817 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
2818 if not duration:
2819 return
2820 chapter_list = [{
2821 'start_time': chapter_time(chapter),
2822 'title': chapter_title(chapter),
2823 } for chapter in chapter_list or []]
2824 if not strict:
2825 chapter_list.sort(key=lambda c: c['start_time'] or 0)
2826
2827 chapters = [{'start_time': 0}]
2828 for idx, chapter in enumerate(chapter_list):
2829 if chapter['start_time'] is None:
2830 self.report_warning(f'Incomplete chapter {idx}')
2831 elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
2832 chapters.append(chapter)
2833 else:
2834 self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')
2835 return chapters[1:]
2836
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no 'commentId'. 'parent' falls
    back to 'root' when no parent is given.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return None

    text = self._get_text(comment_renderer, 'contentText')
    # note: timestamp is an estimate calculated from the current time and time_text
    timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
    author = self._get_text(comment_renderer, 'authorText')

    vote_text = try_get(
        comment_renderer,
        (lambda x: x['voteCount']['simpleText'], lambda x: x['likeCount']),
        str)
    action_buttons = try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}

    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': parse_count(vote_text) or 0,
        'is_favorited': 'creatorHeart' in action_buttons,
        'author': author,
        'author_id': try_get(
            comment_renderer, lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str),
        'author_thumbnail': try_get(
            comment_renderer, lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str),
        'author_is_uploader': try_get(
            comment_renderer, lambda x: x['authorIsChannelOwner'], bool),
        'parent': parent or 'root'
    }
2871
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Yield comment dicts, following continuations page by page.

    The generator calls itself (with `parent` set to the comment id) for
    the replies of each comment. `tracker` is a dict of counters shared
    across those recursive calls; it is created here when not supplied.
    Limits come from the 'max_comments' extractor argument, read as up to
    four values: max comments, max parents, max replies, max replies per
    thread. Missing values fall back to sys.maxsize.
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        # Reads the comment count and picks the continuation of the
        # requested sort order; returns that continuation (or None)
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text.lower())
            break
        return _continuation

    def extract_thread(contents):
        # Yields the comments of one page, each followed by its replies.
        # A bare `yield` (None) signals the caller that the parent limit
        # was reached; the page loop below returns when it sees it.
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            comment_renderer = get_first(
                (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Cap replies by both the per-thread limit and what is left of the overall reply limit
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=0,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0)

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Padding with four empty strings guarantees at least four values to unpack
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map(
        lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
        is_forced_continuation = False
        continuation_contents = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        # Reset so the page loop ends unless one of the sections provides a next continuation
        continuation = None
        for continuation_section in continuation_contents:
            continuation_items = traverse_obj(
                continuation_section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first section of the first response is handled as the header only
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items):
                if not entry:
                    # None sentinel from extract_thread: stop the whole generator
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
3017
3018 @staticmethod
3019 def _generate_comment_continuation(video_id):
3020 """
3021 Generates initial comment section continuation token from given video id
3022 """
3023 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3024 return base64.b64encode(token.encode()).decode()
3025
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Returns a lazy iterator over the comments of the 'comment-item-section'
    renderer in contents, cut off after the first 'max_comments' value.
    """
    def comment_stream(sections):
        section_renderers = traverse_obj(sections, (..., 'itemSectionRenderer'), default={})
        comment_section = None
        for candidate in section_renderers:
            if candidate.get('sectionIdentifier') == 'comment-item-section':
                comment_section = candidate
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(comment_stream(contents), 0, limit)
3036
3037 @staticmethod
3038 def _get_checkok_params():
3039 return {'contentCheckOk': True, 'racyCheckOk': True}
3040
@classmethod
def _generate_player_context(cls, sts=None):
    """Return the playbackContext part of a player query plus the check-ok flags.

    signatureTimestamp is included only when sts is not None.
    """
    playback_context = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback_context['signatureTimestamp'] = sts

    player_context = {
        'playbackContext': {
            'contentPlaybackContext': playback_context
        },
    }
    player_context.update(cls._get_checkok_params())
    return player_context
3054
@staticmethod
def _is_agegated(player_response):
    """Return True when the player response looks age-gated.

    That is the case when playabilityStatus has a truthy
    'desktopLegacyAgeGateReason', or when its 'status' or 'reason' text
    contains one of the known age-gate markers.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # The markers are lower-case while statuses are compared in upper-case
    # elsewhere in this file (e.g. 'UNPLAYABLE'), so match case-insensitively;
    # otherwise the status markers could never match
    candidates = [reason.lower() for reason in reasons if isinstance(reason, str)]
    return any(expected in candidate for expected in AGE_GATE_REASONS for candidate in candidates)
3066
@staticmethod
def _is_unplayable(player_response):
    """Return True when the playabilityStatus status is exactly 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
3070
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Query the 'player' API endpoint as `client`; return the response, or None if it is empty.

    The signature timestamp is only looked up when a player_url is given.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)

    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)

    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {
        'videoId': video_id,
        'params': '8AEB',  # enable stories
        **self._generate_player_context(sts),
    }
    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading %s player API JSON' % client_label)
    return response or None
3090
def _get_requested_clients(self, url, smuggled_data):
    """Return the ordered, de-duplicated list of innertube clients to query.

    Clients come from the 'player_client' extractor argument: known
    public client names are taken as-is, 'default' expands to the
    default clients and 'all' to every public client (sorted by
    descending priority). Unknown names are skipped with a warning. For
    music URLs the '<client>_music' variant of each requested client is
    appended when such a client exists.
    """
    default = ['android', 'web']
    # Clients starting with "_" cannot be requested explicitly
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions before extending: extending the list from a
        # generator over itself would feed the newly appended names back
        # into the iteration
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
3114
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect player responses for the given clients.

    Returns a tuple (prs, player_url): the list of accepted player
    responses and the player JS URL that was determined along the way
    (may be None). Additional clients may be queued while looping,
    depending on the playability status of each response. The last
    ExtractorError is raised only if no response at all was collected;
    otherwise errors are reported as warnings.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    all_clients = set(clients)
    # Reversed so that clients.pop() below takes them in the requested order
    clients = clients[::-1]
    prs = []

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    clients.append(client_name)
                    all_clients.add(actual_client)
                    return

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        pr = dict(initial_pr)
        pr['streamingData'] = None
        prs.append(pr)

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        # Keep a player URL found for an earlier client
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The iframe API fallback is attempted at most once per call
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            # For the web client the response embedded in the webpage is reused when available
            pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Only the most recent error is kept; an earlier one is downgraded to a warning
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            # YouTube may return a different video player response than expected.
            # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
            pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
            if pr_video_id and pr_video_id != video_id:
                self.report_warning(
                    f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
            else:
                prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
            append_client(f'{base_client}_creator')
        elif self._is_agegated(pr):
            if variant == 'tv_embedded':
                append_client(f'{base_client}_embedded')
            elif not variant:
                append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')

    if last_error:
        if not len(prs):
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
3196
3197 def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, is_live, duration):
3198 itags, stream_ids = {}, []
3199 itag_qualities, res_qualities = {}, {0: -1}
3200 q = qualities([
3201 # Normally tiny is the smallest video-only formats. But
3202 # audio-only formats with unknown quality may get tagged as tiny
3203 'tiny',
3204 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
3205 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
3206 ])
3207 streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
3208
3209 for fmt in streaming_formats:
3210 if fmt.get('targetDurationSec'):
3211 continue
3212
3213 itag = str_or_none(fmt.get('itag'))
3214 audio_track = fmt.get('audioTrack') or {}
3215 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
3216 if stream_id in stream_ids:
3217 continue
3218
3219 quality = fmt.get('quality')
3220 height = int_or_none(fmt.get('height'))
3221 if quality == 'tiny' or not quality:
3222 quality = fmt.get('audioQuality', '').lower() or quality
3223 # The 3gp format (17) in android client has a quality of "small",
3224 # but is actually worse than other formats
3225 if itag == '17':
3226 quality = 'tiny'
3227 if quality:
3228 if itag:
3229 itag_qualities[itag] = quality
3230 if height:
3231 res_qualities[height] = quality
3232 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
3233 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
3234 # number of fragment that would subsequently requested with (`&sq=N`)
3235 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
3236 continue
3237
3238 fmt_url = fmt.get('url')
3239 if not fmt_url:
3240 sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
3241 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
3242 encrypted_sig = try_get(sc, lambda x: x['s'][0])
3243 if not all((sc, fmt_url, player_url, encrypted_sig)):
3244 continue
3245 try:
3246 fmt_url += '&%s=%s' % (
3247 traverse_obj(sc, ('sp', -1)) or 'signature',
3248 self._decrypt_signature(encrypted_sig, video_id, player_url)
3249 )
3250 except ExtractorError as e:
3251 self.report_warning('Signature extraction failed: Some formats may be missing',
3252 video_id=video_id, only_once=True)
3253 self.write_debug(e, only_once=True)
3254 continue
3255
3256 query = parse_qs(fmt_url)
3257 throttled = False
3258 if query.get('n'):
3259 try:
3260 decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
3261 fmt_url = update_url_query(fmt_url, {
3262 'n': decrypt_nsig(query['n'][0], video_id, player_url)
3263 })
3264 except ExtractorError as e:
3265 phantomjs_hint = ''
3266 if isinstance(e, JSInterpreter.Exception):
3267 phantomjs_hint = f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} to workaround the issue\n'
3268 self.report_warning(
3269 f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
3270 f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
3271 self.write_debug(e, only_once=True)
3272 throttled = True
3273
3274 if itag:
3275 itags[itag] = 'https'
3276 stream_ids.append(stream_id)
3277
3278 tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
3279 language_preference = (
3280 10 if audio_track.get('audioIsDefault') and 10
3281 else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
3282 else -1)
3283 # Some formats may have much smaller duration than others (possibly damaged during encoding)
3284 # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
3285 # Make sure to avoid false positives with small duration differences.
3286 # E.g. __2ABJjxzNo, ySuUZEjARPY
3287 is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
3288 if is_damaged:
3289 self.report_warning(
3290 f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
3291 dct = {
3292 'asr': int_or_none(fmt.get('audioSampleRate')),
3293 'filesize': int_or_none(fmt.get('contentLength')),
3294 'format_id': itag,
3295 'format_note': join_nonempty(
3296 '%s%s' % (audio_track.get('displayName') or '',
3297 ' (default)' if language_preference > 0 else ''),
3298 fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
3299 try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
3300 try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
3301 throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
3302 # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
3303 'source_preference': -10 if throttled else -5 if itag == '22' else -1,
3304 'fps': int_or_none(fmt.get('fps')) or None,
3305 'audio_channels': fmt.get('audioChannels'),
3306 'height': height,
3307 'quality': q(quality),
3308 'has_drm': bool(fmt.get('drmFamilies')),
3309 'tbr': tbr,
3310 'url': fmt_url,
3311 'width': int_or_none(fmt.get('width')),
3312 'language': join_nonempty(audio_track.get('id', '').split('.')[0],
3313 'desc' if language_preference < -1 else ''),
3314 'language_preference': language_preference,
3315 # Strictly de-prioritize damaged and 3gp formats
3316 'preference': -10 if is_damaged else -2 if itag == '17' else None,
3317 }
3318 mime_mobj = re.match(
3319 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
3320 if mime_mobj:
3321 dct['ext'] = mimetype2ext(mime_mobj.group(1))
3322 dct.update(parse_codecs(mime_mobj.group(2)))
3323 no_audio = dct.get('acodec') == 'none'
3324 no_video = dct.get('vcodec') == 'none'
3325 if no_audio:
3326 dct['vbr'] = tbr
3327 if no_video:
3328 dct['abr'] = tbr
3329 if no_audio or no_video:
3330 dct['downloader_options'] = {
3331 # Youtube throttles chunks >~10M
3332 'http_chunk_size': 10485760,
3333 }
3334 if dct.get('ext'):
3335 dct['container'] = dct['ext'] + '_dash'
3336 yield dct
3337
3338 live_from_start = is_live and self.get_param('live_from_start')
3339 skip_manifests = self._configuration_arg('skip')
3340 if not self.get_param('youtube_include_hls_manifest', True):
3341 skip_manifests.append('hls')
3342 if not self.get_param('youtube_include_dash_manifest', True):
3343 skip_manifests.append('dash')
3344 get_dash = 'dash' not in skip_manifests and (
3345 not is_live or live_from_start or self._configuration_arg('include_live_dash'))
3346 get_hls = not live_from_start and 'hls' not in skip_manifests
3347
3348 def process_manifest_format(f, proto, itag):
3349 if itag in itags:
3350 if itags[itag] == proto or f'{itag}-{proto}' in itags:
3351 return False
3352 itag = f'{itag}-{proto}'
3353 if itag:
3354 f['format_id'] = itag
3355 itags[itag] = proto
3356
3357 f['quality'] = itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1)
3358 if f['quality'] == -1 and f.get('height'):
3359 f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
3360 return True
3361
3362 subtitles = {}
3363 for sd in streaming_data:
3364 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
3365 if hls_manifest_url:
3366 fmts, subs = self._extract_m3u8_formats_and_subtitles(hls_manifest_url, video_id, 'mp4', fatal=False, live=is_live)
3367 subtitles = self._merge_subtitles(subs, subtitles)
3368 for f in fmts:
3369 if process_manifest_format(f, 'hls', self._search_regex(
3370 r'/itag/(\d+)', f['url'], 'itag', default=None)):
3371 yield f
3372
3373 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
3374 if dash_manifest_url:
3375 formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
3376 subtitles = self._merge_subtitles(subs, subtitles) # Prioritize HLS subs over DASH
3377 for f in formats:
3378 if process_manifest_format(f, 'dash', f['format_id']):
3379 f['filesize'] = int_or_none(self._search_regex(
3380 r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
3381 if live_from_start:
3382 f['is_from_start'] = True
3383
3384 yield f
3385 yield subtitles
3386
def _extract_storyboard(self, player_responses, duration):
    """Yield one 'mhtml' storyboard format per level of the storyboard spec.

    The spec string is read from the first player response that has
    storyboards.playerStoryboardSpecRenderer.spec. It is '|'-separated: the
    first part is a URL template (containing $L, $N and $M placeholders) and
    every following part describes one storyboard level as 8 '#'-separated
    fields, of which the first five are integers
    (width, height, frame_count, cols, rows) and the last two are the
    '$N' replacement and the 'sigh' signature.

    @param player_responses  List of player response dicts
    @param duration          Video duration in seconds. When it is unknown
                             (None) or 0, nothing is yielded since neither
                             the fps nor the fragment timings can be computed
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # The list is reversed, so pop() removes the leading URL template
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    # Both the fps and the per-fragment durations divide by/into the duration.
    # Bail out instead of raising TypeError/ZeroDivisionError and aborting
    # the whole extraction when the duration is not known
    if not duration:
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        # spec is iterated in reverse, hence the level index counts down from L
        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one image holding a cols x rows grid of frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may be only partially filled
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
3424
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Fetch the watch page (unless skipped) and the player responses.

    The webpage download is non-fatal and is skipped entirely when 'webpage'
    is listed in the 'player_skip' extractor argument; in both failure and
    skip cases the default ytcfg is used when none can be extracted.

    @returns  (webpage, master_ytcfg, player_responses, player_url)
    """
    if 'webpage' in self._configuration_arg('player_skip'):
        webpage = None
    else:
        webpage = self._download_webpage(
            webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False)

    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg)

    return webpage, master_ytcfg, player_responses, player_url
3438
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine the live state and collect formats from the player responses.

    'isLive' from the video details takes precedence; 'isLiveNow' from the
    microformat broadcast details is only consulted when the former is None.
    The last item produced by _extract_formats_and_subtitles is the subtitles
    dict, everything before it is a format.

    @returns  (live_broadcast_details, is_live, streaming_data, formats, subtitles)
    """
    broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    live_now = get_first(video_details, 'isLive')
    if live_now is None:
        live_now = get_first(broadcast_details, 'isLiveNow')

    all_streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    *found_formats, found_subtitles = self._extract_formats_and_subtitles(
        all_streaming_data, video_id, player_url, live_now, duration)

    return broadcast_details, live_now, all_streaming_data, found_formats, found_subtitles
3449
def _real_extract(self, url):
    """Extract the info dict for a single video URL.

    Returns one of:
      * a url_result pointing at the trailer when the playability status
        carries a trailer video id,
      * a playlist_result of url_transparent entries for multifeed
        (multi-camera) videos, unless --no-playlist or the smuggled
        'force_singlefeed' flag is set,
      * otherwise the info dict of the video itself (formats, thumbnails,
        subtitles, metadata, availability, lazy comment extractor).
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id

    webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Without a webpage there is no HTML metadata to fall back to
    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])
    video_title = (
        get_first(video_details, 'title')
        or self._get_text(microformats, (..., 'title'))
        or search_meta(['og:title', 'twitter:title', 'title']))
    video_description = get_first(video_details, 'shortDescription')

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Each feed is unquoted only after splitting on comma (,):
                # commas inside textual fields are still percent-encoded at this point (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = urllib.parse.parse_qs(
                    urllib.parse.unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += ' (%s)' % feed_title
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    duration = int_or_none(
        get_first(video_details, 'lengthSeconds')
        or get_first(microformats, 'lengthSeconds')
        or parse_duration(search_meta('duration'))) or None

    # The duration must be forwarded: the format extraction compares each format's
    # approxDurationMs against it to detect damaged formats
    live_broadcast_details, is_live, streaming_data, formats, automatic_captions = \
        self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # reason may be missing even though a subreason is given
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    # Only the first valid stretch keyword is applied
                    break
    thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    # Thumbnails that were actually advertised, before the guessed URLs are added
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
        # in resolution, these are not the custom thumbnail. So de-prioritize them
        'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
        'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3'
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Rank by position in thumbnail_names (unknown names last); prefer webp within a name
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    if is_live is None:
        if is_upcoming or live_content is False:
            is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False
    live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_end_time and live_start_time:
        duration = live_end_time - live_start_time

    if is_live and self.get_param('live_from_start'):
        self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)

    formats.extend(self._extract_storyboard(player_responses, duration))

    # source_preference is lower for throttled/potentially damaged formats
    self._sort_formats(formats, (
        'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))

    info = {
        'id': video_id,
        'title': video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if user don't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'uploader': get_first(video_details, 'author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'is_live': is_live,
        'was_live': (False if is_live or is_upcoming or live_content is False
                     else None if is_live is None or is_upcoming is None
                     else live_content),
        'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
        'release_timestamp': live_start_time,
    }

    if get_first(video_details, 'isPostLiveDvr'):
        self.write_debug('Video is in Post-Live Manifestless mode')
        info['live_status'] = 'post_live'
        if (duration or 0) > 4 * 3600:
            self.report_warning(
                'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '
                'This is a known issue and patches are welcome')

    subtitles = {}
    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

        def process_language(container, base_url, lang_code, sub_name, query):
            # Adds one entry per supported subtitle format to container[lang_code]
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
                    'name': sub_name,
                })

        # NB: Constructing the full subtitle dictionary is slow
        get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
            self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
        for lang_code, caption_track in captions.items():
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            orig_lang = parse_qs(base_url).get('lang', [None])[-1]
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                orig_trans_code = trans_code
                if caption_track.get('kind') != 'asr':
                    if not get_translated_subs:
                        continue
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, None, ' from %s')
                # Add an "-orig" label to the original language so that it can be distinguished.
                # The subs are returned without "-orig" as well for compatibility
                if lang_code == f'a-{orig_trans_code}':
                    process_language(
                        automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
                # Setting tlang=lang returns damaged subtitles.
                process_language(automatic_captions, base_url, trans_code, trans_name,
                                 {} if orig_lang == orig_trans_code else {'tlang': trans_code})

        info['automatic_captions'] = automatic_captions
        info['subtitles'] = subtitles

    # start/end times may be given in either the URL fragment or the query (fragment wins)
    parsed_url = urllib.parse.urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = urllib.parse.parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(v[0])

    # Youtube Music Auto-generated description
    if video_description:
        # NB: The pattern is in verbose mode, so literal spaces must be escaped
        mobj = re.search(
            r'''(?xs)
                (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
                (?P<album>[^\n]+)
                (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
                (?:.+?Released\ on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
                (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
                .+\nAuto-generated\ by\ YouTube\.\s*$
            ''', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
    if not initial_data:
        # Fall back to the "next" API endpoint when the webpage did not provide the data
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query,
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    info['comment_count'] = traverse_obj(initial_data, (
        'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
        'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'
    ), (
        'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
        'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'
    ), expected_type=int_or_none, get_all=False)

    try:  # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
    except (KeyError, IndexError, TypeError):
        pass
    else:
        info.setdefault('subtitles', {})['live_chat'] = [{
            # url is needed to set cookies
            'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]

    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or self._extract_chapters_from_description(video_description, duration)
            or None)

    contents = traverse_obj(
        initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
        expected_type=list, default=[])

    vpir = get_first(contents, 'videoPrimaryInfoRenderer')
    if vpir:
        stl = vpir.get('superTitleLink')
        if stl:
            stl = self._get_text(stl)
            if try_get(
                    vpir,
                    lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                info['location'] = stl
            else:
                mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
                if mobj:
                    info.update({
                        'series': mobj.group(1),
                        'season_number': int(mobj.group(2)),
                        'episode_number': int(mobj.group(3)),
                    })
        for tlb in (try_get(
                vpir,
                lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                list) or []):
            tbr = tlb.get('toggleButtonRenderer') or {}
            for getter, regex in [(
                    lambda x: x['defaultText']['accessibility']['accessibilityData'],
                    r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                        lambda x: x['accessibility'],
                        lambda x: x['accessibilityData']['accessibilityData'],
                    ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                label = (try_get(tbr, getter, dict) or {}).get('label')
                if label:
                    mobj = re.match(regex, label)
                    if mobj:
                        info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                        break
        sbr_tooltip = try_get(
            vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
        if sbr_tooltip:
            like_count, dislike_count = sbr_tooltip.split(' / ')
            info.update({
                'like_count': str_to_int(like_count),
                'dislike_count': str_to_int(dislike_count),
            })
    vsir = get_first(contents, 'videoSecondaryInfoRenderer')
    if vsir:
        vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
        info.update({
            'channel': self._get_text(vor, 'title'),
            'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

        rows = try_get(
            vsir,
            lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
            list) or []
        # A divider line between rows means that more than one song is listed;
        # album/artist/track are then not set since they would be ambiguous
        multiple_songs = False
        for row in rows:
            if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                multiple_songs = True
                break
        for row in rows:
            mrr = row.get('metadataRowRenderer') or {}
            mrr_title = mrr.get('title')
            if not mrr_title:
                continue
            mrr_title = self._get_text(mrr, 'title')
            mrr_contents_text = self._get_text(mrr, ('contents', 0))
            if mrr_title == 'License':
                info['license'] = mrr_contents_text
            elif not multiple_songs:
                if mrr_title == 'Album':
                    info['album'] = mrr_contents_text
                elif mrr_title == 'Artist':
                    info['artist'] = mrr_contents_text
                elif mrr_title == 'Song':
                    info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }

    # The upload date for scheduled, live and past live streams / premieres in microformats
    # may be different from the stream date. Although not in UTC, we will prefer it in this case.
    # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
    upload_date = (
        unified_strdate(get_first(microformats, 'uploadDate'))
        or unified_strdate(search_meta('uploadDate')))
    if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):
        upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date
    info['upload_date'] = upload_date

    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = get_first(video_details, 'isPrivate', expected_type=bool)
    is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
3947
3948
3949 class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
3950
@staticmethod
def passthrough_smuggled_data(func):
    """Decorator for extraction methods taking (self, url, smuggled_data).

    The wrapped method is called with the unsmuggled URL and its smuggled
    data (with 'is_music_url' set for music URLs). When there is any smuggled
    data and the result has entries, the same data is lazily smuggled into
    the URL of every entry.
    """
    def _smuggle(entries, smuggled_data):
        for item in entries:
            # TODO: Convert URL to music.youtube instead.
            # Do we need to passthrough any other smuggled_data?
            item['url'] = smuggle_url(item['url'], smuggled_data)
            yield item

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        result = func(self, url, smuggled_data)
        if not smuggled_data or not result.get('entries'):
            return result
        result['entries'] = _smuggle(result['entries'], smuggled_data)
        return result
    return wrapper
3970
def _extract_channel_id(self, webpage):
    """Return the channel id found in the webpage's meta tags.

    The 'channelId' meta tag is used when present; otherwise the id is
    taken from the /channel/<id> part of one of the canonical URL meta tags.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # NB: The quantifier must be inside the group; with `(...)+` only the
    # last repetition, i.e. the final character of the id, is captured
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
3983
@staticmethod
def _extract_basic_item_renderer(item):
    """Return the first supported renderer dict found in item, or None.

    A value qualifies when it is a dict and its key is either one of the
    known basic renderers or is named 'grid...Renderer'.
    """
    # Modified from _extract_grid_item_renderer
    basic_renderer_keys = (
        'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer'
    )
    for name, candidate in item.items():
        if isinstance(candidate, dict) and (
                name in basic_renderer_keys
                or (name.startswith('grid') and name.endswith('Renderer'))):
            return candidate
    return None
3997
def _grid_entries(self, grid_renderer):
    """Yield an entry for every supported item of a grid renderer.

    Per item, the first of these that applies is used: playlist id,
    video id, channel id, and finally the navigation endpoint URL if one
    of the Tab, Playlist or video extractors is suitable for it.
    Items without a recognised renderer are skipped.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        playlist_id = renderer.get('playlistId')
        video_id = renderer.get('videoId')
        channel_id = renderer.get('channelId')

        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            yield self._extract_video(renderer)
        elif channel_id:
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support
            endpoint_path = try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                str)
            ep_url = urljoin('https://www.youtube.com/', endpoint_path)
            if not ep_url:
                continue
            # Only the first suitable extractor is used
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(ep_url)),
                None)
            if matching_ie is not None:
                yield self.url_result(
                    ep_url, ie=matching_ie.ie_key(), video_id=matching_ie._match_id(ep_url), video_title=title)
4037
def _music_reponsive_list_entry(self, renderer):
    """Build a url_result for a music list item, or return None.

    Checked in order: the item's own video id, then the watch endpoint's
    playlist id (combined with the endpoint's video id when there is one),
    then the browse endpoint's browse id.
    """
    tab_ie_key = YoutubeTabIE.ie_key

    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        return self.url_result(f'https://music.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    watch_endpoint = ('navigationEndpoint', 'watchEndpoint')
    playlist_id = traverse_obj(renderer, (*watch_endpoint, 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, (*watch_endpoint, 'videoId'))
        if video_id:
            playlist_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(playlist_url, ie=tab_ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
                           ie=tab_ie_key(), video_id=browse_id)
4055
def _shelf_entries_from_content(self, shelf_renderer):
    """
    Yield the entries embedded in a shelf's own 'content' dict.

    Only 'gridRenderer' and 'expandedShelfContentsRenderer' content is handled;
    both are passed to _grid_entries().
    """
    shelf_content = shelf_renderer.get('content')
    if not isinstance(shelf_content, dict):
        return
    grid_like_renderer = (
        shelf_content.get('gridRenderer')
        or shelf_content.get('expandedShelfContentsRenderer'))
    if grid_like_renderer:
        # TODO: add support for nested playlists so each shelf is processed
        # as separate playlist
        # TODO: this includes only first N items
        yield from self._grid_entries(grid_like_renderer)
    # TODO: 'horizontalListRenderer' content is not handled yet
4070
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """
    Yield a URL result for the shelf's own endpoint (if any), followed by the
    entries found in the shelf content.

    With skip_channels set, a shelf whose URL contains '/channels?' produces
    nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
4086
def _playlist_entries(self, video_list_renderer):
    """
    Yield an extracted video for every item of video_list_renderer['contents']
    that holds a 'playlistVideoRenderer' or 'playlistPanelVideoRenderer' dict
    with a 'videoId'.
    """
    for list_item in video_list_renderer['contents']:
        if not isinstance(list_item, dict):
            continue
        video_renderer = (
            list_item.get('playlistVideoRenderer')
            or list_item.get('playlistPanelVideoRenderer'))
        if isinstance(video_renderer, dict) and video_renderer.get('videoId'):
            yield self._extract_video(video_renderer)
4098
def _rich_entries(self, rich_grid_renderer):
    """Yield the video found under ['content']['videoRenderer'], if it has a 'videoId'."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
4106
def _video_entry(self, video_renderer):
    """Return the extracted video for video_renderer, or None when it has no 'videoId'."""
    if not video_renderer.get('videoId'):
        return None
    return self._extract_video(video_renderer)
4111
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a URL result for the tile's tap-command URL, or None when it has none."""
    tap_path = traverse_obj(
        hashtag_tile_renderer,
        ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    hashtag_url = urljoin('https://youtube.com', tap_path)
    if not hashtag_url:
        return None
    return self.url_result(
        hashtag_url, ie=YoutubeTabIE.ie_key(),
        title=self._get_text(hashtag_tile_renderer, 'hashtag'))
4118
def _post_thread_entries(self, post_thread_renderer):
    """
    Yield the entries referenced by a community post.

    In order: the attached video, the attached playlist, and every link in the
    post text that YoutubeIE can handle (except a link to the attached video).
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link_url = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not link_url or not YoutubeIE.suitable(link_url):
            continue
        linked_video_id = YoutubeIE._match_id(link_url)
        # The attached video was already yielded above
        if linked_video_id != video_id:
            yield self.url_result(link_url, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
4154
4155 def _post_thread_continuation_entries(self, post_thread_continuation):
4156 contents = post_thread_continuation.get('contents')
4157 if not isinstance(contents, list):
4158 return
4159 for content in contents:
4160 renderer = content.get('backstagePostThreadRenderer')
4161 if isinstance(renderer, dict):
4162 yield from self._post_thread_entries(renderer)
4163 continue
4164 renderer = content.get('videoRenderer')
4165 if isinstance(renderer, dict):
4166 yield self._video_entry(renderer)
4167
4168 r''' # unused
4169 def _rich_grid_entries(self, contents):
4170 for content in contents:
4171 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
4172 if video_renderer:
4173 entry = self._video_entry(video_renderer)
4174 if entry:
4175 yield entry
4176 '''
4177
def _extract_entries(self, parent_renderer, continuation_list):
    """
    Yield the entries found in parent_renderer['contents'].

    continuation_list is reset to [None] and then updated in-place: its single
    slot receives the continuation extracted from the processed renderers,
    falling back to the section renderer and finally to parent_renderer.
    """
    continuation_list[:] = [None]

    # Renderer key -> callable returning an iterable of entries
    entry_extractors = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
    }

    for section in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(section, dict):
            continue
        section_renderer = traverse_obj(
            section, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not section_renderer:
            rich_item = section.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for section_item in try_get(section_renderer, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            # Only the first recognised renderer of each item is processed
            recognised = next(
                ((key, value) for key, value in section_item.items() if key in entry_extractors),
                None)
            if recognised is None:
                continue
            renderer_key, renderer = recognised
            # Extractors may hand back None/empty entries; drop them
            yield from filter(None, entry_extractors[renderer_key](renderer))
            continuation_list[0] = self._extract_continuation(renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section_renderer)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
4226
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """
    Yield every entry of a tab, following continuations page by page.

    The first batch comes from the tab's 'sectionListRenderer' or
    'richGridRenderer'; further batches are requested with _extract_response()
    for as long as a continuation is available and the response contains a
    known renderer.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # Looks up `continuation_list` at call time, so it always fills the
        # list most recently bound below
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # 'continuationContents' key -> entry extractor
    continuation_extractors = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # First continuation item key -> (entry extractor, key the items are wrapped under)
    item_extractors = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        if not response:
            break

        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        # Response style 1: 'continuationContents'
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_key = next(
            (key for key in continuation_contents if key in continuation_extractors), None)
        continuation_renderer = None
        if continuation_key is not None:
            continuation_renderer = continuation_contents[continuation_key]
            continuation_list = [None]
            yield from continuation_extractors[continuation_key](continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
        if continuation_renderer:
            continue

        # Response style 2: 'appendContinuationItemsAction'
        on_response_received = dict_get(
            response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received,
            lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        first_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        item_key = next((key for key in first_item if key in item_extractors), None)
        if item_key is None:
            break
        extractor, wrapper_key = item_extractors[item_key]
        video_items_renderer = {wrapper_key: continuation_items}
        continuation_list = [None]
        yield from extractor(video_items_renderer)
        continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
4301
@staticmethod
def _extract_selected_tab(tabs, fatal=True):
    """
    Return the 'tabRenderer'/'expandableTabRenderer' whose 'selected' is True.

    When no tab is selected, raise ExtractorError if fatal, else return None.
    """
    tab_renderers = (
        dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        for tab in tabs)
    selected = next(
        (renderer for renderer in tab_renderers if renderer.get('selected') is True), None)
    if selected is None and fatal:
        raise ExtractorError('Unable to find selected tab')
    return selected
4311
def _extract_uploader(self, data):
    """
    Return the 'uploader', 'uploader_id' and 'uploader_url' fields read from
    the video owner in the secondary playlist sidebar; fields that could not
    be determined are left out.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    def browse_endpoint_value(key):
        return try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint'][key], str)

    owner_text = owner.get('text')
    fields = {
        # The regex keeps only the first name of text matching "by X and N others"
        'uploader': self._search_regex(
            r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
        'uploader_id': browse_endpoint_value('browseId'),
        'uploader_url': urljoin(
            'https://www.youtube.com/', browse_endpoint_value('canonicalBaseUrl')),
    }
    return {name: value for name, value in fields.items() if value is not None}
4327
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Build the playlist result for the currently selected tab.

    Metadata is collected from the channel or playlist metadata renderer, the
    primary playlist sidebar and the header; the entries come from _entries()
    called on the selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')

    channel_name = channel_url = channel_id = None
    metadata_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if metadata_renderer:
        channel_name = metadata_renderer.get('title')
        channel_url = metadata_renderer.get('channelUrl')
        channel_id = metadata_renderer.get('externalId')
    else:
        metadata_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    if metadata_renderer:
        title = metadata_renderer.get('title')
        description = metadata_renderer.get('description', '')
        playlist_id = channel_id
        tags = metadata_renderer.get('keywords', '').split()

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _append_uncropped(thumbnails, thumbnail_id, preference):
        if not thumbnails:
            return
        uncropped_url = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference
            })

    avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = self._extract_thumbnails(
        data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner']))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))

    if playlist_id is None:
        playlist_id = item_id

    playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
    last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
    if title is None:
        title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
    tab_suffix = format_field(selected_tab, 'title', ' - %s')
    expanded_suffix = format_field(selected_tab, 'expandedText', ' - %s')
    title = title + tab_suffix + expanded_suffix

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners,
        'tags': tags,
        'view_count': self._get_count(playlist_stats, 1),
        'availability': self._extract_availability(data),
        'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'),
        'playlist_count': self._get_count(playlist_stats, 0),
        'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')),
    }
    if not channel_id:
        # No channel metadata: take the uploader from the playlist sidebar instead
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the uploader_* fields
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    entries = self._entries(selected_tab, playlist_id, ytcfg, account_syncid, visitor_data)
    return self.playlist_result(entries, **metadata)
4419
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the videos of a playlist shown inline on a watch page (e.g. a mix).

    Each page is requested from the 'next' endpoint starting at the last video
    already seen; videos up to and including that one are skipped on the new
    page. Iteration stops when a page is missing, has no videos, or brings
    nothing after the last seen video.
    """
    last_id = response = None
    for page_num in itertools.count(1):
        # A follow-up response may not contain a playlist panel at all
        if not playlist:
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The last item is not necessarily a playlistPanelVideoRenderer,
        # so fall back to an empty endpoint and the defaults below
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4450
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Return the result for a playlist embedded in a watch page.

    If the playlist links to a page other than `url` and is not a known
    unviewable playlist, a URL result for that page is returned; otherwise the
    playlist is extracted inline.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    playlist_url = urljoin(url, endpoint_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    should_delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if not should_delegate:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)
    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
4473
def _extract_availability(self, data):
    """
    Determine the availability of a playlist/tab from its primary sidebar.

    The labels come from the sidebar badges plus, if present, the selected
    entry of the privacy dropdown. Nothing is assumed to be public unless a
    'public' label says so.
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar,
        lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'],
        list) or []
    for dropdown_entry in dropdown_entries:
        selected = try_get(
            dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
        if selected:
            selected_label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
            if selected_label:
                labels.add(selected_label.lower())
                break

    is_private = is_unlisted = None
    for label in labels:
        if label == 'unlisted':
            is_unlisted = True
        elif label == 'private':
            is_private = True
        elif label == 'public':
            is_unlisted = is_private = False
    return self._availability(is_private, False, False, False, is_unlisted)
4505
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` value of type `expected_type`
    among the playlist sidebar items, or None when there is none.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next((candidate for candidate in candidates if candidate), None)
4514
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again with unavailable videos included.

    The browse ID and params are taken from the 'show unavailable videos' menu
    item of the primary sidebar when it exists; otherwise defaults are used.
    Returns None when there is no primary sidebar.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_endpoint = {}
    for menu_item in try_get(sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        label = try_get(nav_item, lambda x: x['text']['simpleText'], str)
        if label and label.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
4550
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is among the 'skip' configuration args looked up under YoutubeTabIE's key."""
    skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped
4554
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download `url` and extract its initial data, retrying through RetryManager.

    Network errors other than HTTP 403/429 and incomplete initial data cause a
    retry; any other extraction error or a reported alert ends the attempts.
    Returns (webpage, data) as they stand after the last attempt.
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as err:
            cause = err.cause
            # HTTP 403 and 429 are not retried
            not_retryable_http = (
                isinstance(cause, urllib.error.HTTPError) and cause.code in (403, 429))
            if isinstance(cause, network_exceptions) and not not_retryable_http:
                retry.error = err
                continue
            self._error_or_warning(err, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as err:
            self._error_or_warning(err, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        has_content = traverse_obj(
            data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not has_content:
            retry.error = ExtractorError('Incomplete yt initial data received')
            continue

    return webpage, data
4582
def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """
    Use if failed to extract ytcfg (and data) from initial webpage.

    Does nothing unless ytcfg is empty and the session is authenticated. Then
    it raises ExtractorError when fatal and 'authcheck' is not among the
    skipped checks, and otherwise only emits a warning.
    """
    if ytcfg or not self.is_authenticated:
        return
    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    authcheck_enabled = 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if authcheck_enabled and fatal:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)
4594
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Return (data, ytcfg) for `url`.

    Unless the webpage is skipped, data and ytcfg are first taken from the
    webpage; a redirect to the home page is reported as an error (fatal) or a
    warning. When no data was obtained this way, the API is queried through
    _extract_tab_endpoint().
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)

        # Reject webpage data if redirected to home page without explicitly requesting
        tabs = traverse_obj(
            data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'),
            expected_type=list, default=[])
        selected_tab = self._extract_selected_tab(tabs, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(
            url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
4614
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the 'navigation/resolve_url' endpoint and return the
    response of the browse/next request it maps to.

    When the URL resolves to neither endpoint, raise ExtractorError if fatal,
    otherwise warn and return None.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # Resolved endpoint key -> API endpoint to query with its parameters
    for endpoint_key, api_endpoint in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolve_response, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_endpoint, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)
4632
# Default value for the `params` field of search requests; _search_results() uses it
# when the caller passes no `params`, and adds no `params` field while it is falsy.
_SEARCH_PARAMS = None
4634
def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of a search for `query`, page by page.

    `params` defaults to the class-level _SEARCH_PARAMS and is only sent when
    truthy. Paging continues for as long as _extract_entries() finds a
    continuation.
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    # Alternative locations of the result list in a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    check_get_keys = tuple({keys[0] for keys in content_keys})
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    continuation_list = [None]
    search = None
    page_num = 0
    while True:
        page_num += 1
        # Merge the continuation of the previous page (if any) into the request
        data.update(continuation_list[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        wrapped_contents = {'contents': list(variadic(slr_contents))}
        yield from self._extract_entries(wrapped_contents, continuation_list)
        if not continuation_list[0]:
            return
4667
4668
4669 class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
4670 IE_DESC = 'YouTube Tabs'
4671 _VALID_URL = r'''(?x:
4672 https?://
4673 (?:\w+\.)?
4674 (?:
4675 youtube(?:kids)?\.com|
4676 %(invidious)s
4677 )/
4678 (?:
4679 (?P<channel_type>channel|c|user|browse)/|
4680 (?P<not_channel>
4681 feed/|hashtag/|
4682 (?:playlist|watch)\?.*?\blist=
4683 )|
4684 (?!(?:%(reserved_names)s)\b) # Direct URLs
4685 )
4686 (?P<id>[^/?\#&]+)
4687 )''' % {
4688 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
4689 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
4690 }
4691 IE_NAME = 'youtube:tab'
4692
4693 _TESTS = [{
4694 'note': 'playlists, multipage',
4695 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
4696 'playlist_mincount': 94,
4697 'info_dict': {
4698 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
4699 'title': 'Igor Kleiner - Playlists',
4700 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
4701 'uploader': 'Igor Kleiner',
4702 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4703 'channel': 'Igor Kleiner',
4704 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4705 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4706 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4707 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4708 'channel_follower_count': int
4709 },
4710 }, {
4711 'note': 'playlists, multipage, different order',
4712 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
4713 'playlist_mincount': 94,
4714 'info_dict': {
4715 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
4716 'title': 'Igor Kleiner - Playlists',
4717 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
4718 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4719 'uploader': 'Igor Kleiner',
4720 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4721 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'],
4722 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
4723 'channel': 'Igor Kleiner',
4724 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
4725 'channel_follower_count': int
4726 },
4727 }, {
4728 'note': 'playlists, series',
4729 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
4730 'playlist_mincount': 5,
4731 'info_dict': {
4732 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4733 'title': '3Blue1Brown - Playlists',
4734 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4735 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4736 'uploader': '3Blue1Brown',
4737 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4738 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4739 'channel': '3Blue1Brown',
4740 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
4741 'tags': ['Mathematics'],
4742 'channel_follower_count': int
4743 },
4744 }, {
4745 'note': 'playlists, singlepage',
4746 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
4747 'playlist_mincount': 4,
4748 'info_dict': {
4749 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4750 'title': 'ThirstForScience - Playlists',
4751 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
4752 'uploader': 'ThirstForScience',
4753 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4754 'uploader_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4755 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
4756 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
4757 'tags': 'count:13',
4758 'channel': 'ThirstForScience',
4759 'channel_follower_count': int
4760 }
4761 }, {
4762 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
4763 'only_matching': True,
4764 }, {
4765 'note': 'basic, single video playlist',
4766 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4767 'info_dict': {
4768 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4769 'uploader': 'Sergey M.',
4770 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
4771 'title': 'youtube-dl public playlist',
4772 'description': '',
4773 'tags': [],
4774 'view_count': int,
4775 'modified_date': '20201130',
4776 'channel': 'Sergey M.',
4777 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4778 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4779 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4780 },
4781 'playlist_count': 1,
4782 }, {
4783 'note': 'empty playlist',
4784 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4785 'info_dict': {
4786 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4787 'uploader': 'Sergey M.',
4788 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
4789 'title': 'youtube-dl empty playlist',
4790 'tags': [],
4791 'channel': 'Sergey M.',
4792 'description': '',
4793 'modified_date': '20160902',
4794 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
4795 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4796 'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4797 },
4798 'playlist_count': 0,
4799 }, {
4800 'note': 'Home tab',
4801 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
4802 'info_dict': {
4803 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4804 'title': 'lex will - Home',
4805 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4806 'uploader': 'lex will',
4807 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4808 'channel': 'lex will',
4809 'tags': ['bible', 'history', 'prophesy'],
4810 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4811 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4812 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4813 'channel_follower_count': int
4814 },
4815 'playlist_mincount': 2,
4816 }, {
4817 'note': 'Videos tab',
4818 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
4819 'info_dict': {
4820 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4821 'title': 'lex will - Videos',
4822 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4823 'uploader': 'lex will',
4824 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4825 'tags': ['bible', 'history', 'prophesy'],
4826 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4827 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4828 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4829 'channel': 'lex will',
4830 'channel_follower_count': int
4831 },
4832 'playlist_mincount': 975,
4833 }, {
4834 'note': 'Videos tab, sorted by popular',
4835 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
4836 'info_dict': {
4837 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4838 'title': 'lex will - Videos',
4839 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4840 'uploader': 'lex will',
4841 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4842 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4843 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4844 'channel': 'lex will',
4845 'tags': ['bible', 'history', 'prophesy'],
4846 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4847 'channel_follower_count': int
4848 },
4849 'playlist_mincount': 199,
4850 }, {
4851 'note': 'Playlists tab',
4852 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
4853 'info_dict': {
4854 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4855 'title': 'lex will - Playlists',
4856 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4857 'uploader': 'lex will',
4858 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4859 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4860 'channel': 'lex will',
4861 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4862 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4863 'tags': ['bible', 'history', 'prophesy'],
4864 'channel_follower_count': int
4865 },
4866 'playlist_mincount': 17,
4867 }, {
4868 'note': 'Community tab',
4869 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
4870 'info_dict': {
4871 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4872 'title': 'lex will - Community',
4873 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4874 'uploader': 'lex will',
4875 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4876 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4877 'channel': 'lex will',
4878 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4879 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4880 'tags': ['bible', 'history', 'prophesy'],
4881 'channel_follower_count': int
4882 },
4883 'playlist_mincount': 18,
4884 }, {
4885 'note': 'Channels tab',
4886 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
4887 'info_dict': {
4888 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4889 'title': 'lex will - Channels',
4890 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
4891 'uploader': 'lex will',
4892 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4893 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4894 'channel': 'lex will',
4895 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
4896 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
4897 'tags': ['bible', 'history', 'prophesy'],
4898 'channel_follower_count': int
4899 },
4900 'playlist_mincount': 12,
4901 }, {
4902 'note': 'Search tab',
4903 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
4904 'playlist_mincount': 40,
4905 'info_dict': {
4906 'id': 'UCYO_jab_esuFRV4b17AJtAw',
4907 'title': '3Blue1Brown - Search - linear algebra',
4908 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
4909 'uploader': '3Blue1Brown',
4910 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
4911 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4912 'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
4913 'tags': ['Mathematics'],
4914 'channel': '3Blue1Brown',
4915 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
4916 'channel_follower_count': int
4917 },
4918 }, {
4919 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4920 'only_matching': True,
4921 }, {
4922 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4923 'only_matching': True,
4924 }, {
4925 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
4926 'only_matching': True,
4927 }, {
4928 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
4929 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4930 'info_dict': {
4931 'title': '29C3: Not my department',
4932 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
4933 'uploader': 'Christiaan008',
4934 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4935 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
4936 'tags': [],
4937 'uploader_url': 'https://www.youtube.com/c/ChRiStIaAn008',
4938 'view_count': int,
4939 'modified_date': '20150605',
4940 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
4941 'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
4942 'channel': 'Christiaan008',
4943 },
4944 'playlist_count': 96,
4945 }, {
4946 'note': 'Large playlist',
4947 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
4948 'info_dict': {
4949 'title': 'Uploads from Cauchemar',
4950 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
4951 'uploader': 'Cauchemar',
4952 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
4953 'channel_url': 'https://www.youtube.com/c/Cauchemar89',
4954 'tags': [],
4955 'modified_date': r're:\d{8}',
4956 'channel': 'Cauchemar',
4957 'uploader_url': 'https://www.youtube.com/c/Cauchemar89',
4958 'view_count': int,
4959 'description': '',
4960 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
4961 },
4962 'playlist_mincount': 1123,
4963 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
4964 }, {
4965 'note': 'even larger playlist, 8832 videos',
4966 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
4967 'only_matching': True,
4968 }, {
4969 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
4970 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
4971 'info_dict': {
4972 'title': 'Uploads from Interstellar Movie',
4973 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
4974 'uploader': 'Interstellar Movie',
4975 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4976 'uploader_url': 'https://www.youtube.com/c/InterstellarMovie',
4977 'tags': [],
4978 'view_count': int,
4979 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4980 'channel_url': 'https://www.youtube.com/c/InterstellarMovie',
4981 'channel': 'Interstellar Movie',
4982 'description': '',
4983 'modified_date': r're:\d{8}',
4984 },
4985 'playlist_mincount': 21,
4986 }, {
4987 'note': 'Playlist with "show unavailable videos" button',
4988 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
4989 'info_dict': {
4990 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
4991 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
4992 'uploader': 'Phim Siêu Nhân Nhật Bản',
4993 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
4994 'view_count': int,
4995 'channel': 'Phim Siêu Nhân Nhật Bản',
4996 'tags': [],
4997 'uploader_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
4998 'description': '',
4999 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5000 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5001 'modified_date': r're:\d{8}',
5002 },
5003 'playlist_mincount': 200,
5004 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5005 }, {
5006 'note': 'Playlist with unavailable videos in page 7',
5007 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5008 'info_dict': {
5009 'title': 'Uploads from BlankTV',
5010 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
5011 'uploader': 'BlankTV',
5012 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5013 'channel': 'BlankTV',
5014 'channel_url': 'https://www.youtube.com/c/blanktv',
5015 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5016 'view_count': int,
5017 'tags': [],
5018 'uploader_url': 'https://www.youtube.com/c/blanktv',
5019 'modified_date': r're:\d{8}',
5020 'description': '',
5021 },
5022 'playlist_mincount': 1000,
5023 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5024 }, {
5025 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5026 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5027 'info_dict': {
5028 'title': 'Data Analysis with Dr Mike Pound',
5029 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5030 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5031 'uploader': 'Computerphile',
5032 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
5033 'uploader_url': 'https://www.youtube.com/user/Computerphile',
5034 'tags': [],
5035 'view_count': int,
5036 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5037 'channel_url': 'https://www.youtube.com/user/Computerphile',
5038 'channel': 'Computerphile',
5039 },
5040 'playlist_mincount': 11,
5041 }, {
5042 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5043 'only_matching': True,
5044 }, {
5045 'note': 'Playlist URL that does not actually serve a playlist',
5046 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5047 'info_dict': {
5048 'id': 'FqZTN594JQw',
5049 'ext': 'webm',
5050 'title': "Smiley's People 01 detective, Adventure Series, Action",
5051 'uploader': 'STREEM',
5052 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
5053 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
5054 'upload_date': '20150526',
5055 'license': 'Standard YouTube License',
5056 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5057 'categories': ['People & Blogs'],
5058 'tags': list,
5059 'view_count': int,
5060 'like_count': int,
5061 },
5062 'params': {
5063 'skip_download': True,
5064 },
5065 'skip': 'This video is not available.',
5066 'add_ie': [YoutubeIE.ie_key()],
5067 }, {
5068 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5069 'only_matching': True,
5070 }, {
5071 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5072 'only_matching': True,
5073 }, {
5074 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
5075 'info_dict': {
5076 'id': 'Wq15eF5vCbI', # This will keep changing
5077 'ext': 'mp4',
5078 'title': str,
5079 'uploader': 'Sky News',
5080 'uploader_id': 'skynews',
5081 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
5082 'upload_date': r're:\d{8}',
5083 'description': str,
5084 'categories': ['News & Politics'],
5085 'tags': list,
5086 'like_count': int,
5087 'release_timestamp': 1642502819,
5088 'channel': 'Sky News',
5089 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
5090 'age_limit': 0,
5091 'view_count': int,
5092 'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg',
5093 'playable_in_embed': True,
5094 'release_date': '20220118',
5095 'availability': 'public',
5096 'live_status': 'is_live',
5097 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
5098 'channel_follower_count': int
5099 },
5100 'params': {
5101 'skip_download': True,
5102 },
5103 'expected_warnings': ['Ignoring subtitle tracks found in '],
5104 }, {
5105 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
5106 'info_dict': {
5107 'id': 'a48o2S1cPoo',
5108 'ext': 'mp4',
5109 'title': 'The Young Turks - Live Main Show',
5110 'uploader': 'The Young Turks',
5111 'uploader_id': 'TheYoungTurks',
5112 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
5113 'upload_date': '20150715',
5114 'license': 'Standard YouTube License',
5115 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
5116 'categories': ['News & Politics'],
5117 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
5118 'like_count': int,
5119 },
5120 'params': {
5121 'skip_download': True,
5122 },
5123 'only_matching': True,
5124 }, {
5125 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
5126 'only_matching': True,
5127 }, {
5128 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
5129 'only_matching': True,
5130 }, {
5131 'note': 'A channel that is not live. Should raise error',
5132 'url': 'https://www.youtube.com/user/numberphile/live',
5133 'only_matching': True,
5134 }, {
5135 'url': 'https://www.youtube.com/feed/trending',
5136 'only_matching': True,
5137 }, {
5138 'url': 'https://www.youtube.com/feed/library',
5139 'only_matching': True,
5140 }, {
5141 'url': 'https://www.youtube.com/feed/history',
5142 'only_matching': True,
5143 }, {
5144 'url': 'https://www.youtube.com/feed/subscriptions',
5145 'only_matching': True,
5146 }, {
5147 'url': 'https://www.youtube.com/feed/watch_later',
5148 'only_matching': True,
5149 }, {
5150 'note': 'Recommended - redirects to home page.',
5151 'url': 'https://www.youtube.com/feed/recommended',
5152 'only_matching': True,
5153 }, {
5154 'note': 'inline playlist with not always working continuations',
5155 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
5156 'only_matching': True,
5157 }, {
5158 'url': 'https://www.youtube.com/course',
5159 'only_matching': True,
5160 }, {
5161 'url': 'https://www.youtube.com/zsecurity',
5162 'only_matching': True,
5163 }, {
5164 'url': 'http://www.youtube.com/NASAgovVideo/videos',
5165 'only_matching': True,
5166 }, {
5167 'url': 'https://www.youtube.com/TheYoungTurks/live',
5168 'only_matching': True,
5169 }, {
5170 'url': 'https://www.youtube.com/hashtag/cctv9',
5171 'info_dict': {
5172 'id': 'cctv9',
5173 'title': '#cctv9',
5174 'tags': [],
5175 },
5176 'playlist_mincount': 350,
5177 }, {
5178 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
5179 'only_matching': True,
5180 }, {
5181 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
5182 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5183 'only_matching': True
5184 }, {
5185 'note': '/browse/ should redirect to /channel/',
5186 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
5187 'only_matching': True
5188 }, {
5189 'note': 'VLPL, should redirect to playlist?list=PL...',
5190 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5191 'info_dict': {
5192 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
5193 'uploader': 'NoCopyrightSounds',
5194 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
5195 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5196 'title': 'NCS : All Releases 💿',
5197 'uploader_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5198 'channel_url': 'https://www.youtube.com/c/NoCopyrightSounds',
5199 'modified_date': r're:\d{8}',
5200 'view_count': int,
5201 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
5202 'tags': [],
5203 'channel': 'NoCopyrightSounds',
5204 },
5205 'playlist_mincount': 166,
5206 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5207 }, {
5208 'note': 'Topic, should redirect to playlist?list=UU...',
5209 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5210 'info_dict': {
5211 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5212 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5213 'title': 'Uploads from Royalty Free Music - Topic',
5214 'uploader': 'Royalty Free Music - Topic',
5215 'tags': [],
5216 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5217 'channel': 'Royalty Free Music - Topic',
5218 'view_count': int,
5219 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5220 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5221 'modified_date': r're:\d{8}',
5222 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5223 'description': '',
5224 },
5225 'expected_warnings': [
5226 'The URL does not have a videos tab',
5227 r'[Uu]navailable videos (are|will be) hidden',
5228 ],
5229 'playlist_mincount': 101,
5230 }, {
5231 'note': 'Topic without a UU playlist',
5232 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
5233 'info_dict': {
5234 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
5235 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
5236 'tags': [],
5237 },
5238 'expected_warnings': [
5239 'the playlist redirect gave error',
5240 ],
5241 'playlist_mincount': 9,
5242 }, {
5243 'note': 'Youtube music Album',
5244 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
5245 'info_dict': {
5246 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
5247 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
5248 'tags': [],
5249 'view_count': int,
5250 'description': '',
5251 'availability': 'unlisted',
5252 'modified_date': r're:\d{8}',
5253 },
5254 'playlist_count': 50,
5255 }, {
5256 'note': 'unlisted single video playlist',
5257 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5258 'info_dict': {
5259 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5260 'uploader': 'colethedj',
5261 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
5262 'title': 'yt-dlp unlisted playlist test',
5263 'availability': 'unlisted',
5264 'tags': [],
5265 'modified_date': '20220418',
5266 'channel': 'colethedj',
5267 'view_count': int,
5268 'description': '',
5269 'uploader_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5270 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
5271 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
5272 },
5273 'playlist_count': 1,
5274 }, {
5275 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
5276 'url': 'https://www.youtube.com/feed/recommended',
5277 'info_dict': {
5278 'id': 'recommended',
5279 'title': 'recommended',
5280 'tags': [],
5281 },
5282 'playlist_mincount': 50,
5283 'params': {
5284 'skip_download': True,
5285 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5286 },
5287 }, {
5288 'note': 'API Fallback: /videos tab, sorted by oldest first',
5289 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
5290 'info_dict': {
5291 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5292 'title': 'Cody\'sLab - Videos',
5293 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
5294 'uploader': 'Cody\'sLab',
5295 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5296 'channel': 'Cody\'sLab',
5297 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
5298 'tags': [],
5299 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5300 'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
5301 'channel_follower_count': int
5302 },
5303 'playlist_mincount': 650,
5304 'params': {
5305 'skip_download': True,
5306 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5307 },
5308 }, {
5309 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
5310 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
5311 'info_dict': {
5312 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
5313 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5314 'title': 'Uploads from Royalty Free Music - Topic',
5315 'uploader': 'Royalty Free Music - Topic',
5316 'modified_date': r're:\d{8}',
5317 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
5318 'description': '',
5319 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5320 'tags': [],
5321 'channel': 'Royalty Free Music - Topic',
5322 'view_count': int,
5323 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
5324 },
5325 'expected_warnings': [
5326 'does not have a videos tab',
5327 r'[Uu]navailable videos (are|will be) hidden',
5328 ],
5329 'playlist_mincount': 101,
5330 'params': {
5331 'skip_download': True,
5332 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
5333 },
5334 }, {
5335 'note': 'non-standard redirect to regional channel',
5336 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
5337 'only_matching': True
5338 }, {
5339 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
5340 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5341 'info_dict': {
5342 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
5343 'modified_date': '20220407',
5344 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5345 'tags': [],
5346 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5347 'uploader': 'pukkandan',
5348 'availability': 'unlisted',
5349 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
5350 'channel': 'pukkandan',
5351 'description': 'Test for collaborative playlist',
5352 'title': 'yt-dlp test - collaborative playlist',
5353 'view_count': int,
5354 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
5355 },
5356 'playlist_mincount': 2
5357 }]
5358
@classmethod
def suitable(cls, url):
    """Decline any URL that YoutubeIE accepts; otherwise defer to the inherited matcher."""
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
5362
# Splits a URL into three named parts:
#   pre  - the portion matched by _VALID_URL
#   tab  - an optional "/<word>" path segment, only attempted when the
#          `not_channel` group of _VALID_URL did not participate in the match
#   post - whatever remains up to the end of the string
_URL_RE = re.compile(
    rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')
5364
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a channel/tab/playlist URL into a result.

    Depending on the URL and the fetched data this returns the entries of a
    tab or of an inline playlist, or a `url_result` pointing at another
    extractor (a single video, a redirected tab URL).

    `smuggled_data` is only read for its 'is_music_url' flag (presumably
    injected by the `passthrough_smuggled_data` decorator - confirm there).

    Raises ExtractorError when nothing usable can be recognised, and
    UserNotLive when a /live tab was requested but a tab page came back.
    """
    item_id = self._match_id(url)
    # Force the host to www.youtube.com, keeping the rest of the URL intact
    parsed_url = urllib.parse.urlparse(url)
    url = urllib.parse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])
    follow_redirects = 'no-youtube-channel-redirect' not in compat_opts

    def get_mobj(target):
        # Unmatched groups are reported as None; map them to '' so the
        # string operations below never have to special-case them
        groups = self._URL_RE.match(target).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    def get_tabs(response):
        return traverse_obj(
            response, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)

    def video_result(vid):
        return self.url_result(
            f'https://www.youtube.com/watch?v={vid}', ie=YoutubeIE.ie_key(), video_id=vid)

    mobj = get_mobj(url)
    redirect_warning = None
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post = mobj['pre'], mobj['tab'].lower(), mobj['post']
    is_channel = not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':
            # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
        elif id_prefix == 'MP':
            # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            album_data = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            album_url = traverse_obj(
                album_data, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                get_all=False, expected_type=str)
            if not album_url:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(album_url, ie=YoutubeTabIE.ie_key())
        elif mobj['channel_type'] == 'browse':
            # Youtube music /browse/ should be changed to /channel/
            pre = f'https://www.youtube.com/channel/{item_id}'

    # Remember what the user actually asked for before any implicit redirect
    original_tab_name = tab
    if is_channel and not tab and follow_redirects:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = f'{pre}{tab}{post}'
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return video_result(video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    redirect_path = (
        'onResponseReceivedActions', ..., 'navigateAction', 'endpoint',
        'commandMetadata', 'webCommandMetadata', 'url')
    redirect_url = traverse_obj(data, redirect_path, get_all=False)
    if redirect_url and follow_redirects:
        # Re-attach the tab and trailing part of the URL that was requested
        redirect_url = ''.join((
            urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
        self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
        return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())

    tabs = get_tabs(data)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_name = selected_tab.get('title', '').lower()
        if selected_tab_name == 'home':
            selected_tab_name = 'featured'
        requested_tab_name = mobj['tab'][1:]
        if follow_redirects:
            if requested_tab_name == 'live':
                # Live tab should have redirected to the video
                raise UserNotLive(video_id=mobj['id'])
            if requested_tab_name not in ('', selected_tab_name):
                redirect_warning = f'The channel does not have a {requested_tab_name} tab'
                if original_tab_name:
                    # The tab was asked for explicitly, so its absence is an error
                    raise ExtractorError(redirect_warning, expected=True)
                if item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    uploads_id = f'UU{item_id[2:]}'
                    uploads_url = f'https://www.youtube.com/playlist?list={uploads_id}'
                    try:
                        data, ytcfg = self._extract_data(
                            uploads_url, uploads_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                    except ExtractorError:
                        redirect_warning += ' and the playlist redirect gave error'
                    else:
                        item_id, url, selected_tab_name = uploads_id, uploads_url, requested_tab_name
                        redirect_warning += f'. Redirecting to playlist {uploads_id} instead'
                if selected_tab_name and selected_tab_name != requested_tab_name:
                    redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.to_screen(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # `data` may have been replaced above, so look the tabs up again
    tabs = get_tabs(data)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Last resort: a single video, taken from the response or else from the URL
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')
    if mobj['tab'] != '/live':
        # live tab is expected to redirect to video
        self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
    return video_result(video_id)
5493
5494
5495 class YoutubePlaylistIE(InfoExtractor):
5496 IE_DESC = 'YouTube playlists'
5497 _VALID_URL = r'''(?x)(?:
5498 (?:https?://)?
5499 (?:\w+\.)?
5500 (?:
5501 (?:
5502 youtube(?:kids)?\.com|
5503 %(invidious)s
5504 )
5505 /.*?\?.*?\blist=
5506 )?
5507 (?P<id>%(playlist_id)s)
5508 )''' % {
5509 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
5510 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
5511 }
5512 IE_NAME = 'youtube:playlist'
5513 _TESTS = [{
5514 'note': 'issue #673',
5515 'url': 'PLBB231211A4F62143',
5516 'info_dict': {
5517 'title': '[OLD]Team Fortress 2 (Class-based LP)',
5518 'id': 'PLBB231211A4F62143',
5519 'uploader': 'Wickman',
5520 'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
5521 'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
5522 'view_count': int,
5523 'uploader_url': 'https://www.youtube.com/user/Wickydoo',
5524 'modified_date': r're:\d{8}',
5525 'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
5526 'channel': 'Wickman',
5527 'tags': [],
5528 'channel_url': 'https://www.youtube.com/user/Wickydoo',
5529 },
5530 'playlist_mincount': 29,
5531 }, {
5532 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
5533 'info_dict': {
5534 'title': 'YDL_safe_search',
5535 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
5536 },
5537 'playlist_count': 2,
5538 'skip': 'This playlist is private',
5539 }, {
5540 'note': 'embedded',
5541 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
5542 'playlist_count': 4,
5543 'info_dict': {
5544 'title': 'JODA15',
5545 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
5546 'uploader': 'milan',
5547 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
5548 'description': '',
5549 'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
5550 'tags': [],
5551 'modified_date': '20140919',
5552 'view_count': int,
5553 'channel': 'milan',
5554 'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
5555 'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
5556 },
5557 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5558 }, {
5559 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
5560 'playlist_mincount': 455,
5561 'info_dict': {
5562 'title': '2018 Chinese New Singles (11/6 updated)',
5563 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
5564 'uploader': 'LBK',
5565 'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
5566 'description': 'md5:da521864744d60a198e3a88af4db0d9d',
5567 'channel': 'LBK',
5568 'view_count': int,
5569 'channel_url': 'https://www.youtube.com/c/愛低音的國王',
5570 'tags': [],
5571 'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
5572 'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
5573 'modified_date': r're:\d{8}',
5574 },
5575 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5576 }, {
5577 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
5578 'only_matching': True,
5579 }, {
5580 # music album playlist
5581 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
5582 'only_matching': True,
5583 }]
5584
@classmethod
def suitable(cls, url):
    """Decide whether this extractor should handle *url*.

    Returns False when YoutubeTabIE already accepts the URL, or when the query
    string carries a non-empty ``v`` parameter; otherwise the decision is left
    to the parent class's ``suitable``.
    """
    if YoutubeTabIE.suitable(url):
        return False
    # NOTE(review): function-local import; presumably kept so that this method
    # stays self-contained if its source is copied elsewhere - confirm before hoisting
    from ..utils import parse_qs
    video_ids = parse_qs(url).get('v', [None])
    return False if video_ids[0] else super().suitable(url)
5594
def _real_extract(self, url):
    """Redirect a playlist URL (or bare playlist ID) to YoutubeTabIE.

    A https://www.youtube.com/playlist URL is rebuilt from the original query
    string, falling back to ``list=<matched id>`` when the input has no query.
    When the input is a music URL, that fact is smuggled along with the new URL.
    """
    playlist_id = self._match_id(url)
    # Has to be evaluated on the original URL, before it is rewritten below
    from_music = YoutubeBaseInfoExtractor.is_music_url(url)
    query = parse_qs(url) or {'list': playlist_id}
    tab_url = update_url_query('https://www.youtube.com/playlist', query)
    if from_music:
        tab_url = smuggle_url(tab_url, {'is_music_url': True})
    return self.url_result(tab_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
5604
5605
class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a ``list`` parameter.

    The matched video and playlist IDs are re-packed into a regular
    youtube.com watch URL, which is then handed over to YoutubeTabIE.
    """
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi_webp/yeWKywCrFtk/maxresdefault.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite the short link as a watch URL with ``v`` and ``list`` set."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
5655
5656
class YoutubeLivestreamEmbedIE(InfoExtractor):
    """Map ``embed/live_stream?channel=<id>`` URLs to the channel's ``/live`` page."""
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel ID taken from the query string."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
5670
5671
class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` shorthand into the user's ``/videos`` page URL."""
    IE_NAME = 'youtube:user'
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with everything after the ``ytuser:`` prefix as user ID."""
        user_id = self._match_id(url)
        videos_url = 'https://www.youtube.com/user/%s/videos' % user_id
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
5686
5687
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the ``:ytfav`` keyword (and its spelling variants) to the ``LL`` playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed ``list=LL`` playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
5705
5706
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Build a playlist out of the notification menu (``:ytnotif`` keyword).

    The menu is fetched page by page from the ``notification/get_notification_menu``
    endpoint; each notification becomes a ``url`` entry pointing either at a
    video (YoutubeIE) or at a channel community post (YoutubeTabIE).
    """
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield the entries of one menu page.

        ``continuation_list`` is used as an out-parameter: slot 0 is reset to
        None and then set to the page's ``continuationItemRenderer``, if any.
        """
        menu_items = traverse_obj(
            response,
            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
            expected_type=list) or []
        continuation_list[0] = None
        for menu_item in menu_items:
            entry = self._extract_notification_renderer(menu_item.get('notificationRenderer'))
            if entry:
                yield entry
            next_page = menu_item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Convert one ``notificationRenderer`` into a ``url`` result dict.

        Returns None when the notification links neither to a video nor to a
        channel post for which both channel ID and post ID can be determined.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        channel_id = None
        if video_id:
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)

        # The notification's sent time is only used as upload date on explicit request
        upload_date = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()):
            sent_time = self._extract_time_text(notification, 'sentTimeText')[0]
            upload_date = strftime_or_none(sent_time, '%Y%m%d')

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'upload_date': upload_date,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries from successive menu pages until no continuation is left."""
        continuation_list = [None]
        response = None
        for page in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data is taken from the previous page's response (None for the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                return

    def _real_extract(self, url):
        """Return the notifications as a lazily-evaluated playlist."""
        display_id = 'notifications'
        ytcfg = {} if self.skip_webpage else self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)
5796
5797
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearch`` key."""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]
5811
5812
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor registered under the ``ytsearchdate`` key."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]
5826
5827
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle youtube.com ``/results`` and ``/search`` URLs.

    The search terms are read from ``search_query`` (or ``q``) and the optional
    ``sp`` parameter is forwarded to the search as its params.
    """
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the search results as a playlist whose ID and title are the query."""
        qs = parse_qs(url)
        # _VALID_URL guarantees that one of the two parameters is present
        terms = qs.get('search_query') or qs.get('q')
        query = terms[0]
        search_params = qs.get('sp', (None,))[0]
        return self.playlist_result(self._search_results(query, search_params), query, query)
5867
5868
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle music.youtube.com search URLs.

    A result section can be selected either through the ``sp`` query parameter
    or through the URL fragment (e.g. ``#songs``), which is looked up in
    ``_SECTIONS``.
    """
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        },
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'},
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'},
    }]

    # Section name (as written in the URL fragment) -> value of the ``sp`` parameter
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return the search results as a playlist titled ``<query>[ - <section>]``."""
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]

        if params:
            # Explicit ``sp``: show the section name if known, else the raw value
            section = params
            for name, token in self._SECTIONS.items():
                if token == params:
                    section = name
                    break
        else:
            # No ``sp``: treat the text after the first '#' as a section name
            pieces = url.split('#')
            fragment = pieces[1] if len(pieces) > 1 else ''
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None

        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)
5920
5921
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.

    Each subclass sets _FEED_NAME; it determines both the extractor name
    (``youtube:<feed>``) and the ``/feed/<feed>`` URL that is handed to YoutubeTabIE.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    @classproperty
    def IE_NAME(cls):
        return f'youtube:{cls._FEED_NAME}'

    def _real_initialize(self):
        # Not a YoutubeBaseInfoExtractor subclass, so the login check is borrowed explicitly
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the feed's URL."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
5940
5941
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the ``:ytwatchlater`` keyword to the ``WL`` playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed ``list=WL`` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
5954
5955
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``recommended``.

    Matches the bare youtube.com homepage URL as well as the ``:ytrec``
    keyword; unlike the base class it does not require login.
    """
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
5971
5972
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``subscriptions``, reached through the ``:ytsubs`` keyword."""
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
5984
5985
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``history``, reached through the ``:ythis``/``:ythistory`` keyword."""
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
5994
5995
class YoutubeStoriesIE(InfoExtractor):
    """Expand ``ytstories:<channel id>`` into the channel's ``RLTD...`` playlist URL.

    The playlist ID is ``RLTD`` followed by the channel ID without its
    leading ``UC``.
    """
    IE_NAME = 'youtube:stories'
    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
    _TESTS = [{
        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the stories playlist URL."""
        channel_suffix = self._match_id(url)
        playlist_id = f'RLTD{channel_suffix}'
        stories_url = f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1'
        return self.url_result(stories_url, ie=YoutubeTabIE, video_id=playlist_id)
6010
6011
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that lost their video ID.

    Such URLs typically result from an unquoted ``&`` on the command line, so
    extraction always fails with an explanatory (expected) error.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL."""
        # The example commands name this program (yt-dlp), so that they can be
        # copied verbatim by the user
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)
6059
6060
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Resolve a ``/clip/<id>`` page to its base video plus the clipped section."""
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': 'scottthewoz',
            'uploader_url': 'http://www.youtube.com/user/scottthewoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int,
        },
    }]

    def _real_extract(self, url):
        """Return a ``url_transparent`` result for the clip's base video.

        The result points at the base video's watch URL and carries
        ``section_start``/``section_end`` in seconds (converted from the
        millisecond values found in the page data).

        Raises ExtractorError if the base video ID or the clip's start/end
        timestamps cannot be found.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)

        # clip_data is None when nothing matches the path above. Report that
        # (or unusable timestamps) as an extraction failure instead of letting a
        # TypeError/KeyError/ValueError escape from the subscripting/conversion.
        start_ms = int_or_none(traverse_obj(clip_data, 'startTimeMs'))
        end_ms = int_or_none(traverse_obj(clip_data, 'endTimeMs'))
        if start_ms is None or end_ms is None:
            raise ExtractorError('Unable to find clip section timestamps')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            'section_start': start_ms / 1000,
            'section_end': end_ms / 1000,
        }
6117
6118
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` value is only 1 to 10 characters long."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID."""
        truncated_id = self._match_id(url)
        message = f'Incomplete YouTube ID {truncated_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)