]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/youtube.py
[youtube:tab] Extract more metadata from feeds/channels/playlists (#1018)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import calendar
6 import copy
7 import datetime
8 import hashlib
9 import itertools
10 import json
11 import math
12 import os.path
13 import random
14 import re
15 import sys
16 import time
17 import traceback
18
19 from .common import InfoExtractor, SearchInfoExtractor
20 from ..compat import (
21 compat_chr,
22 compat_HTTPError,
23 compat_parse_qs,
24 compat_str,
25 compat_urllib_parse_unquote_plus,
26 compat_urllib_parse_urlencode,
27 compat_urllib_parse_urlparse,
28 compat_urlparse,
29 )
30 from ..jsinterp import JSInterpreter
31 from ..utils import (
32 bug_reports_message,
33 clean_html,
34 datetime_from_str,
35 dict_get,
36 error_to_compat_str,
37 ExtractorError,
38 float_or_none,
39 format_field,
40 int_or_none,
41 is_html,
42 join_nonempty,
43 mimetype2ext,
44 network_exceptions,
45 NO_DEFAULT,
46 orderedSet,
47 parse_codecs,
48 parse_count,
49 parse_duration,
50 parse_iso8601,
51 parse_qs,
52 qualities,
53 remove_end,
54 remove_start,
55 smuggle_url,
56 str_or_none,
57 str_to_int,
58 strftime_or_none,
59 traverse_obj,
60 try_get,
61 unescapeHTML,
62 unified_strdate,
63 unsmuggle_url,
64 update_url_query,
65 url_or_none,
66 urljoin,
67 variadic,
68 )
69
70
def get_first(obj, keys, **kwargs):
    """Look up ``keys`` under every branch of ``obj`` and return a single result.

    ``keys`` is normalised with ``variadic`` and prefixed with ``...`` to form
    the path handed to ``traverse_obj``, which is called with
    ``get_all=False``. Any extra keyword arguments are forwarded unchanged.
    """
    branch_path = (..., *variadic(keys))
    return traverse_obj(obj, branch_path, get_all=False, **kwargs)
73
74
75 # any clients starting with _ cannot be explicitly requested by the user
# Innertube client configurations, keyed by client name.
# Each entry carries the request context (clientName / clientVersion) and the
# numeric INNERTUBE_CONTEXT_CLIENT_NAME. Entries that omit INNERTUBE_API_KEY,
# INNERTUBE_HOST or REQUIRE_JS_PLAYER receive defaults in
# build_innertube_clients(), which also adds 'hl' and 'priority'.
76 INNERTUBE_CLIENTS = {
77 'web': {
78 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
79 'INNERTUBE_CONTEXT': {
80 'client': {
81 'clientName': 'WEB',
82 'clientVersion': '2.20210622.10.00',
83 }
84 },
85 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
86 },
87 'web_embedded': {
88 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
89 'INNERTUBE_CONTEXT': {
90 'client': {
91 'clientName': 'WEB_EMBEDDED_PLAYER',
92 'clientVersion': '1.20210620.0.1',
93 },
94 },
95 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
96 },
97 'web_music': {
98 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
99 'INNERTUBE_HOST': 'music.youtube.com',
100 'INNERTUBE_CONTEXT': {
101 'client': {
102 'clientName': 'WEB_REMIX',
103 'clientVersion': '1.20210621.00.00',
104 }
105 },
106 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
107 },
108 'web_creator': {
109 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
110 'INNERTUBE_CONTEXT': {
111 'client': {
112 'clientName': 'WEB_CREATOR',
113 'clientVersion': '1.20210621.00.00',
114 }
115 },
116 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
117 },
118 'android': {
119 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
120 'INNERTUBE_CONTEXT': {
121 'client': {
122 'clientName': 'ANDROID',
123 'clientVersion': '16.20',
124 }
125 },
126 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
127 'REQUIRE_JS_PLAYER': False
128 },
129 'android_embedded': {
130 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
131 'INNERTUBE_CONTEXT': {
132 'client': {
133 'clientName': 'ANDROID_EMBEDDED_PLAYER',
134 'clientVersion': '16.20',
135 },
136 },
137 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
138 'REQUIRE_JS_PLAYER': False
139 },
140 'android_music': {
141 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
142 'INNERTUBE_HOST': 'music.youtube.com',
143 'INNERTUBE_CONTEXT': {
144 'client': {
145 'clientName': 'ANDROID_MUSIC',
146 'clientVersion': '4.32',
147 }
148 },
149 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
150 'REQUIRE_JS_PLAYER': False
151 },
# No INNERTUBE_API_KEY here: the default key is filled in by build_innertube_clients()
152 'android_creator': {
153 'INNERTUBE_CONTEXT': {
154 'client': {
155 'clientName': 'ANDROID_CREATOR',
156 'clientVersion': '21.24.100',
157 },
158 },
159 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
160 'REQUIRE_JS_PLAYER': False
161 },
162 # ios has HLS live streams
163 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
164 'ios': {
165 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
166 'INNERTUBE_CONTEXT': {
167 'client': {
168 'clientName': 'IOS',
169 'clientVersion': '16.20',
170 }
171 },
172 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
173 'REQUIRE_JS_PLAYER': False
174 },
175 'ios_embedded': {
176 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
177 'INNERTUBE_CONTEXT': {
178 'client': {
179 'clientName': 'IOS_MESSAGES_EXTENSION',
180 'clientVersion': '16.20',
181 },
182 },
183 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
184 'REQUIRE_JS_PLAYER': False
185 },
186 'ios_music': {
187 'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
188 'INNERTUBE_HOST': 'music.youtube.com',
189 'INNERTUBE_CONTEXT': {
190 'client': {
191 'clientName': 'IOS_MUSIC',
192 'clientVersion': '4.32',
193 },
194 },
195 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
196 'REQUIRE_JS_PLAYER': False
197 },
# No INNERTUBE_API_KEY here: the default key is filled in by build_innertube_clients()
198 'ios_creator': {
199 'INNERTUBE_CONTEXT': {
200 'client': {
201 'clientName': 'IOS_CREATOR',
202 'clientVersion': '21.24.100',
203 },
204 },
205 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
206 'REQUIRE_JS_PLAYER': False
207 },
208 # mweb has 'ultralow' formats
209 # See: https://github.com/yt-dlp/yt-dlp/pull/557
210 'mweb': {
211 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
212 'INNERTUBE_CONTEXT': {
213 'client': {
214 'clientName': 'MWEB',
215 'clientVersion': '2.20210721.07.00',
216 }
217 },
218 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
219 },
220 }
221
222
def build_innertube_clients():
    """Complete every entry of INNERTUBE_CLIENTS in place.

    For each client the missing API key, host and REQUIRE_JS_PLAYER values are
    defaulted, the context language defaults to 'en', and a 'priority' is
    derived from the client's base name (the part before the first '_').
    Base clients additionally get a deep-copied '<name>_agegate' sibling with
    an EMBED client screen and a thirdParty context; '*_embedded' clients get
    the same thirdParty context.
    """
    embed_context = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    base_names = ('android', 'web', 'ios', 'mweb')
    rank = qualities(base_names[::-1])
    defaults = (
        ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
    )

    # Iterate over a snapshot: the *_agegate entries are inserted while looping
    for name, cfg in tuple(INNERTUBE_CLIENTS.items()):
        for key, fallback in defaults:
            cfg.setdefault(key, fallback)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
        base_name = name.split('_', 1)[0]
        cfg['priority'] = 10 * rank(base_name)

        if name in base_names:
            agegate_cfg = copy.deepcopy(cfg)
            agegate_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            agegate_cfg['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            agegate_cfg['priority'] -= 1
            INNERTUBE_CLIENTS[f'{name}_agegate'] = agegate_cfg
        elif name.endswith('_embedded'):
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            cfg['priority'] -= 2
        else:
            cfg['priority'] -= 3
247
248
249 build_innertube_clients()
250
251
252 class YoutubeBaseInfoExtractor(InfoExtractor):
253 """Provide base functions for Youtube extractors"""
254
255 _RESERVED_NAMES = (
256 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
257 r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
258 r'browse|oembed|get_video_info|iframe_api|s/player|'
259 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
260
261 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
262
263 _NETRC_MACHINE = 'youtube'
264
265 # If True it will raise an error if no login info is provided
266 _LOGIN_REQUIRED = False
267
268 _INVIDIOUS_SITES = (
269 # invidious-redirect websites
270 r'(?:www\.)?redirect\.invidious\.io',
271 r'(?:(?:www|dev)\.)?invidio\.us',
272 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
273 r'(?:www\.)?invidious\.pussthecat\.org',
274 r'(?:www\.)?invidious\.zee\.li',
275 r'(?:www\.)?invidious\.ethibox\.fr',
276 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
277 # youtube-dl invidious instances list
278 r'(?:(?:www|no)\.)?invidiou\.sh',
279 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
280 r'(?:www\.)?invidious\.kabi\.tk',
281 r'(?:www\.)?invidious\.mastodon\.host',
282 r'(?:www\.)?invidious\.zapashcanon\.fr',
283 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
284 r'(?:www\.)?invidious\.tinfoil-hat\.net',
285 r'(?:www\.)?invidious\.himiko\.cloud',
286 r'(?:www\.)?invidious\.reallyancient\.tech',
287 r'(?:www\.)?invidious\.tube',
288 r'(?:www\.)?invidiou\.site',
289 r'(?:www\.)?invidious\.site',
290 r'(?:www\.)?invidious\.xyz',
291 r'(?:www\.)?invidious\.nixnet\.xyz',
292 r'(?:www\.)?invidious\.048596\.xyz',
293 r'(?:www\.)?invidious\.drycat\.fr',
294 r'(?:www\.)?inv\.skyn3t\.in',
295 r'(?:www\.)?tube\.poal\.co',
296 r'(?:www\.)?tube\.connect\.cafe',
297 r'(?:www\.)?vid\.wxzm\.sx',
298 r'(?:www\.)?vid\.mint\.lgbt',
299 r'(?:www\.)?vid\.puffyan\.us',
300 r'(?:www\.)?yewtu\.be',
301 r'(?:www\.)?yt\.elukerio\.org',
302 r'(?:www\.)?yt\.lelux\.fi',
303 r'(?:www\.)?invidious\.ggc-project\.de',
304 r'(?:www\.)?yt\.maisputain\.ovh',
305 r'(?:www\.)?ytprivate\.com',
306 r'(?:www\.)?invidious\.13ad\.de',
307 r'(?:www\.)?invidious\.toot\.koeln',
308 r'(?:www\.)?invidious\.fdn\.fr',
309 r'(?:www\.)?watch\.nettohikari\.com',
310 r'(?:www\.)?invidious\.namazso\.eu',
311 r'(?:www\.)?invidious\.silkky\.cloud',
312 r'(?:www\.)?invidious\.exonip\.de',
313 r'(?:www\.)?invidious\.riverside\.rocks',
314 r'(?:www\.)?invidious\.blamefran\.net',
315 r'(?:www\.)?invidious\.moomoo\.de',
316 r'(?:www\.)?ytb\.trom\.tf',
317 r'(?:www\.)?yt\.cyberhost\.uk',
318 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
319 r'(?:www\.)?qklhadlycap4cnod\.onion',
320 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
321 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
322 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
323 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
324 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
325 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
326 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
327 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
328 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
329 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
330 )
331
332 def _login(self):
333 """
334 Attempt to log in to YouTube.
335 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
336 """
337
338 if (self._LOGIN_REQUIRED
339 and self.get_param('cookiefile') is None
340 and self.get_param('cookiesfrombrowser') is None):
341 self.raise_login_required(
342 'Login details are needed to download this content', method='cookies')
343 username, password = self._get_login_info()
344 if username:
345 self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
346
347 def _initialize_consent(self):
348 cookies = self._get_cookies('https://www.youtube.com/')
349 if cookies.get('__Secure-3PSID'):
350 return
351 consent_id = None
352 consent = cookies.get('CONSENT')
353 if consent:
354 if 'YES' in consent.value:
355 return
356 consent_id = self._search_regex(
357 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
358 if not consent_id:
359 consent_id = random.randint(100, 999)
360 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
361
def _initialize_pref(self):
    """Rewrite the PREF cookie for .youtube.com so that it contains hl=en.

    Existing PREF values are kept when the cookie can be parsed; a parse
    failure only produces a warning and the cookie is rebuilt from scratch.
    """
    pref_cookie = self._get_cookies('https://www.youtube.com/').get('PREF')
    prefs = {}
    if pref_cookie:
        try:
            prefs = dict(compat_urlparse.parse_qsl(pref_cookie.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
    prefs['hl'] = 'en'
    encoded_prefs = compat_urllib_parse_urlencode(prefs)
    self._set_cookie('.youtube.com', name='PREF', value=encoded_prefs)
373
374 def _real_initialize(self):
375 self._initialize_pref()
376 self._initialize_consent()
377 self._login()
378
379 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
380 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
381 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
382
def _get_default_ytcfg(self, client='web'):
    """Return an independent deep copy of the INNERTUBE_CLIENTS entry for ``client``."""
    client_defaults = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(client_defaults)
385
def _get_innertube_host(self, client='web'):
    """Return the INNERTUBE_HOST configured for ``client`` in INNERTUBE_CLIENTS."""
    client_config = INNERTUBE_CLIENTS[client]
    return client_config['INNERTUBE_HOST']
388
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """try_get on ``ytcfg``, falling back to the default config of ``default_client``.

    The fallback is only consulted when the lookup on ``ytcfg`` yields a
    falsy value.
    """
    found = try_get(ytcfg, getter, expected_type)
    if found:
        return found
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
393
def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name from ``ytcfg`` (or the defaults of ``default_client``).

    INNERTUBE_CLIENT_NAME is tried first, then the clientName stored in
    the INNERTUBE_CONTEXT.
    """
    name_getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_NAME'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, name_getters, compat_str, default_client)
398
def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from ``ytcfg`` (or the defaults of ``default_client``).

    INNERTUBE_CLIENT_VERSION is tried first, then the clientVersion stored in
    the INNERTUBE_CONTEXT.
    """
    version_getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, version_getters, compat_str, default_client)
403
def _extract_api_key(self, ytcfg=None, default_client='web'):
    """Return INNERTUBE_API_KEY from ``ytcfg``, else from the defaults of ``default_client``."""
    def read_api_key(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, read_api_key, compat_str, default_client)
406
def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the INNERTUBE_CONTEXT dict with its client language forced to 'en'.

    The context is looked up in ``ytcfg`` and in the default config of
    ``default_client``; the returned dict is modified in place.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language for extraction
    client_section = traverse_obj(context, 'client', expected_type=dict, default={})
    client_section['hl'] = 'en'
    return context
413
414 _SAPISID = None
415
416 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
417 time_now = round(time.time())
418 if self._SAPISID is None:
419 yt_cookies = self._get_cookies('https://www.youtube.com')
420 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
421 # See: https://github.com/yt-dlp/yt-dlp/issues/393
422 sapisid_cookie = dict_get(
423 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
424 if sapisid_cookie and sapisid_cookie.value:
425 self._SAPISID = sapisid_cookie.value
426 self.write_debug('Extracted SAPISID cookie')
427 # SAPISID cookie is required if not already present
428 if not yt_cookies.get('SAPISID'):
429 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
430 self._set_cookie(
431 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
432 else:
433 self._SAPISID = False
434 if not self._SAPISID:
435 return None
436 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
437 sapisidhash = hashlib.sha1(
438 f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
439 return f'SAPISIDHASH {time_now}_{sapisidhash}'
440
441 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
442 note='Downloading API JSON', errnote='Unable to download API page',
443 context=None, api_key=None, api_hostname=None, default_client='web'):
444
445 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
446 data.update(query)
447 real_headers = self.generate_api_headers(default_client=default_client)
448 real_headers.update({'content-type': 'application/json'})
449 if headers:
450 real_headers.update(headers)
451 return self._download_json(
452 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
453 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
454 data=json.dumps(data).encode('utf8'), headers=real_headers,
455 query={'key': api_key or self._extract_api_key()})
456
457 def extract_yt_initial_data(self, item_id, webpage, fatal=True):
458 data = self._search_regex(
459 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
460 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
461 if data:
462 return self._parse_json(data, item_id, fatal=fatal)
463
464 @staticmethod
465 def _extract_session_index(*data):
466 """
467 Index of current account in account list.
468 See: https://github.com/yt-dlp/yt-dlp/pull/519
469 """
470 for ytcfg in data:
471 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
472 if session_index is not None:
473 return session_index
474
475 # Deprecated?
476 def _extract_identity_token(self, ytcfg=None, webpage=None):
477 if ytcfg:
478 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
479 if token:
480 return token
481 if webpage:
482 return self._search_regex(
483 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
484 'identity token', default=None, fatal=False)
485
486 @staticmethod
487 def _extract_account_syncid(*args):
488 """
489 Extract syncId required to download private playlists of secondary channels
490 @params response and/or ytcfg
491 """
492 for data in args:
493 # ytcfg includes channel_syncid if on secondary channel
494 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
495 if delegated_sid:
496 return delegated_sid
497 sync_ids = (try_get(
498 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
499 lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
500 if len(sync_ids) >= 2 and sync_ids[1]:
501 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
502 # and just "user_syncid||" for primary channel. We only want the channel_syncid
503 return sync_ids[0]
504
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state

    Each argument is searched for the first string found at any of:
    VISITOR_DATA, INNERTUBE_CONTEXT.client.visitorData or
    responseContext.visitorData.
    """
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    # The alternatives must be passed as ONE path element (hence the list
    # wrapper). Passed bare, the tuple is taken as a sequential path
    # (VISITOR_DATA -> ... -> ...), which can never match.
    return get_first(args, [visitor_data_paths], expected_type=str)
514
515 @property
516 def is_authenticated(self):
517 return bool(self._generate_sapisidhash_header())
518
519 def extract_ytcfg(self, video_id, webpage):
520 if not webpage:
521 return {}
522 return self._parse_json(
523 self._search_regex(
524 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
525 default='{}'), video_id, fatal=False) or {}
526
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an Innertube API request.

    Explicit keyword arguments take precedence; anything not given is
    extracted from ``ytcfg`` (client name/version fall back to the defaults
    of ``default_client``). Authorization and X-Origin are added only when a
    SAPISIDHASH can be generated. Headers whose value is None are dropped.
    """
    host = api_hostname or self._get_innertube_host(default_client)
    origin = 'https://' + host

    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
        'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers.update({'Authorization': auth, 'X-Origin': origin})

    return {name: value for name, value in headers.items() if value is not None}
551
552 @staticmethod
553 def _build_api_continuation_query(continuation, ctp=None):
554 query = {
555 'continuation': continuation
556 }
557 # TODO: Inconsistency with clickTrackingParams.
558 # Currently we have a fixed ctp contained within context (from ytcfg)
559 # and a ctp in root query for continuation.
560 if ctp:
561 query['clickTracking'] = {'clickTrackingParams': ctp}
562 return query
563
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from nextContinuationData / reloadContinuationData.

    Returns None when ``renderer`` has neither structure or no continuation token.
    """
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_data.get('clickTrackingParams'))
576
577 @classmethod
578 def _extract_continuation_ep_data(cls, continuation_ep: dict):
579 if isinstance(continuation_ep, dict):
580 continuation = try_get(
581 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
582 if not continuation:
583 return
584 ctp = continuation_ep.get('clickTrackingParams')
585 return cls._build_api_continuation_query(continuation, ctp)
586
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for ``renderer``, or None if it has none.

    The renderer's own continuation data is preferred; otherwise the entries
    of its 'contents' and 'items' lists are scanned for a
    continuationItemRenderer.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in (try_get(renderer, lambda x: x[key], list) or [])]

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        endpoint = try_get(entry, endpoint_getters, dict)
        found = cls._extract_continuation_ep_data(endpoint)
        if found:
            return found
607
@classmethod
def _extract_alerts(cls, data):
    """Yield ``(alert_type, message)`` for every alert in ``data['alerts']``.

    Entries that are not dicts, alerts without a 'type' and alerts without
    any text are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # A malformed response must not crash extraction with an AttributeError
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
620
621 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
622 errors = []
623 warnings = []
624 for alert_type, alert_message in alerts:
625 if alert_type.lower() == 'error' and fatal:
626 errors.append([alert_type, alert_message])
627 else:
628 warnings.append([alert_type, alert_message])
629
630 for alert_type, alert_message in (warnings + errors[:-1]):
631 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
632 if errors:
633 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
634
635 def _extract_and_report_alerts(self, data, *args, **kwargs):
636 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
637
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadataBadgeRenderer labels in ``renderer['badges']``."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}
645
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """Return the first non-empty text found at any of ``path_list`` in ``data``.

    For each candidate object its 'simpleText' is used if present;
    otherwise the 'text' of its 'runs' (or of the candidate itself when it
    is a list) are joined, limited to the first ``max_runs`` runs when
    given. Without any path, ``data`` itself is the candidate.
    Returns None when nothing yields text.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not is_branching:
                # A non-branching path gives a single object, not a list of matches
                candidates = [candidates]

        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], compat_str)
            if simple_text:
                return simple_text

            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                runs = candidate

            limit = min(len(runs), max_runs or len(runs))
            pieces = traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[])
            joined = ''.join(pieces)
            if joined:
                return joined
667
668 @staticmethod
669 def extract_relative_time(relative_time_text):
670 """
671 Extracts a relative time from string and converts to dt object
672 e.g. 'streamed 6 days ago', '5 seconds ago (edited)'
673 """
674 mobj = re.search(r'(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
675 if mobj:
676 try:
677 return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')), precision='auto')
678 except ValueError:
679 return None
680
681 def _extract_time_text(self, renderer, *path_list):
682 text = self._get_text(renderer, *path_list) or ''
683 dt = self.extract_relative_time(text)
684 timestamp = None
685 if isinstance(dt, datetime.datetime):
686 timestamp = calendar.timegm(dt.timetuple())
687 if text and timestamp is None:
688 self.report_warning('Cannot parse localized time text' + bug_reports_message(), only_once=True)
689 return timestamp, text
690
691 def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
692 ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
693 default_client='web'):
694 response = None
695 last_error = None
696 count = -1
697 retries = self.get_param('extractor_retries', 3)
698 if check_get_keys is None:
699 check_get_keys = []
700 while count < retries:
701 count += 1
702 if last_error:
703 self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
704 try:
705 response = self._call_api(
706 ep=ep, fatal=True, headers=headers,
707 video_id=item_id, query=query,
708 context=self._extract_context(ytcfg, default_client),
709 api_key=self._extract_api_key(ytcfg, default_client),
710 api_hostname=api_hostname, default_client=default_client,
711 note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
712 except ExtractorError as e:
713 if isinstance(e.cause, network_exceptions):
714 if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
715 e.cause.seek(0)
716 yt_error = try_get(
717 self._parse_json(e.cause.read().decode(), item_id, fatal=False),
718 lambda x: x['error']['message'], compat_str)
719 if yt_error:
720 self._report_alerts([('ERROR', yt_error)], fatal=False)
721 # Downloading page may result in intermittent 5xx HTTP error
722 # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
723 # We also want to catch all other network exceptions since errors in later pages can be troublesome
724 # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
725 if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
726 last_error = error_to_compat_str(e.cause or e.msg)
727 if count < retries:
728 continue
729 if fatal:
730 raise
731 else:
732 self.report_warning(error_to_compat_str(e))
733 return
734
735 else:
736 try:
737 self._extract_and_report_alerts(response, only_once=True)
738 except ExtractorError as e:
739 # YouTube servers may return errors we want to retry on in a 200 OK response
740 # See: https://github.com/yt-dlp/yt-dlp/issues/839
741 if 'unknown error' in e.msg.lower():
742 last_error = e.msg
743 continue
744 if fatal:
745 raise
746 self.report_warning(error_to_compat_str(e))
747 return
748 if not check_get_keys or dict_get(response, check_get_keys):
749 break
750 # Youtube sometimes sends incomplete data
751 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
752 last_error = 'Incomplete data received'
753 if count >= retries:
754 if fatal:
755 raise ExtractorError(last_error)
756 else:
757 self.report_warning(last_error)
758 return
759 return response
760
761 @staticmethod
762 def is_music_url(url):
763 return re.match(r'https?://music\.youtube\.com/', url) is not None
764
def _extract_video(self, renderer):
    """Build a 'url'-type result dict for the video described by ``renderer``.

    Title, description, duration, view count, uploader, channel id, upload
    date, live status, release timestamp and availability are read from the
    renderer; the entry is delegated to YoutubeIE.
    """
    video_id = renderer.get('videoId')
    title = self._get_text(renderer, 'title')
    description = self._get_text(renderer, 'descriptionSnippet')

    length_text = self._get_text(
        renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
    duration = parse_duration(length_text)

    view_count_text = self._get_text(renderer, 'viewCountText') or ''
    compact_views = re.sub(r'\s', '', view_count_text)
    view_count = str_to_int(self._search_regex(
        r'^([\d,]+)', compact_views, 'view count', default=None))

    uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'), expected_type=str, get_all=False)
    timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), get_all=False, expected_type=str)
    badges = self._extract_badges(renderer)

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or 'live now' in badges:
        live_status = 'is_live'
    else:
        live_status = None

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': f'https://www.youtube.com/watch?v={video_id}',
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
        'channel_id': channel_id,
        'upload_date': strftime_or_none(timestamp, '%Y%m%d'),
        'live_status': live_status,
        'release_timestamp': scheduled_timestamp,
        'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
    }
803
804
805 class YoutubeIE(YoutubeBaseInfoExtractor):
806 IE_DESC = 'YouTube'
807 _VALID_URL = r"""(?x)^
808 (
809 (?:https?://|//) # http(s):// or protocol-independent URL
810 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
811 (?:www\.)?deturl\.com/www\.youtube\.com|
812 (?:www\.)?pwnyoutube\.com|
813 (?:www\.)?hooktube\.com|
814 (?:www\.)?yourepeat\.com|
815 tube\.majestyc\.net|
816 %(invidious)s|
817 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
818 (?:.*?\#/)? # handle anchor (#/) redirect urls
819 (?: # the various things that can precede the ID:
820 (?:(?:v|embed|e|shorts)/(?!videoseries)) # v/ or embed/ or e/ or shorts/
821 |(?: # or the v= param in all its forms
822 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
823 (?:\?|\#!?) # the params delimiter ? or # or #!
824 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
825 v=
826 )
827 ))
828 |(?:
829 youtu\.be| # just youtu.be/xxxx
830 vid\.plus| # or vid.plus/xxxx
831 zwearz\.com/watch| # or zwearz.com/watch/xxxx
832 %(invidious)s
833 )/
834 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
835 )
836 )? # all until now is optional -> you can pass the naked ID
837 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
838 (?(1).+)? # if we found the ID, everything can follow
839 (?:\#|$)""" % {
840 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
841 }
842 _PLAYER_INFO_RE = (
843 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
844 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
845 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
846 )
# Static metadata for known formats, keyed by itag (as a string).  Each value
# is a dict of whatever fields are fixed for that itag: container extension,
# dimensions, audio/video codec, audio bitrate, fps, a human-readable
# ``format_note`` and, for some groups, a negative ``preference``.
# Fields that vary between videos are deliberately left out (see the inline
# notes on itags 36, 138 and 272).
# NOTE(review): presumably merged into formats discovered at extraction time
# by code elsewhere in the class -- confirm against the caller.
_formats = {
    '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
    '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
    '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
    '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
    '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
    '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

    # 3D videos
    '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
    '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
    '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

    # Apple HTTP Live Streaming
    '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

    # DASH mp4 video
    '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
    '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

    # Dash mp4 audio
    '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
    '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
    '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
    '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
    '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

    # Dash webm
    '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
    '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
    '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

    # Dash webm audio
    '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
    '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

    # Dash webm audio with opus inside
    '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
    '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
    '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

    # RTMP (unnamed)
    '_rtmp': {'protocol': 'rtmp'},

    # av01 video only formats sometimes served with "unknown" codecs
    '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
    '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
    '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
    '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
    '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
    '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
    '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
    '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
}
# Subtitle format names known to this extractor.
# NOTE(review): presumably the formats requested for each subtitle track by
# code elsewhere in the class -- confirm against the caller.
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

# NOTE(review): looks like this turns off the base extractor's geo-bypass
# handling for this extractor; the flag's semantics are defined outside this
# chunk -- confirm.
_GEO_BYPASS = False

# Name of this extractor.
IE_NAME = 'youtube'
961 _TESTS = [
962 {
963 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
964 'info_dict': {
965 'id': 'BaW_jenozKc',
966 'ext': 'mp4',
967 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
968 'uploader': 'Philipp Hagemeister',
969 'uploader_id': 'phihag',
970 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
971 'channel': 'Philipp Hagemeister',
972 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
973 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
974 'upload_date': '20121002',
975 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
976 'categories': ['Science & Technology'],
977 'tags': ['youtube-dl'],
978 'duration': 10,
979 'view_count': int,
980 'like_count': int,
981 # 'dislike_count': int,
982 'availability': 'public',
983 'playable_in_embed': True,
984 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
985 'live_status': 'not_live',
986 'age_limit': 0,
987 'start_time': 1,
988 'end_time': 9,
989 }
990 },
991 {
992 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
993 'note': 'Embed-only video (#1746)',
994 'info_dict': {
995 'id': 'yZIXLfi8CZQ',
996 'ext': 'mp4',
997 'upload_date': '20120608',
998 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
999 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1000 'uploader': 'SET India',
1001 'uploader_id': 'setindia',
1002 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1003 'age_limit': 18,
1004 },
1005 'skip': 'Private video',
1006 },
1007 {
1008 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1009 'note': 'Use the first video ID in the URL',
1010 'info_dict': {
1011 'id': 'BaW_jenozKc',
1012 'ext': 'mp4',
1013 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1014 'uploader': 'Philipp Hagemeister',
1015 'uploader_id': 'phihag',
1016 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1017 'upload_date': '20121002',
1018 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1019 'categories': ['Science & Technology'],
1020 'tags': ['youtube-dl'],
1021 'duration': 10,
1022 'view_count': int,
1023 'like_count': int,
1024 'dislike_count': int,
1025 },
1026 'params': {
1027 'skip_download': True,
1028 },
1029 },
1030 {
1031 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1032 'note': '256k DASH audio (format 141) via DASH manifest',
1033 'info_dict': {
1034 'id': 'a9LDPn-MO4I',
1035 'ext': 'm4a',
1036 'upload_date': '20121002',
1037 'uploader_id': '8KVIDEO',
1038 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1039 'description': '',
1040 'uploader': '8KVIDEO',
1041 'title': 'UHDTV TEST 8K VIDEO.mp4'
1042 },
1043 'params': {
1044 'youtube_include_dash_manifest': True,
1045 'format': '141',
1046 },
1047 'skip': 'format 141 not served anymore',
1048 },
1049 # DASH manifest with encrypted signature
1050 {
1051 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1052 'info_dict': {
1053 'id': 'IB3lcPjvWLA',
1054 'ext': 'm4a',
1055 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1056 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1057 'duration': 244,
1058 'uploader': 'AfrojackVEVO',
1059 'uploader_id': 'AfrojackVEVO',
1060 'upload_date': '20131011',
1061 'abr': 129.495,
1062 },
1063 'params': {
1064 'youtube_include_dash_manifest': True,
1065 'format': '141/bestaudio[ext=m4a]',
1066 },
1067 },
1068 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1069 {
1070 'note': 'Embed allowed age-gate video',
1071 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1072 'info_dict': {
1073 'id': 'HtVdAasjOgU',
1074 'ext': 'mp4',
1075 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1076 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1077 'duration': 142,
1078 'uploader': 'The Witcher',
1079 'uploader_id': 'WitcherGame',
1080 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1081 'upload_date': '20140605',
1082 'age_limit': 18,
1083 },
1084 },
1085 {
1086 'note': 'Age-gate video with embed allowed in public site',
1087 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1088 'info_dict': {
1089 'id': 'HsUATh_Nc2U',
1090 'ext': 'mp4',
1091 'title': 'Godzilla 2 (Official Video)',
1092 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1093 'upload_date': '20200408',
1094 'uploader_id': 'FlyingKitty900',
1095 'uploader': 'FlyingKitty',
1096 'age_limit': 18,
1097 },
1098 },
1099 {
1100 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1101 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1102 'info_dict': {
1103 'id': 'Tq92D6wQ1mg',
1104 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1105 'ext': 'mp4',
1106 'upload_date': '20191227',
1107 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1108 'uploader': 'Projekt Melody',
1109 'description': 'md5:17eccca93a786d51bc67646756894066',
1110 'age_limit': 18,
1111 },
1112 },
1113 {
1114 'note': 'Non-Agegated non-embeddable video',
1115 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1116 'info_dict': {
1117 'id': 'MeJVWBSsPAY',
1118 'ext': 'mp4',
1119 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1120 'uploader': 'Herr Lurik',
1121 'uploader_id': 'st3in234',
1122 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1123 'upload_date': '20130730',
1124 },
1125 },
1126 {
1127 'note': 'Non-bypassable age-gated video',
1128 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1129 'only_matching': True,
1130 },
1131 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1132 # YouTube Red ad is not captured for creator
1133 {
1134 'url': '__2ABJjxzNo',
1135 'info_dict': {
1136 'id': '__2ABJjxzNo',
1137 'ext': 'mp4',
1138 'duration': 266,
1139 'upload_date': '20100430',
1140 'uploader_id': 'deadmau5',
1141 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1142 'creator': 'deadmau5',
1143 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1144 'uploader': 'deadmau5',
1145 'title': 'Deadmau5 - Some Chords (HD)',
1146 'alt_title': 'Some Chords',
1147 },
1148 'expected_warnings': [
1149 'DASH manifest missing',
1150 ]
1151 },
1152 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1153 {
1154 'url': 'lqQg6PlCWgI',
1155 'info_dict': {
1156 'id': 'lqQg6PlCWgI',
1157 'ext': 'mp4',
1158 'duration': 6085,
1159 'upload_date': '20150827',
1160 'uploader_id': 'olympic',
1161 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1162 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1163 'uploader': 'Olympics',
1164 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1165 },
1166 'params': {
1167 'skip_download': 'requires avconv',
1168 }
1169 },
1170 # Non-square pixels
1171 {
1172 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1173 'info_dict': {
1174 'id': '_b-2C3KPAM0',
1175 'ext': 'mp4',
1176 'stretched_ratio': 16 / 9.,
1177 'duration': 85,
1178 'upload_date': '20110310',
1179 'uploader_id': 'AllenMeow',
1180 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1181 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1182 'uploader': '孫ᄋᄅ',
1183 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1184 },
1185 },
1186 # url_encoded_fmt_stream_map is empty string
1187 {
1188 'url': 'qEJwOuvDf7I',
1189 'info_dict': {
1190 'id': 'qEJwOuvDf7I',
1191 'ext': 'webm',
1192 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1193 'description': '',
1194 'upload_date': '20150404',
1195 'uploader_id': 'spbelect',
1196 'uploader': 'Наблюдатели Петербурга',
1197 },
1198 'params': {
1199 'skip_download': 'requires avconv',
1200 },
1201 'skip': 'This live event has ended.',
1202 },
1203 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1204 {
1205 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1206 'info_dict': {
1207 'id': 'FIl7x6_3R5Y',
1208 'ext': 'webm',
1209 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1210 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1211 'duration': 220,
1212 'upload_date': '20150625',
1213 'uploader_id': 'dorappi2000',
1214 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1215 'uploader': 'dorappi2000',
1216 'formats': 'mincount:31',
1217 },
1218 'skip': 'not actual anymore',
1219 },
1220 # DASH manifest with segment_list
1221 {
1222 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1223 'md5': '8ce563a1d667b599d21064e982ab9e31',
1224 'info_dict': {
1225 'id': 'CsmdDsKjzN8',
1226 'ext': 'mp4',
1227 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1228 'uploader': 'Airtek',
1229 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1230 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1231 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1232 },
1233 'params': {
1234 'youtube_include_dash_manifest': True,
1235 'format': '135', # bestvideo
1236 },
1237 'skip': 'This live event has ended.',
1238 },
1239 {
1240 # Multifeed videos (multiple cameras), URL is for Main Camera
1241 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1242 'info_dict': {
1243 'id': 'jvGDaLqkpTg',
1244 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1245 'description': 'md5:e03b909557865076822aa169218d6a5d',
1246 },
1247 'playlist': [{
1248 'info_dict': {
1249 'id': 'jvGDaLqkpTg',
1250 'ext': 'mp4',
1251 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1252 'description': 'md5:e03b909557865076822aa169218d6a5d',
1253 'duration': 10643,
1254 'upload_date': '20161111',
1255 'uploader': 'Team PGP',
1256 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1257 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1258 },
1259 }, {
1260 'info_dict': {
1261 'id': '3AKt1R1aDnw',
1262 'ext': 'mp4',
1263 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1264 'description': 'md5:e03b909557865076822aa169218d6a5d',
1265 'duration': 10991,
1266 'upload_date': '20161111',
1267 'uploader': 'Team PGP',
1268 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1269 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1270 },
1271 }, {
1272 'info_dict': {
1273 'id': 'RtAMM00gpVc',
1274 'ext': 'mp4',
1275 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1276 'description': 'md5:e03b909557865076822aa169218d6a5d',
1277 'duration': 10995,
1278 'upload_date': '20161111',
1279 'uploader': 'Team PGP',
1280 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1281 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1282 },
1283 }, {
1284 'info_dict': {
1285 'id': '6N2fdlP3C5U',
1286 'ext': 'mp4',
1287 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1288 'description': 'md5:e03b909557865076822aa169218d6a5d',
1289 'duration': 10990,
1290 'upload_date': '20161111',
1291 'uploader': 'Team PGP',
1292 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1293 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1294 },
1295 }],
1296 'params': {
1297 'skip_download': True,
1298 },
1299 'skip': 'Not multifeed anymore',
1300 },
1301 {
1302 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1303 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1304 'info_dict': {
1305 'id': 'gVfLd0zydlo',
1306 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1307 },
1308 'playlist_count': 2,
1309 'skip': 'Not multifeed anymore',
1310 },
1311 {
1312 'url': 'https://vid.plus/FlRa-iH7PGw',
1313 'only_matching': True,
1314 },
1315 {
1316 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1317 'only_matching': True,
1318 },
1319 {
1320 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1321 # Also tests cut-off URL expansion in video description (see
1322 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1323 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1324 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1325 'info_dict': {
1326 'id': 'lsguqyKfVQg',
1327 'ext': 'mp4',
1328 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1329 'alt_title': 'Dark Walk',
1330 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1331 'duration': 133,
1332 'upload_date': '20151119',
1333 'uploader_id': 'IronSoulElf',
1334 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1335 'uploader': 'IronSoulElf',
1336 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1337 'track': 'Dark Walk',
1338 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1339 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1340 },
1341 'params': {
1342 'skip_download': True,
1343 },
1344 },
1345 {
1346 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1347 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1348 'only_matching': True,
1349 },
1350 {
1351 # Video with yt:stretch=17:0
1352 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1353 'info_dict': {
1354 'id': 'Q39EVAstoRM',
1355 'ext': 'mp4',
1356 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1357 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1358 'upload_date': '20151107',
1359 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1360 'uploader': 'CH GAMER DROID',
1361 },
1362 'params': {
1363 'skip_download': True,
1364 },
1365 'skip': 'This video does not exist.',
1366 },
1367 {
1368 # Video with incomplete 'yt:stretch=16:'
1369 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1370 'only_matching': True,
1371 },
1372 {
1373 # Video licensed under Creative Commons
1374 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1375 'info_dict': {
1376 'id': 'M4gD1WSo5mA',
1377 'ext': 'mp4',
1378 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1379 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1380 'duration': 721,
1381 'upload_date': '20150127',
1382 'uploader_id': 'BerkmanCenter',
1383 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1384 'uploader': 'The Berkman Klein Center for Internet & Society',
1385 'license': 'Creative Commons Attribution license (reuse allowed)',
1386 },
1387 'params': {
1388 'skip_download': True,
1389 },
1390 },
1391 {
1392 # Channel-like uploader_url
1393 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1394 'info_dict': {
1395 'id': 'eQcmzGIKrzg',
1396 'ext': 'mp4',
1397 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1398 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1399 'duration': 4060,
1400 'upload_date': '20151119',
1401 'uploader': 'Bernie Sanders',
1402 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1403 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1404 'license': 'Creative Commons Attribution license (reuse allowed)',
1405 },
1406 'params': {
1407 'skip_download': True,
1408 },
1409 },
1410 {
1411 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1412 'only_matching': True,
1413 },
1414 {
1415 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1416 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1417 'only_matching': True,
1418 },
1419 {
1420 # Rental video preview
1421 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1422 'info_dict': {
1423 'id': 'uGpuVWrhIzE',
1424 'ext': 'mp4',
1425 'title': 'Piku - Trailer',
1426 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1427 'upload_date': '20150811',
1428 'uploader': 'FlixMatrix',
1429 'uploader_id': 'FlixMatrixKaravan',
1430 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1431 'license': 'Standard YouTube License',
1432 },
1433 'params': {
1434 'skip_download': True,
1435 },
1436 'skip': 'This video is not available.',
1437 },
1438 {
1439 # YouTube Red video with episode data
1440 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1441 'info_dict': {
1442 'id': 'iqKdEhx-dD4',
1443 'ext': 'mp4',
1444 'title': 'Isolation - Mind Field (Ep 1)',
1445 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1446 'duration': 2085,
1447 'upload_date': '20170118',
1448 'uploader': 'Vsauce',
1449 'uploader_id': 'Vsauce',
1450 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1451 'series': 'Mind Field',
1452 'season_number': 1,
1453 'episode_number': 1,
1454 },
1455 'params': {
1456 'skip_download': True,
1457 },
1458 'expected_warnings': [
1459 'Skipping DASH manifest',
1460 ],
1461 },
1462 {
1463 # The following content has been identified by the YouTube community
1464 # as inappropriate or offensive to some audiences.
1465 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1466 'info_dict': {
1467 'id': '6SJNVb0GnPI',
1468 'ext': 'mp4',
1469 'title': 'Race Differences in Intelligence',
1470 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1471 'duration': 965,
1472 'upload_date': '20140124',
1473 'uploader': 'New Century Foundation',
1474 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1475 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1476 },
1477 'params': {
1478 'skip_download': True,
1479 },
1480 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1481 },
1482 {
1483 # itag 212
1484 'url': '1t24XAntNCY',
1485 'only_matching': True,
1486 },
1487 {
1488 # geo restricted to JP
1489 'url': 'sJL6WA-aGkQ',
1490 'only_matching': True,
1491 },
1492 {
1493 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1494 'only_matching': True,
1495 },
1496 {
1497 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1498 'only_matching': True,
1499 },
1500 {
1501 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1502 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1503 'only_matching': True,
1504 },
1505 {
1506 # DRM protected
1507 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1508 'only_matching': True,
1509 },
1510 {
1511 # Video with unsupported adaptive stream type formats
1512 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1513 'info_dict': {
1514 'id': 'Z4Vy8R84T1U',
1515 'ext': 'mp4',
1516 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1517 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1518 'duration': 433,
1519 'upload_date': '20130923',
1520 'uploader': 'Amelia Putri Harwita',
1521 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1522 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1523 'formats': 'maxcount:10',
1524 },
1525 'params': {
1526 'skip_download': True,
1527 'youtube_include_dash_manifest': False,
1528 },
1529 'skip': 'not actual anymore',
1530 },
1531 {
1532 # Youtube Music Auto-generated description
1533 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1534 'info_dict': {
1535 'id': 'MgNrAu2pzNs',
1536 'ext': 'mp4',
1537 'title': 'Voyeur Girl',
1538 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1539 'upload_date': '20190312',
1540 'uploader': 'Stephen - Topic',
1541 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1542 'artist': 'Stephen',
1543 'track': 'Voyeur Girl',
1544 'album': 'it\'s too much love to know my dear',
1545 'release_date': '20190313',
1546 'release_year': 2019,
1547 },
1548 'params': {
1549 'skip_download': True,
1550 },
1551 },
1552 {
1553 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1554 'only_matching': True,
1555 },
1556 {
1557 # invalid -> valid video id redirection
1558 'url': 'DJztXj2GPfl',
1559 'info_dict': {
1560 'id': 'DJztXj2GPfk',
1561 'ext': 'mp4',
1562 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1563 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1564 'upload_date': '20090125',
1565 'uploader': 'Prochorowka',
1566 'uploader_id': 'Prochorowka',
1567 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1568 'artist': 'Panjabi MC',
1569 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1570 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1571 },
1572 'params': {
1573 'skip_download': True,
1574 },
1575 'skip': 'Video unavailable',
1576 },
1577 {
1578 # empty description results in an empty string
1579 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1580 'info_dict': {
1581 'id': 'x41yOUIvK2k',
1582 'ext': 'mp4',
1583 'title': 'IMG 3456',
1584 'description': '',
1585 'upload_date': '20170613',
1586 'uploader_id': 'ElevageOrVert',
1587 'uploader': 'ElevageOrVert',
1588 },
1589 'params': {
1590 'skip_download': True,
1591 },
1592 },
1593 {
1594 # with '};' inside yt initial data (see [1])
1595 # see [2] for an example with '};' inside ytInitialPlayerResponse
1596 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1597 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1598 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1599 'info_dict': {
1600 'id': 'CHqg6qOn4no',
1601 'ext': 'mp4',
1602 'title': 'Part 77 Sort a list of simple types in c#',
1603 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1604 'upload_date': '20130831',
1605 'uploader_id': 'kudvenkat',
1606 'uploader': 'kudvenkat',
1607 },
1608 'params': {
1609 'skip_download': True,
1610 },
1611 },
1612 {
1613 # another example of '};' in ytInitialData
1614 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1615 'only_matching': True,
1616 },
1617 {
1618 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1619 'only_matching': True,
1620 },
1621 {
1622 # https://github.com/ytdl-org/youtube-dl/pull/28094
1623 'url': 'OtqTfy26tG0',
1624 'info_dict': {
1625 'id': 'OtqTfy26tG0',
1626 'ext': 'mp4',
1627 'title': 'Burn Out',
1628 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1629 'upload_date': '20141120',
1630 'uploader': 'The Cinematic Orchestra - Topic',
1631 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1632 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1633 'artist': 'The Cinematic Orchestra',
1634 'track': 'Burn Out',
1635 'album': 'Every Day',
1636 'release_data': None,
1637 'release_year': None,
1638 },
1639 'params': {
1640 'skip_download': True,
1641 },
1642 },
1643 {
1644 # controversial video, only works with bpctr when authenticated with cookies
1645 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1646 'only_matching': True,
1647 },
1648 {
1649 # controversial video, requires bpctr/contentCheckOk
1650 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1651 'info_dict': {
1652 'id': 'SZJvDhaSDnc',
1653 'ext': 'mp4',
1654 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1655 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1656 'uploader': 'CBS This Morning',
1657 'uploader_id': 'CBSThisMorning',
1658 'upload_date': '20140716',
1659 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1660 }
1661 },
1662 {
1663 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1664 'url': 'cBvYw8_A0vQ',
1665 'info_dict': {
1666 'id': 'cBvYw8_A0vQ',
1667 'ext': 'mp4',
1668 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1669 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1670 'upload_date': '20201120',
1671 'uploader': 'Walk around Japan',
1672 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1673 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1674 },
1675 'params': {
1676 'skip_download': True,
1677 },
1678 }, {
1679 # Has multiple audio streams
1680 'url': 'WaOKSUlf4TM',
1681 'only_matching': True
1682 }, {
1683 # Requires Premium: has format 141 when requested using YTM url
1684 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1685 'only_matching': True
1686 }, {
1687 # multiple subtitles with same lang_code
1688 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1689 'only_matching': True,
1690 }, {
1691 # Force use android client fallback
1692 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1693 'info_dict': {
1694 'id': 'YOelRv7fMxY',
1695 'title': 'DIGGING A SECRET TUNNEL Part 1',
1696 'ext': '3gp',
1697 'upload_date': '20210624',
1698 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1699 'uploader': 'colinfurze',
1700 'uploader_id': 'colinfurze',
1701 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1702 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1703 },
1704 'params': {
1705 'format': '17', # 3gp format available on android
1706 'extractor_args': {'youtube': {'player_client': ['android']}},
1707 },
1708 },
1709 {
1710 # Skip download of additional client configs (remix client config in this case)
1711 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1712 'only_matching': True,
1713 'params': {
1714 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1715 },
1716 }, {
1717 # shorts
1718 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1719 'only_matching': True,
1720 }, {
1721 'note': 'Storyboards',
1722 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
1723 'info_dict': {
1724 'id': '5KLPxDtMqe8',
1725 'ext': 'mhtml',
1726 'format_id': 'sb0',
1727 'title': 'Your Brain is Plastic',
1728 'uploader_id': 'scishow',
1729 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
1730 'upload_date': '20140324',
1731 'uploader': 'SciShow',
1732 }, 'params': {'format': 'mhtml', 'skip_download': True}
1733 }
1734 ]
1735
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    A URL whose ``list`` query parameter has a non-empty first value is
    rejected; any other URL is judged by the parent class's ``suitable``.
    """
    # Function-level import, kept exactly as in the original implementation.
    from ..utils import parse_qs

    list_value = parse_qs(url).get('list', [None])[0]
    return False if list_value else super(YoutubeIE, cls).suitable(url)
1744
def __init__(self, *args, **kwargs):
    """Initialise the extractor and create its two empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player code
    # _player_cache: keys built by the signature/nsig helpers -> extracted functions or values
    self._code_cache, self._player_cache = {}, {}
1749
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return an absolute player JS URL taken from the given ytcfg dicts.

    The ``PLAYER_JS_URL`` and ``WEB_PLAYER_CONTEXT_CONFIGS.*.jsUrl`` paths are
    searched. Protocol-relative and site-relative values are made absolute.
    Returns None when nothing is found. ``webpage`` is accepted but unused.
    """
    candidate = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=compat_str)
    if not candidate:
        return None
    if candidate.startswith('//'):
        return 'https:' + candidate
    if re.match(r'https?://', candidate):
        return candidate
    return compat_urlparse.urljoin('https://www.youtube.com', candidate)
1762
def _download_player_url(self, video_id, fatal=False):
    """Derive the player JS URL from the iframe API script.

    Downloads ``https://www.youtube.com/iframe_api``, pulls the 8-hex-digit
    player version out of it and builds the ``base.js`` URL from that.
    Returns None when the download or the version lookup yields nothing.
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None
    player_version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not player_version:
        return None
    return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
1772
1773 def _signature_cache_id(self, example_sig):
1774 """ Return a string representation of a signature """
1775 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1776
1777 @classmethod
1778 def _extract_player_info(cls, player_url):
1779 for player_re in cls._PLAYER_INFO_RE:
1780 id_m = re.search(player_re, player_url)
1781 if id_m:
1782 break
1783 else:
1784 raise ExtractorError('Cannot identify player %r' % player_url)
1785 return id_m.group('id')
1786
1787 def _load_player(self, video_id, player_url, fatal=True):
1788 player_id = self._extract_player_info(player_url)
1789 if player_id not in self._code_cache:
1790 code = self._download_webpage(
1791 player_url, video_id, fatal=fatal,
1792 note='Downloading player ' + player_id,
1793 errnote='Download of %s failed' % player_url)
1794 if code:
1795 self._code_cache[player_id] = code
1796 return self._code_cache.get(player_id)
1797
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms a signature shaped like ``example_sig``.

    A cached index list from the ``youtube-sigfuncs`` filesystem cache is used
    when present. Otherwise the function is parsed out of the player code and
    its index list is stored in that cache. Returns None when no player code
    could be loaded.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    stored_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if stored_spec is not None:
        return lambda s: ''.join(s[i] for i in stored_spec)

    code = self._load_player(video_id, player_url)
    if not code:
        return None
    sig_func = self._parse_sig_js(code)

    # Feed a probe string whose characters are chr(0), chr(1), ... so that the
    # output characters reveal which input positions were picked.
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(ch) for ch in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, index_spec)
    return sig_func
1820
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function ``func``.

    Does nothing unless the ``youtube_print_sig_code`` param is set. ``func``
    is run on a probe string as long as ``example_sig``; the resulting index
    sequence is rendered as a sum of index/slice expressions on ``s``.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            starts = str(start) if start != 0 else ''
            stop = end + step
            ends = ':' if stop < 0 else (':%d' % stop)
            steps = (':%d' % step) if step != 1 else ''
            return 's[%s%s%s]' % (starts, ends, steps)

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            delta = i - prev
            if step is not None:
                if delta != step:
                    # The run in progress stops at ``prev``
                    yield _genslice(start, prev, step)
                    step = None
                continue
            if delta in [-1, 1]:
                step, start = delta, prev
                continue
            yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(c) for c in func(probe)]
    expr_code = ' + '.join(gen_sig_code(index_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
1862
def _parse_sig_js(self, jscode):
    """Locate the signature function in ``jscode`` and wrap it as a callable.

    The function name is searched for with the patterns below (``sig`` group).
    The returned callable passes its single argument to the interpreted
    JS function.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(')
    funcname = self._search_regex(
        name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature_function(s):
        return initial_function([s])

    return run_signature_function
1886
1887 def _decrypt_signature(self, s, video_id, player_url):
1888 """Turn the encrypted s field into a working signature"""
1889
1890 if player_url is None:
1891 raise ExtractorError('Cannot decrypt signature without player_url')
1892
1893 try:
1894 player_id = (player_url, self._signature_cache_id(s))
1895 if player_id not in self._player_cache:
1896 func = self._extract_signature_function(
1897 video_id, player_url, s
1898 )
1899 self._player_cache[player_id] = func
1900 func = self._player_cache[player_id]
1901 self._print_sig_code(func, s)
1902 return func(s)
1903 except Exception as e:
1904 raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
1905
1906 def _decrypt_nsig(self, s, video_id, player_url):
1907 """Turn the encrypted n field into a working signature"""
1908 if player_url is None:
1909 raise ExtractorError('Cannot decrypt nsig without player_url')
1910 if player_url.startswith('//'):
1911 player_url = 'https:' + player_url
1912 elif not re.match(r'https?://', player_url):
1913 player_url = compat_urlparse.urljoin(
1914 'https://www.youtube.com', player_url)
1915
1916 sig_id = ('nsig_value', s)
1917 if sig_id in self._player_cache:
1918 return self._player_cache[sig_id]
1919
1920 try:
1921 player_id = ('nsig', player_url)
1922 if player_id not in self._player_cache:
1923 self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
1924 func = self._player_cache[player_id]
1925 self._player_cache[sig_id] = func(s)
1926 self.write_debug(f'Decrypted nsig {s} => {self._player_cache[sig_id]}')
1927 return self._player_cache[sig_id]
1928 except Exception as e:
1929 raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
1930
1931 def _extract_n_function_name(self, jscode):
1932 return self._search_regex(
1933 (r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',),
1934 jscode, 'Initial JS player n function name', group='nfunc')
1935
def _extract_n_function(self, video_id, player_url):
    """Return a callable that applies the player's n function to a string.

    The function code is read from the ``youtube-nsig`` filesystem cache when
    present. Otherwise it is extracted from the player code and stored there.
    With the ``youtube_print_sig_code`` param set, the code is also printed.
    """
    player_id = self._extract_player_info(player_url)
    func_code = self._downloader.cache.load('youtube-nsig', player_id)

    if not func_code:
        jscode = self._load_player(video_id, player_url)
        funcname = self._extract_n_function_name(jscode)
        jsi = JSInterpreter(jscode)
        func_code = jsi.extract_function_code(funcname)
        self._downloader.cache.store('youtube-nsig', player_id, func_code)
    else:
        jsi = JSInterpreter(func_code)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    def run_n_function(s):
        # The JS function is rebuilt from its code on every call
        return jsi.extract_function_from_code(*func_code)([s])

    return run_n_function
1953
1954 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1955 """
1956 Extract signatureTimestamp (sts)
1957 Required to tell API what sig/player version is in use.
1958 """
1959 sts = None
1960 if isinstance(ytcfg, dict):
1961 sts = int_or_none(ytcfg.get('STS'))
1962
1963 if not sts:
1964 # Attempt to extract from player
1965 if player_url is None:
1966 error_msg = 'Cannot extract signature timestamp without player_url.'
1967 if fatal:
1968 raise ExtractorError(error_msg)
1969 self.report_warning(error_msg)
1970 return
1971 code = self._load_player(video_id, player_url, fatal=fatal)
1972 if code:
1973 sts = int_or_none(self._search_regex(
1974 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
1975 'JS player signature timestamp', group='sts', fatal=fatal))
1976 return sts
1977
def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URL of the video.

    The URL comes from ``playbackTracking.videostatsPlaybackUrl.baseUrl`` in
    ``player_responses``; ``ver`` and a random ``cpn`` are set in its query.
    Warns and returns when no such URL is present.
    """
    playback_url = get_first(
        player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

    query['ver'] = ['2']
    query['cpn'] = [cpn]
    tracking_url = compat_urlparse.urlunparse(
        url_parts._replace(query=compat_urllib_parse_urlencode(query, True)))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
2003
@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embeds referenced by ``webpage``.

    Returns a list containing, in this order:
      * HTML-unescaped player URLs found in iframe/embed/object tags,
        ``data-video-url`` attributes, ``embedSWF(`` and ``new SWFObject(`` calls;
      * HTML-unescaped ``data-youtube-id`` values of ``lazyYT`` elements;
      * ``data-video_id`` values of "YouTube Video Importer" player divs.
    """
    # Embedded YouTube player
    # NOTE: the embedSWF alternative used to read ``embedSWF\(?:\s*``, a
    # malformed non-capturing group that demanded a literal ':' and therefore
    # could not match an ``embedSWF("...")`` call.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(map(
        unescapeHTML,
        re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Each match is a tuple of groups; the video id is the last one
    entries.extend(m[-1] for m in matches)

    return entries
2035
@staticmethod
def _extract_url(webpage):
    """Return the first entry produced by ``_extract_urls`` for ``webpage``, or None."""
    return next(iter(YoutubeIE._extract_urls(webpage)), None)
2040
@classmethod
def extract_id(cls, url):
    """Return the ``id`` group of ``cls._VALID_URL`` (verbose regex) matched against ``url``.

    Raises ExtractorError when the URL does not match.
    """
    mobj = re.compile(cls._VALID_URL, re.VERBOSE).match(url)
    if mobj is not None:
        return mobj.group('id')
    raise ExtractorError('Invalid URL: %s' % url)
2047
def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the ``chapteredPlayerBarRenderer`` chapters in ``data``.

    Start times are read from ``timeRangeStartMillis`` and scaled by 1000;
    titles come from ``title.simpleText``. The result is whatever
    ``_extract_chapters`` returns.
    """
    def start_of(chapter):
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    return self._extract_chapters(
        chapter_list, chapter_time=start_of, chapter_title=title_of, duration=duration)
2062
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build chapters from the macro-marker lists of the engagement panels.

    Each ``macroMarkersListRenderer`` contents list is tried in turn and the
    first non-empty chapter list is returned; ``[]`` when none yields chapters.
    """
    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
2078
2079 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2080 chapters = []
2081 last_chapter = {'start_time': 0}
2082 for idx, chapter in enumerate(chapter_list or []):
2083 title = chapter_title(chapter)
2084 start_time = chapter_time(chapter)
2085 if start_time is None:
2086 continue
2087 last_chapter['end_time'] = start_time
2088 if start_time < last_chapter['start_time']:
2089 if idx == 1:
2090 chapters.pop()
2091 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2092 else:
2093 self.report_warning(f'Invalid start time for chapter "{title}"')
2094 continue
2095 last_chapter = {'start_time': start_time, 'title': title}
2096 chapters.append(last_chapter)
2097 last_chapter['end_time'] = duration
2098 return chapters
2099
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Find a JSON variable in ``webpage`` and parse it (non-fatally).

    ``regex`` followed by ``self._YT_INITIAL_BOUNDARY_RE`` is tried first,
    then ``regex`` alone; ``'{}'`` is parsed when neither matches.
    """
    patterns = (
        r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
        regex)
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
2104
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no ``commentId``. ``parent`` defaults
    to ``'root'`` in the result.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return None

    text = self._get_text(comment_renderer, 'contentText')

    # note: timestamp is an estimate calculated from the current time and time_text
    timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
    author = self._get_text(comment_renderer, 'authorText')
    author_id = try_get(
        comment_renderer,
        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    vote_getters = (
        lambda x: x['voteCount']['simpleText'],
        lambda x: x['likeCount'])
    like_count = parse_count(try_get(comment_renderer, vote_getters, compat_str)) or 0
    author_thumbnail = try_get(
        comment_renderer,
        lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    action_buttons = try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {}

    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': like_count,
        'is_favorited': 'creatorHeart' in action_buttons,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
2139
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generate comment dicts for a comment section or, recursively, a reply thread.

    ``parent`` is the id of the parent comment when replies are being fetched.
    ``tracker`` is a dict of counters shared across the recursive calls; a
    fresh one is created when none is passed.
    """

    def get_single_config_arg(name):
        return self._configuration_arg(name, [''])[0]

    def extract_header(contents):
        _continuation = None
        for content in contents:
            header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_total = parse_count(self._get_text(
                header_renderer, 'countText', 'commentsCount', max_runs=1))

            if expected_total:
                tracker['est_total'] = expected_total
                self.to_screen(f'Downloading ~{expected_total} comments')
            # 1 = new, 0 = top
            sort_index = int(get_single_config_arg('comment_sort') != 'top')

            sort_item = try_get(
                header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][sort_index], dict) or {}
            sort_endpoint = sort_item.get('serviceEndpoint') or {}

            _continuation = (
                self._extract_continuation_ep_data(sort_endpoint)
                or self._extract_continuation(sort_item))
            if _continuation:
                sort_text = str_or_none(sort_item.get('title'))
                if not sort_text:
                    sort_text = 'top comments' if sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text.lower())
                break
        return _continuation

    def extract_thread(contents):
        if not parent:
            tracker['current_page_thread'] = 0
        counter_key = 'total_reply_comments' if parent else 'total_parent_comments'
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                # An empty item; the consuming loop below stops on it
                yield
            thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            renderer = get_first(
                (thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                expected_type=dict, default={})

            comment = self._extract_comment(renderer, parent)
            if not comment:
                continue

            tracker['running_total'] += 1
            tracker[counter_key] += 1
            yield comment

            # Attempt to get the replies
            replies_renderer = try_get(
                thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
            if not replies_renderer:
                continue

            tracker['current_page_thread'] += 1
            replies = self._comment_entries(
                replies_renderer, ytcfg, video_id,
                parent=comment.get('id'), tracker=tracker)
            reply_limit = min(
                max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))
            yield from itertools.islice(replies, reply_limit)

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': 0,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecation_warning(
            '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
    if max_depth == 1 and parent:
        return

    # Pad with empty strings so four limits can always be unpacked;
    # missing or unparsable ones become sys.maxsize.
    raw_limits = self._configuration_arg('max_comments', ) + [''] * 4
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = (
        int_or_none(value, default=sys.maxsize) for value in raw_limits)

    continuation = self._extract_continuation(root_continuation_data)
    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent:
        self.report_warning(message, video_id=video_id)

    response = None
    is_first_continuation = parent is None

    page_num = 0
    while continuation:
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
        if page_num != 0:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        elif is_first_continuation:
            note_prefix = 'Downloading comment section API JSON'
        else:
            note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                tracker['current_page_thread'], comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys='onResponseReceivedEndpoints')

        sections = traverse_obj(
            response, 'onResponseReceivedEndpoints', expected_type=list, default=[])

        continuation = None
        for section in sections:
            items = traverse_obj(
                section,
                (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
                get_all=False, expected_type=list) or []
            if is_first_continuation:
                # The first response only provides the header
                continuation = extract_header(items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(items):
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': items})
            if continuation:
                break

        page_num += 1
2276
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Returns an iterator over the comments of the ``comment-item-section``
    item section in ``contents``, cut off after the first value of the
    ``max_comments`` configuration argument.
    """
    def _real_comment_extract(contents):
        renderer = None
        for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if item.get('sectionIdentifier') == 'comment-item-section':
                renderer = item
                break
        yield from self._comment_entries(renderer, ytcfg, video_id)

    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_real_comment_extract(contents), 0, max_comments)
2287
2288 @staticmethod
2289 def _get_checkok_params():
2290 return {'contentCheckOk': True, 'racyCheckOk': True}
2291
2292 @classmethod
2293 def _generate_player_context(cls, sts=None):
2294 context = {
2295 'html5Preference': 'HTML5_PREF_WANTS',
2296 }
2297 if sts is not None:
2298 context['signatureTimestamp'] = sts
2299 return {
2300 'playbackContext': {
2301 'contentPlaybackContext': context
2302 },
2303 **cls._get_checkok_params()
2304 }
2305
@staticmethod
def _is_agegated(player_response):
    """Tell whether ``player_response`` describes an age-gated video.

    True when ``playabilityStatus.desktopLegacyAgeGateReason`` is set, or when
    the playability ``status``/``reason`` contains one of the known age-gate
    markers. The comparison ignores case.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # The markers are lower-case while status codes are written in upper case
    # elsewhere in this class (e.g. 'UNPLAYABLE'), so a case-sensitive
    # substring test would never hit the status markers.
    # Non-string values are ignored.
    lowered = [reason.lower() for reason in reasons if isinstance(reason, str)]
    return any(expected in reason for expected in AGE_GATE_REASONS for reason in lowered)
2317
@staticmethod
def _is_unplayable(player_response):
    """Tell whether ``playabilityStatus.status`` of ``player_response`` is ``'UNPLAYABLE'``."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
2321
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
    """Request the ``player`` endpoint for ``video_id`` as ``client``.

    The signature timestamp is only looked up when ``player_url`` is given.
    Returns the response, or None when it is empty.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    else:
        sts = None
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    yt_query = {'videoId': video_id, **self._generate_player_context(sts)}
    note = 'Downloading %s player API JSON' % client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client, note=note)
    return response or None
2338
def _get_requested_clients(self, url, smuggled_data):
    """Return the ordered, de-duplicated list of player clients to query.

    Clients come from the ``player_client`` configuration argument: known
    public client names are taken as-is, ``default`` expands to
    ``['android', 'web']`` and ``all`` to every public client sorted by
    descending priority. Unknown names are skipped with a warning. Without
    any valid entry the default list is used. For music URLs the ``*_music``
    variant of each selected client is added when such a client exists.
    """
    requested_clients = []
    default = ['android', 'web']
    # Clients whose name starts with an underscore cannot be requested
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if not client.startswith('_')),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions from a snapshot first: extending a list from a
        # generator that iterates the same list makes the loop walk over the
        # entries it has just appended.
        music_clients = [
            f'{client}_music' for client in requested_clients
            if f'{client}_music' in INNERTUBE_CLIENTS]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
2362
2363 def _extract_player_ytcfg(self, client, video_id):
2364 url = {
2365 'web_music': 'https://music.youtube.com',
2366 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2367 }.get(client)
2368 if not url:
2369 return {}
2370 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2371 return self.extract_ytcfg(video_id, webpage) or {}
2372
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
    """Collect player responses for the given clients.

    Returns ``(player_responses, player_url)``. Further clients (``*_agegate``,
    ``*_creator``) may be queued while iterating, depending on the responses.
    The last ExtractorError is raised if no response was collected at all;
    otherwise errors are only reported as warnings.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    original_clients = clients
    # Reversed copy, so that pop() hands out the clients in their given order
    clients = clients[::-1]
    prs = []

    def append_client(client_name):
        if client_name not in INNERTUBE_CLIENTS or client_name in original_clients:
            return
        clients.append(client_name)

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr:
        prs.append({**initial_pr, 'streamingData': None})

    last_error = None
    tried_iframe_fallback = False
    player_url = None
    while clients:
        client = clients.pop()
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The iframe API fallback is attempted at most once
        if require_js_player and not player_url and not tried_iframe_fallback:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        try:
            if client == 'web' and initial_pr:
                pr = initial_pr
            else:
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr)
        except ExtractorError as e:
            # Report the previous error, remember the new one
            if last_error:
                self.report_warning(last_error)
            last_error = e
            continue

        if pr:
            prs.append(pr)

        # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
        if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
            append_client(client.replace('_agegate', '_creator'))
        elif self._is_agegated(pr):
            append_client(f'{client}_agegate')

    if last_error:
        if not prs:
            raise last_error
        self.report_warning(last_error)
    return prs, player_url
2439
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """Yield format dicts built from the given streamingData objects.

    Direct (https) formats found under 'formats'/'adaptiveFormats' are
    yielded first, followed by the formats of the HLS and DASH manifests
    referenced by each streamingData entry.

    streaming_data -- iterable of streamingData dicts
    video_id       -- passed through to the signature/manifest helpers
    player_url     -- player URL used for signature decryption; formats that
                      need a signature are skipped when it is falsy
    is_live        -- for live videos the DASH manifest is only used when the
                      'include_live_dash' extractor argument is set
    """
    itags, stream_ids = {}, []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit `"audioQuality": null`
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: it has to be rebuilt from the signatureCipher query string
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(encrypted_sig, video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        query = parse_qs(fmt_url)
        throttled = False
        if query.get('n'):
            try:
                fmt_url = update_url_query(fmt_url, {
                    'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
            except ExtractorError as e:
                self.report_warning(
                    f'nsig extraction failed: You may experience throttling for some formats\n'
                    f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
                throttled = True

        if itag:
            itags[itag] = 'https'
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': join_nonempty(
                '%s%s' % (audio_track.get('displayName') or '',
                          ' (default)' if audio_track.get('audioIsDefault') else ''),
                # quality stays None when neither 'quality' nor 'audioQuality'
                # is present, so it must not be dereferenced blindly
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', ''),
                throttled and 'THROTTLED', delim=', '),
            'source_preference': -10 if throttled else -1,
            'fps': int_or_none(fmt.get('fps')) or None,
            'height': height,
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': audio_track.get('id', '').split('.')[0],
            'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    skip_manifests = self._configuration_arg('skip')
    get_dash = (
        (not is_live or self._configuration_arg('include_live_dash'))
        and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
    get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

    def process_manifest_format(f, proto, itag):
        # Returns False for a manifest format whose itag was already seen
        # with this protocol; otherwise assigns format_id/quality and returns True
        if itag in itags:
            if itags[itag] == proto or f'{itag}-{proto}' in itags:
                return False
            itag = f'{itag}-{proto}'
        if itag:
            f['format_id'] = itag
            itags[itag] = proto

        # Reuse the quality recorded for the same itag, else for the same height
        f['quality'] = next((
            q(qdict[val])
            for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
            if val in qdict), -1)
        return True

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                if process_manifest_format(f, 'hls', self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)):
                    yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                if process_manifest_format(f, 'dash', f['format_id']):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    yield f
2587
def _extract_storyboard(self, player_responses, duration):
    """Yield one 'mhtml' storyboard format per level of the storyboard spec.

    The spec is a '|'-separated string: the first field is the URL template
    and every following field describes one storyboard level as eight
    '#'-separated values. Nothing is yielded when there is no spec or when
    the duration is unknown, since the per-fragment durations are derived
    from it.
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # ''.split('|') is [''], so the list itself is never empty; an empty
    # template is what actually signals "no storyboard"
    base_url = spec.pop()
    if not base_url or not duration:
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        # spec was reversed above, so index i corresponds to level L - i
        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fragments': [{
                'path': url.replace('$M', str(j)),
                # The last sheet may be only partially filled
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
2622
def _real_extract(self, url):
    """Extract the info dict for a single video URL.

    Downloads the watch page (unless skipped via the 'player_skip' extractor
    argument) and the player responses, then assembles formats, thumbnails,
    subtitles and metadata. Returns a url/playlist result instead when the
    video is only a trailer stub or a multifeed video.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id
    webpage = None
    if 'webpage' not in self._configuration_arg('player_skip'):
        webpage = self._download_webpage(
            webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    player_responses, player_url = self._extract_player_responses(
        self._get_requested_clients(url, smuggled_data),
        video_id, webpage, master_ytcfg)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Without a webpage there is no HTML metadata to fall back on
    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])
    video_title = (
        get_first(video_details, 'title')
        or self._get_text(microformats, (..., 'title'))
        or search_meta(['og:title', 'twitter:title', 'title']))
    video_description = get_first(video_details, 'shortDescription')

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = compat_parse_qs(
                    compat_urllib_parse_unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], compat_str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += ' (%s)' % feed_title
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                % (', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # reason may be None; report the subreason on its own in that case
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break

    thumbnails = []
    thumbnail_dicts = traverse_obj(
        (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
        expected_type=dict, default=[])
    for thumbnail in thumbnail_dicts:
        thumbnail_url = thumbnail.get('url')
        if not thumbnail_url:
            continue
        # Sometimes youtube gives a wrong thumbnail URL. See:
        # https://github.com/yt-dlp/yt-dlp/issues/233
        # https://github.com/ytdl-org/youtube-dl/issues/28023
        if 'maxresdefault' in thumbnail_url:
            thumbnail_url = thumbnail_url.split('?')[0]
        thumbnails.append({
            'url': thumbnail_url,
            'height': int_or_none(thumbnail.get('height')),
            'width': int_or_none(thumbnail.get('width')),
        })
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
        'hqdefault', 'hq1', 'hq2', 'hq3', '0',
        'mqdefault', 'mq1', 'mq2', 'mq3',
        'default', '1', '2', '3'
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Earlier names in thumbnail_names get a higher preference; webp beats jpg
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    duration = int_or_none(
        get_first(video_details, 'lengthSeconds')
        or get_first(microformats, 'lengthSeconds')
        or parse_duration(search_meta('duration'))) or None
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    if is_live is None:
        if is_upcoming or live_content is False:
            is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False
    live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_endtime and live_starttime:
        duration = live_endtime - live_starttime

    formats.extend(self._extract_storyboard(player_responses, duration))

    # Source is given priority since formats that throttle are given lower source_preference
    # When throttling issue is fully fixed, remove this
    self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))

    info = {
        'id': video_id,
        'title': video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if user don't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'upload_date': unified_strdate(
            get_first(microformats, 'uploadDate')
            or search_meta('uploadDate')),
        'uploader': get_first(video_details, 'author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'is_live': is_live,
        'was_live': (False if is_live or is_upcoming or live_content is False
                     else None if is_live is None or is_upcoming is None
                     else live_content),
        'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
        'release_timestamp': live_starttime,
    }

    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}

        def process_language(container, base_url, lang_code, sub_name, query):
            # Adds one entry per supported subtitle format under lang_code
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': update_url_query(base_url, query),
                    'name': sub_name,
                })

        subtitles, automatic_captions = {}, {}
        for lang_code, caption_track in captions.items():
            track_url = caption_track.get('baseUrl')
            if not track_url:
                continue
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, track_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                # The `tlang` query parameter must be the plain target
                # language; only the key used in our dict gets the
                # "-<source language>" suffix
                tlang = trans_code
                if caption_track.get('kind') != 'asr':
                    trans_code += f'-{lang_code}'
                    # trans_name may be None when the language name is missing
                    trans_name = (trans_name or '') + format_field(lang_name, template=' from %s')
                process_language(
                    automatic_captions, track_url, trans_code, trans_name, {'tlang': tlang})
        info['automatic_captions'] = automatic_captions
        info['subtitles'] = subtitles

    # start/end times may be given in the URL fragment or query; fragment wins
    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(query[k][0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # strip the captured value, not the group name
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_DATA_RE, video_id,
            'yt initial data')
    if not initial_data:
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query,
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    try:
        # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
        info.setdefault('subtitles', {})['live_chat'] = [{
            'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]
    except (KeyError, IndexError, TypeError):
        pass

    # Must be bound even without initial data: it is handed to
    # extract_comments() at the end
    contents = []
    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or None)

        contents = try_get(
            initial_data,
            lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
            list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = self._get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl or '')
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    # Only use the tooltip when it has the "<likes> / <dislikes>" shape
                    sentiment_counts = sbr_tooltip.split(' / ')
                    if len(sentiment_counts) == 2:
                        info.update({
                            'like_count': str_to_int(sentiment_counts[0]),
                            'dislike_count': str_to_int(sentiment_counts[1]),
                        })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                # A divider line between rows means several songs are listed,
                # in which case no single album/artist/track is recorded
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = self._get_text(mrr, 'title')
                    mrr_contents_text = self._get_text(mrr, ('contents', 0))
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }
    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = get_first(video_details, 'isPrivate', expected_type=bool)
    is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        # `contents` was already extracted from initial_data above
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
3098
3099
3100 class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
3101
3102 def _extract_channel_id(self, webpage):
3103 channel_id = self._html_search_meta(
3104 'channelId', webpage, 'channel id', default=None)
3105 if channel_id:
3106 return channel_id
3107 channel_url = self._html_search_meta(
3108 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3109 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3110 'twitter:app:url:googleplay'), webpage, 'channel url')
3111 return self._search_regex(
3112 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3113 channel_url, 'channel id')
3114
3115 @staticmethod
3116 def _extract_basic_item_renderer(item):
3117 # Modified from _extract_grid_item_renderer
3118 known_basic_renderers = (
3119 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3120 )
3121 for key, renderer in item.items():
3122 if not isinstance(renderer, dict):
3123 continue
3124 elif key in known_basic_renderers:
3125 return renderer
3126 elif key.startswith('grid') and key.endswith('Renderer'):
3127 return renderer
3128
3129 def _grid_entries(self, grid_renderer):
3130 for item in grid_renderer['items']:
3131 if not isinstance(item, dict):
3132 continue
3133 renderer = self._extract_basic_item_renderer(item)
3134 if not isinstance(renderer, dict):
3135 continue
3136 title = self._get_text(renderer, 'title')
3137
3138 # playlist
3139 playlist_id = renderer.get('playlistId')
3140 if playlist_id:
3141 yield self.url_result(
3142 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3143 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3144 video_title=title)
3145 continue
3146 # video
3147 video_id = renderer.get('videoId')
3148 if video_id:
3149 yield self._extract_video(renderer)
3150 continue
3151 # channel
3152 channel_id = renderer.get('channelId')
3153 if channel_id:
3154 yield self.url_result(
3155 'https://www.youtube.com/channel/%s' % channel_id,
3156 ie=YoutubeTabIE.ie_key(), video_title=title)
3157 continue
3158 # generic endpoint URL support
3159 ep_url = urljoin('https://www.youtube.com/', try_get(
3160 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3161 compat_str))
3162 if ep_url:
3163 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3164 if ie.suitable(ep_url):
3165 yield self.url_result(
3166 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3167 break
3168
3169 def _shelf_entries_from_content(self, shelf_renderer):
3170 content = shelf_renderer.get('content')
3171 if not isinstance(content, dict):
3172 return
3173 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3174 if renderer:
3175 # TODO: add support for nested playlists so each shelf is processed
3176 # as separate playlist
3177 # TODO: this includes only first N items
3178 for entry in self._grid_entries(renderer):
3179 yield entry
3180 renderer = content.get('horizontalListRenderer')
3181 if renderer:
3182 # TODO
3183 pass
3184
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a url result for the shelf's own URL, then its inline entries.

    With skip_channels set, a shelf whose URL contains '/channels?' yields
    nothing at all.
    """
    endpoint = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint)
    if shelf_url:
        # Skipping links to another channels, note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
3201
3202 def _playlist_entries(self, video_list_renderer):
3203 for content in video_list_renderer['contents']:
3204 if not isinstance(content, dict):
3205 continue
3206 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3207 if not isinstance(renderer, dict):
3208 continue
3209 video_id = renderer.get('videoId')
3210 if not video_id:
3211 continue
3212 yield self._extract_video(renderer)
3213
def _rich_entries(self, rich_grid_renderer):
    """Yield the video found at content.videoRenderer, if it has a 'videoId'."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3221
3222 def _video_entry(self, video_renderer):
3223 video_id = video_renderer.get('videoId')
3224 if video_id:
3225 return self._extract_video(video_renderer)
3226
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos and playlists referenced by a backstage (community) post.

    Covers, in order: the attached video, the attached playlist, and video
    links inside the post text other than the attached video.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # The attached video was already yielded above
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3262
3263 def _post_thread_continuation_entries(self, post_thread_continuation):
3264 contents = post_thread_continuation.get('contents')
3265 if not isinstance(contents, list):
3266 return
3267 for content in contents:
3268 renderer = content.get('backstagePostThreadRenderer')
3269 if not isinstance(renderer, dict):
3270 continue
3271 for entry in self._post_thread_entries(renderer):
3272 yield entry
3273
3274 r''' # unused
3275 def _rich_grid_entries(self, contents):
3276 for content in contents:
3277 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3278 if video_renderer:
3279 entry = self._video_entry(video_renderer)
3280 if entry:
3281 yield entry
3282 '''
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found in parent_renderer['contents'].

    continuation_list is modified in-place: on completion it holds exactly
    one element, the continuation found for the next page (or a falsy value).
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Maps the renderer key of an item section content to the callable
    # producing its entries
    handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'shelfRenderer': lambda x: self._shelf_entries(x),
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = try_get(content, lambda x: x['itemSectionRenderer'], dict)
        if not section:
            # Not an item section: only rich items are understood here
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            continue

        for section_content in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_content, dict):
                continue
            # Only the first known renderer of each section content is processed
            for key, renderer in section_content.items():
                handler = handlers.get(key)
                if handler is None:
                    continue
                for entry in handler(renderer):
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
3326
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """
    Yield the entries of a tab, then keep requesting continuation pages and
    yielding their entries until no continuation (or no response) is left
    @param tab: tab renderer; entries are read from tab['content']
    @param item_id: id used to label each page request
    @param ytcfg: passed on to the header generation and to the page requests
    @param account_syncid: passed on to the header generation
    @param visitor_data: initial visitor data; replaced by the value found
                         in each response, when there is one
    """
    continuation_list = [None]
    # The lambda looks up the name `continuation_list` when it is called, so it
    # keeps working after the name is rebound to a fresh list further down
    extract_entries = lambda x: self._extract_entries(x, continuation_list)
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # Neither dispatch table depends on the response, so build them once
    # instead of once per page
    known_continuation_renderers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Renderer key -> (entry extractor, key under which the items are wrapped)
    known_renderers = {
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        # First response shape: a known renderer directly under 'continuationContents'
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Fresh list so that a token left over from the previous page is never reused
            continuation_list = [None]
            yield from known_continuation_renderers[key](continuation_renderer)
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response shape: items appended through 'appendContinuationItemsAction'.
        # The type of the first item decides how the whole list is processed
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            extractor, wrapper_key = known_renderers[key]
            video_items_renderer = {wrapper_key: continuation_items}
            continuation_list = [None]
            yield from extractor(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Nothing recognised in this response; stop paginating
        break
3403
@staticmethod
def _extract_selected_tab(tabs):
    """
    Return the renderer of the first tab whose 'selected' value is True
    @param tabs: iterable of tab items; the renderer is looked up under
                 'tabRenderer' or 'expandableTabRenderer'
    Raises ExtractorError when no tab is selected.
    """
    tab_renderers = (
        dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        for tab in tabs)
    selected = next(
        (candidate for candidate in tab_renderers if candidate.get('selected') is True),
        None)
    if selected is None:
        raise ExtractorError('Unable to find selected tab')
    return selected
3412
@classmethod
def _extract_uploader(cls, data):
    """
    Extract the uploader name, id and URL from the
    'playlistSidebarSecondaryInfoRenderer' of the sidebar
    @param data: response
    @returns: dict with any of 'uploader', 'uploader_id' and 'uploader_url';
              keys whose value is None are left out
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    fields = (
        ('uploader', owner.get('text')),
        ('uploader_id', try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)),
        ('uploader_url', urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))),
    )
    return {name: value for name, value in fields if value is not None}
3427
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """
    Build the playlist result for the selected tab
    @param item_id: fallback playlist id when the metadata provides none
    @param ytcfg: passed on to the entry extraction
    @param data: response; metadata is read from its channel or playlist
                 metadata renderer, sidebar and header
    @param tabs: tab items from which the selected one is picked
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    playlist_id = title = description = None
    tags, raw_thumbnails = [], []

    # Channel metadata takes precedence; playlist metadata is the fallback
    meta_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if meta_renderer:
        channel_name = meta_renderer.get('title')
        channel_url = meta_renderer.get('channelUrl')
        channel_id = meta_renderer.get('externalId')
    else:
        meta_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if meta_renderer:
        title = meta_renderer.get('title')
        description = meta_renderer.get('description', '')
        playlist_id = channel_id
        tags = meta_renderer.get('keywords', '').split()
        raw_thumbnails = (
            try_get(meta_renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    # Keep only well-formed thumbnails that have a valid URL
    thumbnails = []
    for thumb in raw_thumbnails:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag_title = try_get(
            data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag_title or playlist_id
    # The selected tab's title and expanded text, when present, are appended to the title
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        # No channel metadata: take the uploader from the sidebar instead
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror the (possibly updated) uploader fields
    channel_fields = {
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url'],
    }
    metadata.update(channel_fields)

    entries = self._entries(
        selected_tab, playlist_id, ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)
3502
3503 def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
3504 first_id = last_id = response = None
3505 for page_num in itertools.count(1):
3506 videos = list(self._playlist_entries(playlist))
3507 if not videos:
3508 return
3509 start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
3510 if start >= len(videos):
3511 return
3512 for video in videos[start:]:
3513 if video['id'] == first_id:
3514 self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
3515 return
3516 yield video
3517 first_id = first_id or videos[0]['id']
3518 last_id = videos[-1]['id']
3519 watch_endpoint = try_get(
3520 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
3521 headers = self.generate_api_headers(
3522 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
3523 visitor_data=self._extract_visitor_data(response, data, ytcfg))
3524 query = {
3525 'playlistId': playlist_id,
3526 'videoId': watch_endpoint.get('videoId') or last_id,
3527 'index': watch_endpoint.get('index') or len(videos),
3528 'params': watch_endpoint.get('params') or 'OAE%3D'
3529 }
3530 response = self._extract_response(
3531 item_id='%s page %d' % (playlist_id, page_num),
3532 query=query, ep='next', headers=headers, ytcfg=ytcfg,
3533 check_get_keys='contents'
3534 )
3535 playlist = try_get(
3536 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3537
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Return the result for an inline playlist: a mix is extracted directly,
    anything else is delegated to its own playlist URL
    @param item_id: fallback id when the playlist carries no 'playlistId'
    @param url: URL being extracted
    @param data: response; provides the fallback title and the request headers
    @param playlist: playlist renderer
    @param ytcfg: passed on to the mix extraction
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    web_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, web_url)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    handle_as_mix = not playlist_url or playlist_url == url
    if handle_as_mix:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
3555
def _extract_availability(self, data):
    """
    Determine the availability of a playlist/tab from its sidebar badges and,
    when present, from the selected entry of the privacy dropdown.
    Note: it is never assumed to be public unless a 'public' label says so
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for dropdown_entry in dropdown_entries:
        selected = try_get(
            dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
        if selected:
            selected_label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
            if selected_label:
                labels.add(selected_label.lower())
                break

    is_private = is_unlisted = None
    for label in labels:
        if label == 'public':
            is_unlisted = is_private = False
        elif label == 'private':
            is_private = True
        elif label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)
3587
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first non-empty renderer stored under the key info_renderer
    among the playlist sidebar items, or None when there is none
    @param data: response
    @param info_renderer: key of the wanted renderer
    @param expected_type: type the renderer must have to be accepted
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next(filter(None, candidates), None)
3596
3597 def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
3598 """
3599 Get playlist with unavailable videos if the 'show unavailable videos' button exists.
3600 """
3601 browse_id = params = None
3602 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
3603 if not renderer:
3604 return
3605 menu_renderer = try_get(
3606 renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
3607 for menu_item in menu_renderer:
3608 if not isinstance(menu_item, dict):
3609 continue
3610 nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
3611 text = try_get(
3612 nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
3613 if not text or text.lower() != 'show unavailable videos':
3614 continue
3615 browse_endpoint = try_get(
3616 nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
3617 browse_id = browse_endpoint.get('browseId')
3618 params = browse_endpoint.get('params')
3619 break
3620
3621 headers = self.generate_api_headers(
3622 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
3623 visitor_data=self._extract_visitor_data(data, ytcfg))
3624 query = {
3625 'params': params or 'wgYCCAA=',
3626 'browseId': browse_id or 'VL%s' % item_id
3627 }
3628 return self._extract_response(
3629 item_id=item_id, headers=headers, query=query,
3630 check_get_keys='contents', fatal=False, ytcfg=ytcfg,
3631 note='Downloading API JSON with unavailable videos')
3632
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage at url and extract its initial data, retrying up to
    the number of times given by the 'extractor_retries' param (default 3)
    @param url: URL of the page to download
    @param item_id: id passed to the download and extraction helpers
    @param fatal: if True, the error that ends the attempts is raised;
                  otherwise it is reported as a warning
    @returns: tuple (webpage, data); either can still be None when the
              attempts ended with a non-fatal error
    """
    retries = self.get_param('extractor_retries', 3)
    # Incremented at the top of every iteration, so the first attempt is number 0
    count = -1
    webpage = data = last_error = None
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            webpage = self._download_webpage(
                url, item_id,
                note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            # Network errors are retried while attempts remain, except HTTP 403 and 429
            if isinstance(e.cause, network_exceptions):
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e.msg)
                    if count < retries:
                        continue
            # Not retried (or out of attempts): raise, or warn and give up
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            break
        else:
            # Download and extraction succeeded; alerts found in the data end the attempts
            try:
                self._extract_and_report_alerts(data)
            except ExtractorError as e:
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                break

            # The data is treated as complete once it has one of these keys
            if dict_get(data, ('contents', 'currentVideoEndpoint')):
                break

            last_error = 'Incomplete yt initial data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                self.report_warning(last_error)
                break

    return webpage, data
3678
3679 def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
3680 data = None
3681 if 'webpage' not in self._configuration_arg('skip'):
3682 webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
3683 ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
3684 if not data:
3685 if not ytcfg and self.is_authenticated:
3686 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
3687 if 'authcheck' not in self._configuration_arg('skip') and fatal:
3688 raise ExtractorError(
3689 msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
3690 ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
3691 expected=True)
3692 self.report_warning(msg, only_once=True)
3693 data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
3694 return data, ytcfg
3695
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve url through the 'navigation/resolve_url' endpoint and request
    the browse or watch endpoint it resolves to
    @param url: URL to resolve
    @param item_id: id used to label the requests
    @param ytcfg: passed on to the header generation and to the requests
    @param fatal: if True, an unresolvable URL raises ExtractorError;
                  otherwise a warning is reported and None is returned
    @param default_client: client used for the headers and the requests
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # Resolved endpoint key -> API endpoint to query with its parameters
    endpoint_names = (('browseEndpoint', 'browse'), ('watchEndpoint', 'next'))
    for endpoint_key, api_endpoint in endpoint_names:
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_endpoint, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return
    raise ExtractorError(err_note, expected=True)
3713
3714 @staticmethod
3715 def _smuggle_data(entries, data):
3716 for entry in entries:
3717 if data:
3718 entry['url'] = smuggle_url(entry['url'], data)
3719 yield entry
3720
# Value sent as the 'params' field of a search request when _search_results
# is called without explicit params; nothing is sent while it is falsy
_SEARCH_PARAMS = None
3722
def _search_results(self, query, params=NO_DEFAULT):
    """
    Yield the results of a search, requesting page after page from the
    'search' endpoint until a page provides no continuation
    @param query: search query
    @param params: value for the request's 'params' field; defaults to
                   self._SEARCH_PARAMS and is left out when falsy
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    request = {'query': query}
    if params:
        request['params'] = params

    # Filled in place by _extract_entries with the continuation for the next page
    continuation_list = [None]
    for page_num in itertools.count(1):
        request.update(continuation_list[0] or {})
        response = self._extract_response(
            item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
            check_get_keys=('contents', 'onResponseReceivedCommands'))
        # The first page and the continuation pages keep their results in different places
        content_paths = (
            lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
            lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems'])
        slr_contents = try_get(response, content_paths, list)
        yield from self._extract_entries({'contents': slr_contents}, continuation_list)
        if not continuation_list[0]:
            break
3743
3744
3745 class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
3746 IE_DESC = 'YouTube Tabs'
3747 _VALID_URL = r'''(?x:
3748 https?://
3749 (?:\w+\.)?
3750 (?:
3751 youtube(?:kids)?\.com|
3752 %(invidious)s
3753 )/
3754 (?:
3755 (?P<channel_type>channel|c|user|browse)/|
3756 (?P<not_channel>
3757 feed/|hashtag/|
3758 (?:playlist|watch)\?.*?\blist=
3759 )|
3760 (?!(?:%(reserved_names)s)\b) # Direct URLs
3761 )
3762 (?P<id>[^/?\#&]+)
3763 )''' % {
3764 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
3765 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
3766 }
3767 IE_NAME = 'youtube:tab'
3768
3769 _TESTS = [{
3770 'note': 'playlists, multipage',
3771 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3772 'playlist_mincount': 94,
3773 'info_dict': {
3774 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3775 'title': 'Игорь Клейнер - Playlists',
3776 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3777 'uploader': 'Игорь Клейнер',
3778 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3779 },
3780 }, {
3781 'note': 'playlists, multipage, different order',
3782 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3783 'playlist_mincount': 94,
3784 'info_dict': {
3785 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3786 'title': 'Игорь Клейнер - Playlists',
3787 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3788 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3789 'uploader': 'Игорь Клейнер',
3790 },
3791 }, {
3792 'note': 'playlists, series',
3793 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3794 'playlist_mincount': 5,
3795 'info_dict': {
3796 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3797 'title': '3Blue1Brown - Playlists',
3798 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3799 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3800 'uploader': '3Blue1Brown',
3801 },
3802 }, {
3803 'note': 'playlists, singlepage',
3804 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3805 'playlist_mincount': 4,
3806 'info_dict': {
3807 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3808 'title': 'ThirstForScience - Playlists',
3809 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3810 'uploader': 'ThirstForScience',
3811 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3812 }
3813 }, {
3814 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3815 'only_matching': True,
3816 }, {
3817 'note': 'basic, single video playlist',
3818 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3819 'info_dict': {
3820 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3821 'uploader': 'Sergey M.',
3822 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3823 'title': 'youtube-dl public playlist',
3824 },
3825 'playlist_count': 1,
3826 }, {
3827 'note': 'empty playlist',
3828 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3829 'info_dict': {
3830 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3831 'uploader': 'Sergey M.',
3832 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3833 'title': 'youtube-dl empty playlist',
3834 },
3835 'playlist_count': 0,
3836 }, {
3837 'note': 'Home tab',
3838 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3839 'info_dict': {
3840 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3841 'title': 'lex will - Home',
3842 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3843 'uploader': 'lex will',
3844 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3845 },
3846 'playlist_mincount': 2,
3847 }, {
3848 'note': 'Videos tab',
3849 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3850 'info_dict': {
3851 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3852 'title': 'lex will - Videos',
3853 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3854 'uploader': 'lex will',
3855 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3856 },
3857 'playlist_mincount': 975,
3858 }, {
3859 'note': 'Videos tab, sorted by popular',
3860 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3861 'info_dict': {
3862 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3863 'title': 'lex will - Videos',
3864 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3865 'uploader': 'lex will',
3866 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3867 },
3868 'playlist_mincount': 199,
3869 }, {
3870 'note': 'Playlists tab',
3871 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3872 'info_dict': {
3873 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3874 'title': 'lex will - Playlists',
3875 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3876 'uploader': 'lex will',
3877 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3878 },
3879 'playlist_mincount': 17,
3880 }, {
3881 'note': 'Community tab',
3882 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3883 'info_dict': {
3884 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3885 'title': 'lex will - Community',
3886 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3887 'uploader': 'lex will',
3888 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3889 },
3890 'playlist_mincount': 18,
3891 }, {
3892 'note': 'Channels tab',
3893 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3894 'info_dict': {
3895 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3896 'title': 'lex will - Channels',
3897 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3898 'uploader': 'lex will',
3899 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3900 },
3901 'playlist_mincount': 12,
3902 }, {
3903 'note': 'Search tab',
3904 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3905 'playlist_mincount': 40,
3906 'info_dict': {
3907 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3908 'title': '3Blue1Brown - Search - linear algebra',
3909 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3910 'uploader': '3Blue1Brown',
3911 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3912 },
3913 }, {
3914 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3915 'only_matching': True,
3916 }, {
3917 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3918 'only_matching': True,
3919 }, {
3920 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3921 'only_matching': True,
3922 }, {
3923 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3924 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3925 'info_dict': {
3926 'title': '29C3: Not my department',
3927 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3928 'uploader': 'Christiaan008',
3929 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3930 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3931 },
3932 'playlist_count': 96,
3933 }, {
3934 'note': 'Large playlist',
3935 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3936 'info_dict': {
3937 'title': 'Uploads from Cauchemar',
3938 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3939 'uploader': 'Cauchemar',
3940 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3941 },
3942 'playlist_mincount': 1123,
3943 }, {
3944 'note': 'even larger playlist, 8832 videos',
3945 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3946 'only_matching': True,
3947 }, {
3948 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3949 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3950 'info_dict': {
3951 'title': 'Uploads from Interstellar Movie',
3952 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3953 'uploader': 'Interstellar Movie',
3954 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3955 },
3956 'playlist_mincount': 21,
3957 }, {
3958 'note': 'Playlist with "show unavailable videos" button',
3959 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3960 'info_dict': {
3961 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3962 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3963 'uploader': 'Phim Siêu Nhân Nhật Bản',
3964 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3965 },
3966 'playlist_mincount': 200,
3967 }, {
3968 'note': 'Playlist with unavailable videos in page 7',
3969 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3970 'info_dict': {
3971 'title': 'Uploads from BlankTV',
3972 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3973 'uploader': 'BlankTV',
3974 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3975 },
3976 'playlist_mincount': 1000,
3977 }, {
3978 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3979 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3980 'info_dict': {
3981 'title': 'Data Analysis with Dr Mike Pound',
3982 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3983 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3984 'uploader': 'Computerphile',
3985 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3986 },
3987 'playlist_mincount': 11,
3988 }, {
3989 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3990 'only_matching': True,
3991 }, {
3992 'note': 'Playlist URL that does not actually serve a playlist',
3993 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3994 'info_dict': {
3995 'id': 'FqZTN594JQw',
3996 'ext': 'webm',
3997 'title': "Smiley's People 01 detective, Adventure Series, Action",
3998 'uploader': 'STREEM',
3999 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
4000 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
4001 'upload_date': '20150526',
4002 'license': 'Standard YouTube License',
4003 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
4004 'categories': ['People & Blogs'],
4005 'tags': list,
4006 'view_count': int,
4007 'like_count': int,
4008 'dislike_count': int,
4009 },
4010 'params': {
4011 'skip_download': True,
4012 },
4013 'skip': 'This video is not available.',
4014 'add_ie': [YoutubeIE.ie_key()],
4015 }, {
4016 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
4017 'only_matching': True,
4018 }, {
4019 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
4020 'only_matching': True,
4021 }, {
4022 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
4023 'info_dict': {
4024 'id': '3yImotZU3tw', # This will keep changing
4025 'ext': 'mp4',
4026 'title': compat_str,
4027 'uploader': 'Sky News',
4028 'uploader_id': 'skynews',
4029 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
4030 'upload_date': r're:\d{8}',
4031 'description': compat_str,
4032 'categories': ['News & Politics'],
4033 'tags': list,
4034 'like_count': int,
4035 'dislike_count': int,
4036 },
4037 'params': {
4038 'skip_download': True,
4039 },
4040 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
4041 }, {
4042 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
4043 'info_dict': {
4044 'id': 'a48o2S1cPoo',
4045 'ext': 'mp4',
4046 'title': 'The Young Turks - Live Main Show',
4047 'uploader': 'The Young Turks',
4048 'uploader_id': 'TheYoungTurks',
4049 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
4050 'upload_date': '20150715',
4051 'license': 'Standard YouTube License',
4052 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
4053 'categories': ['News & Politics'],
4054 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
4055 'like_count': int,
4056 'dislike_count': int,
4057 },
4058 'params': {
4059 'skip_download': True,
4060 },
4061 'only_matching': True,
4062 }, {
4063 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
4064 'only_matching': True,
4065 }, {
4066 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
4067 'only_matching': True,
4068 }, {
4069 'note': 'A channel that is not live. Should raise error',
4070 'url': 'https://www.youtube.com/user/numberphile/live',
4071 'only_matching': True,
4072 }, {
4073 'url': 'https://www.youtube.com/feed/trending',
4074 'only_matching': True,
4075 }, {
4076 'url': 'https://www.youtube.com/feed/library',
4077 'only_matching': True,
4078 }, {
4079 'url': 'https://www.youtube.com/feed/history',
4080 'only_matching': True,
4081 }, {
4082 'url': 'https://www.youtube.com/feed/subscriptions',
4083 'only_matching': True,
4084 }, {
4085 'url': 'https://www.youtube.com/feed/watch_later',
4086 'only_matching': True,
4087 }, {
4088 'note': 'Recommended - redirects to home page.',
4089 'url': 'https://www.youtube.com/feed/recommended',
4090 'only_matching': True,
4091 }, {
4092 'note': 'inline playlist with not always working continuations',
4093 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
4094 'only_matching': True,
4095 }, {
4096 'url': 'https://www.youtube.com/course',
4097 'only_matching': True,
4098 }, {
4099 'url': 'https://www.youtube.com/zsecurity',
4100 'only_matching': True,
4101 }, {
4102 'url': 'http://www.youtube.com/NASAgovVideo/videos',
4103 'only_matching': True,
4104 }, {
4105 'url': 'https://www.youtube.com/TheYoungTurks/live',
4106 'only_matching': True,
4107 }, {
4108 'url': 'https://www.youtube.com/hashtag/cctv9',
4109 'info_dict': {
4110 'id': 'cctv9',
4111 'title': '#cctv9',
4112 },
4113 'playlist_mincount': 350,
4114 }, {
4115 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
4116 'only_matching': True,
4117 }, {
4118 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
4119 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4120 'only_matching': True
4121 }, {
4122 'note': '/browse/ should redirect to /channel/',
4123 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
4124 'only_matching': True
4125 }, {
4126 'note': 'VLPL, should redirect to playlist?list=PL...',
4127 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4128 'info_dict': {
4129 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
4130 'uploader': 'NoCopyrightSounds',
4131 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
4132 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
4133 'title': 'NCS Releases',
4134 },
4135 'playlist_mincount': 166,
4136 }, {
4137 'note': 'Topic, should redirect to playlist?list=UU...',
4138 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
4139 'info_dict': {
4140 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
4141 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4142 'title': 'Uploads from Royalty Free Music - Topic',
4143 'uploader': 'Royalty Free Music - Topic',
4144 },
4145 'expected_warnings': [
4146 'A channel/user page was given',
4147 'The URL does not have a videos tab',
4148 ],
4149 'playlist_mincount': 101,
4150 }, {
4151 'note': 'Topic without a UU playlist',
4152 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
4153 'info_dict': {
4154 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
4155 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
4156 },
4157 'expected_warnings': [
4158 'A channel/user page was given',
4159 'The URL does not have a videos tab',
4160 'Falling back to channel URL',
4161 ],
4162 'playlist_mincount': 9,
4163 }, {
4164 'note': 'Youtube music Album',
4165 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
4166 'info_dict': {
4167 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
4168 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
4169 },
4170 'playlist_count': 50,
4171 }, {
4172 'note': 'unlisted single video playlist',
4173 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
4174 'info_dict': {
4175 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
4176 'uploader': 'colethedj',
4177 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
4178 'title': 'yt-dlp unlisted playlist test',
4179 'availability': 'unlisted'
4180 },
4181 'playlist_count': 1,
4182 }, {
4183 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
4184 'url': 'https://www.youtube.com/feed/recommended',
4185 'info_dict': {
4186 'id': 'recommended',
4187 'title': 'recommended',
4188 },
4189 'playlist_mincount': 50,
4190 'params': {
4191 'skip_download': True,
4192 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4193 },
4194 }, {
4195 'note': 'API Fallback: /videos tab, sorted by oldest first',
4196 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
4197 'info_dict': {
4198 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
4199 'title': 'Cody\'sLab - Videos',
4200 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
4201 'uploader': 'Cody\'sLab',
4202 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
4203 },
4204 'playlist_mincount': 650,
4205 'params': {
4206 'skip_download': True,
4207 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4208 },
4209 }, {
4210 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
4211 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
4212 'info_dict': {
4213 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
4214 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
4215 'title': 'Uploads from Royalty Free Music - Topic',
4216 'uploader': 'Royalty Free Music - Topic',
4217 },
4218 'expected_warnings': [
4219 'A channel/user page was given',
4220 'The URL does not have a videos tab',
4221 ],
4222 'playlist_mincount': 101,
4223 'params': {
4224 'skip_download': True,
4225 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
4226 },
4227 }]
4228
@classmethod
def suitable(cls, url):
    """Accept *url* only when YoutubeIE does not already claim it."""
    # YoutubeIE gets first refusal; anything it accepts is never handled here
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
4233
def _real_extract(self, url):
    """Unsmuggle *url*, run the tab extraction and smuggle the data back into the entries."""
    url, smuggled_data = unsmuggle_url(url, {})
    # Record the music origin now: __real_extract rewrites the host to www.youtube.com
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True

    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
4242
# Splits a URL into `pre` (the part matched by _VALID_URL), an optional `tab` path segment
# (only attempted when the `channel_type` group took part in the match) and `post` (the rest)
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$')
4244
def __real_extract(self, url, smuggled_data):
    """
    Normalise a tab/channel/playlist/watch URL, fetch its data and dispatch to the matching handler.

    @param url            URL to extract; its host is replaced with www.youtube.com
    @param smuggled_data  dict of smuggled data; only the 'is_music_url' key is read here
    @returns              the value of _extract_from_tabs or _extract_from_playlist, or a
                          url_result pointing at a single video or at a resolved album URL
    @raises ExtractorError  e.g. when an album cannot be resolved, when a /live tab page is
                            returned instead of a video, or when the page cannot be recognised
    """
    item_id = self._match_id(url)
    # Force the canonical host (e.g. music.youtube.com -> www.youtube.com)
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Match against _URL_RE and turn groups that did not participate (None) into ''
        # so that the string operations below (.lower(), .startswith(), slicing) are safe
        mobj = self._URL_RE.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj, redirect_warning = get_mobj(url), None
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
            elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                mdata = self._extract_tab_endpoint(
                    f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
                murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                    get_all=False, expected_type=compat_str)
                if not murl:
                    raise ExtractorError('Failed to resolve album to playlist')
                # Restart extraction on the canonical URL reported for the album
                return self.url_result(murl, ie=YoutubeTabIE.ie_key())
            elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                pre = f'https://www.youtube.com/channel/{item_id}'

    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from the (possibly rewritten) parts and re-match it
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = [qs.get(key, [None])[0] for key in ('v', 'list')]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        url = f'https://www.youtube.com/playlist?list={playlist_id}'
        mobj = get_mobj(url)

    if video_id and playlist_id:
        # Both IDs are present: the 'noplaylist' param decides which one wins
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                   ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')

    data, ytcfg = self._extract_data(url, item_id)

    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                # The selected tab is not the requested /videos tab.
                # Note that this replaces any warning set earlier
                redirect_warning = f'The URL does not have a {mobj["tab"][1:]} tab'
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    pl_id = f'UU{item_id[2:]}'
                    pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                    try:
                        data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True)
                    except ExtractorError:
                        # Keep the channel data fetched above and only extend the warning
                        redirect_warning += ' and the playlist redirect gave error'
                    else:
                        item_id, url, tab_name = pl_id, pl_url, mobj['tab'][1:]
                        redirect_warning += f'. Redirecting to playlist {pl_id} instead'
                if tab_name.lower() != mobj['tab'][1:]:
                    redirect_warning += f'. {tab_name} tab is being downloaded instead'

    if redirect_warning:
        self.report_warning(redirect_warning)
    self.write_debug(f'Final URL: {url}')

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)
    # Look the tabs up again since `data` may have been replaced above
    tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
    if tabs:
        return self._extract_from_tabs(item_id, ytcfg, data, tabs)

    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Neither tabs nor a playlist: fall back to a single video if one can be identified
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
4356
4357
class YoutubePlaylistIE(InfoExtractor):
    """Matches bare playlist IDs and playlist URLs and delegates them to YoutubeTabIE."""
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs claimed by YoutubeTabIE or carrying a video ID; otherwise defer to the base check."""
        if YoutubeTabIE.suitable(url):
            return False
        # parse_qs is imported at module level, so no function-level import is needed.
        # A non-empty `v` parameter means the URL also names a video; it is not claimed here
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite *url* to a www.youtube.com/playlist URL and hand it to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Must be evaluated on the original URL, before it is rewritten below
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query string when there is one; a bare ID has none
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4443
4444
class YoutubeYtBeIE(InfoExtractor):
    """Expands youtu.be short links that carry a playlist ID into full watch URLs."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent watch URL (video + playlist) and delegate it to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4483
4484
class YoutubeYtUserIE(InfoExtractor):
    """Resolves the "ytuser:<name>" shorthand to that user's /videos page."""
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the user's /videos URL to YoutubeTabIE."""
        user_id = self._match_id(url)
        videos_url = f'https://www.youtube.com/user/{user_id}/videos'
        return self.url_result(videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4498
4499
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolves the ":ytfav" keyword family to the "LL" playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the "LL" playlist URL to YoutubeTabIE."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4517
4518
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor for the "ytsearch" key; all behaviour comes from the base classes."""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    # Videos only
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'
    _TESTS = []
4525
4526
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """Search extractor for the "ytsearchdate" key; all behaviour comes from the base classes."""
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Videos only, sorted by date
    _SEARCH_PARAMS = 'CAISAhAB'
4532
4533
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handles youtube.com/results URLs, forwarding the `sp` parameter to the search."""
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        }

    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of search results; the query doubles as playlist id and title."""
        qs = parse_qs(url)
        # `search_query` takes precedence over `q` when both are present
        query = (qs.get('search_query') or qs.get('q'))[0]
        # `sp` is optional; None is passed when it is absent
        search_params = qs.get('sp', (None,))[0]
        results = self._search_results(query, search_params)
        return self.playlist_result(results, query, query)
4562
4563
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed keyword extractors.
    Each subclass has to provide _FEED_NAME; it is used for both IE_NAME and the feed URL.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        return f'youtube:{self._FEED_NAME}'

    def _real_extract(self, url):
        """Delegate the /feed/<_FEED_NAME> URL to YoutubeTabIE."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4580
4581
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolves the ":ytwatchlater" keyword to the "WL" playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the "WL" playlist URL to YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
4594
4595
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "recommended" feed; also matches the bare youtube.com home URL."""
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    # Unlike the base class default, this feed does not set the login requirement
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4611
4612
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "subscriptions" feed (":ytsubs" keyword family)."""
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4624
4625
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the "history" feed (":ythis" / ":ythistory" keywords)."""
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4634
4635
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catches watch/attribution_link URLs that end before any video ID and explains the likely cause."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError hinting that the URL was not quoted in the shell."""
        # The example commands name this project's executable (yt-dlp) so that
        # the hint can be copied and run as-is
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
4683
4684
class YoutubeClipIE(InfoExtractor):
    """Matches /clip/ URLs, warns that clips are unsupported and passes the URL to 'Generic'."""
    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Emit the warning, then delegate the unchanged URL to the 'Generic' extractor."""
        warning = 'YouTube clips are not currently supported. The entire video will be downloaded instead'
        self.report_warning(warning)
        return self.url_result(url, ie='Generic')
4693
4694
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catches watch URLs whose video ID is only 1 to 10 characters long and reports them."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID and the URL."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)