]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/youtube.py
56de2ef5915fc850836b6b73c1d103be3ec4bbe1
[yt-dlp.git] / yt_dlp / extractor / youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import base64
6 import calendar
7 import copy
8 import datetime
9 import hashlib
10 import itertools
11 import json
12 import os.path
13 import random
14 import re
15 import time
16 import traceback
17
18 from .common import InfoExtractor, SearchInfoExtractor
19 from ..compat import (
20 compat_chr,
21 compat_HTTPError,
22 compat_parse_qs,
23 compat_str,
24 compat_urllib_parse_unquote_plus,
25 compat_urllib_parse_urlencode,
26 compat_urllib_parse_urlparse,
27 compat_urlparse,
28 )
29 from ..jsinterp import JSInterpreter
30 from ..utils import (
31 bytes_to_intlist,
32 clean_html,
33 datetime_from_str,
34 dict_get,
35 error_to_compat_str,
36 ExtractorError,
37 float_or_none,
38 format_field,
39 int_or_none,
40 intlist_to_bytes,
41 is_html,
42 mimetype2ext,
43 network_exceptions,
44 orderedSet,
45 parse_codecs,
46 parse_count,
47 parse_duration,
48 parse_iso8601,
49 parse_qs,
50 qualities,
51 remove_end,
52 remove_start,
53 smuggle_url,
54 str_or_none,
55 str_to_int,
56 traverse_obj,
57 try_get,
58 unescapeHTML,
59 unified_strdate,
60 unsmuggle_url,
61 update_url_query,
62 url_or_none,
63 urljoin,
64 variadic,
65 )
66
67
# Per-client innertube API configuration.
# Keys are client names usable with --extractor-args; values hold the API key,
# context (sent in every innertube request) and the numeric client-name header.
# Missing keys (API key, host, REQUIRE_JS_PLAYER) are filled in later by
# build_innertube_clients().
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    'android_creator': {
        # No API key here; the shared default is applied by build_innertube_clients()
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # ios has HLS live streams
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_creator': {
        # No API key here; the shared default is applied by build_innertube_clients()
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20210721.07.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
}
214
215
def build_innertube_clients():
    """Normalize INNERTUBE_CLIENTS in place.

    Fills in missing defaults (API key, host, JS-player requirement, 'hl'),
    assigns a selection priority per client, and derives a '<base>_agegate'
    variant for each base client with an EMBED clientScreen and a thirdParty
    embed URL.
    """
    embed_third_party = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'ios', 'mweb')
    base_priority = qualities(base_clients[::-1])
    shared_defaults = {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_HOST': 'www.youtube.com',
        'REQUIRE_JS_PLAYER': True,
    }

    # Snapshot the items first: agegate variants are inserted while iterating
    for name, cfg in list(INNERTUBE_CLIENTS.items()):
        for key, default in shared_defaults.items():
            cfg.setdefault(key, default)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
        cfg['priority'] = 10 * base_priority(name.split('_', 1)[0])

        if name in base_clients:
            agegate_cfg = INNERTUBE_CLIENTS[f'{name}_agegate'] = copy.deepcopy(cfg)
            agegate_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            agegate_cfg['INNERTUBE_CONTEXT']['thirdParty'] = embed_third_party
            agegate_cfg['priority'] -= 1
        elif name.endswith('_embedded'):
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = embed_third_party
            cfg['priority'] -= 2
        else:
            cfg['priority'] -= 3


build_innertube_clients()
243
244
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # URL path components that can never be a channel/user name
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
        r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
        r'browse|oembed|get_video_info|iframe_api|s/player|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    # Prefixed playlist IDs plus the special lists RDMM/WL/LL/LM
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    _NETRC_MACHINE = 'youtube'

    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    r''' # Unused since login is broken
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
    '''
269
270 def _login(self):
271 """
272 Attempt to log in to YouTube.
273 True is returned if successful or skipped.
274 False is returned if login failed.
275
276 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
277 """
278
279 def warn(message):
280 self.report_warning(message)
281
282 # username+password login is broken
283 if (self._LOGIN_REQUIRED
284 and self.get_param('cookiefile') is None
285 and self.get_param('cookiesfrombrowser') is None):
286 self.raise_login_required(
287 'Login details are needed to download this content', method='cookies')
288 username, password = self._get_login_info()
289 if username:
290 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
291 return
292
293 # Everything below this is broken!
294 r'''
295 # No authentication to be performed
296 if username is None:
297 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
298 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
299 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
300 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
301 return True
302
303 login_page = self._download_webpage(
304 self._LOGIN_URL, None,
305 note='Downloading login page',
306 errnote='unable to fetch login page', fatal=False)
307 if login_page is False:
308 return
309
310 login_form = self._hidden_inputs(login_page)
311
312 def req(url, f_req, note, errnote):
313 data = login_form.copy()
314 data.update({
315 'pstMsg': 1,
316 'checkConnection': 'youtube',
317 'checkedDomains': 'youtube',
318 'hl': 'en',
319 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
320 'f.req': json.dumps(f_req),
321 'flowName': 'GlifWebSignIn',
322 'flowEntry': 'ServiceLogin',
323 # TODO: reverse actual botguard identifier generation algo
324 'bgRequest': '["identifier",""]',
325 })
326 return self._download_json(
327 url, None, note=note, errnote=errnote,
328 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
329 fatal=False,
330 data=urlencode_postdata(data), headers={
331 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
332 'Google-Accounts-XSRF': 1,
333 })
334
335 lookup_req = [
336 username,
337 None, [], None, 'US', None, None, 2, False, True,
338 [
339 None, None,
340 [2, 1, None, 1,
341 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
342 None, [], 4],
343 1, [None, None, []], None, None, None, True
344 ],
345 username,
346 ]
347
348 lookup_results = req(
349 self._LOOKUP_URL, lookup_req,
350 'Looking up account info', 'Unable to look up account info')
351
352 if lookup_results is False:
353 return False
354
355 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
356 if not user_hash:
357 warn('Unable to extract user hash')
358 return False
359
360 challenge_req = [
361 user_hash,
362 None, 1, None, [1, None, None, None, [password, None, True]],
363 [
364 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
365 1, [None, None, []], None, None, None, True
366 ]]
367
368 challenge_results = req(
369 self._CHALLENGE_URL, challenge_req,
370 'Logging in', 'Unable to log in')
371
372 if challenge_results is False:
373 return
374
375 login_res = try_get(challenge_results, lambda x: x[0][5], list)
376 if login_res:
377 login_msg = try_get(login_res, lambda x: x[5], compat_str)
378 warn(
379 'Unable to login: %s' % 'Invalid password'
380 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
381 return False
382
383 res = try_get(challenge_results, lambda x: x[0][-1], list)
384 if not res:
385 warn('Unable to extract result entry')
386 return False
387
388 login_challenge = try_get(res, lambda x: x[0][0], list)
389 if login_challenge:
390 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
391 if challenge_str == 'TWO_STEP_VERIFICATION':
392 # SEND_SUCCESS - TFA code has been successfully sent to phone
393 # QUOTA_EXCEEDED - reached the limit of TFA codes
394 status = try_get(login_challenge, lambda x: x[5], compat_str)
395 if status == 'QUOTA_EXCEEDED':
396 warn('Exceeded the limit of TFA codes, try later')
397 return False
398
399 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
400 if not tl:
401 warn('Unable to extract TL')
402 return False
403
404 tfa_code = self._get_tfa_info('2-step verification code')
405
406 if not tfa_code:
407 warn(
408 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
409 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
410 return False
411
412 tfa_code = remove_start(tfa_code, 'G-')
413
414 tfa_req = [
415 user_hash, None, 2, None,
416 [
417 9, None, None, None, None, None, None, None,
418 [None, tfa_code, True, 2]
419 ]]
420
421 tfa_results = req(
422 self._TFA_URL.format(tl), tfa_req,
423 'Submitting TFA code', 'Unable to submit TFA code')
424
425 if tfa_results is False:
426 return False
427
428 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
429 if tfa_res:
430 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
431 warn(
432 'Unable to finish TFA: %s' % 'Invalid TFA code'
433 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
434 return False
435
436 check_cookie_url = try_get(
437 tfa_results, lambda x: x[0][-1][2], compat_str)
438 else:
439 CHALLENGES = {
440 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
441 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
442 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
443 }
444 challenge = CHALLENGES.get(
445 challenge_str,
446 '%s returned error %s.' % (self.IE_NAME, challenge_str))
447 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
448 return False
449 else:
450 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
451
452 if not check_cookie_url:
453 warn('Unable to extract CheckCookie URL')
454 return False
455
456 check_cookie_results = self._download_webpage(
457 check_cookie_url, None, 'Checking cookie', fatal=False)
458
459 if check_cookie_results is False:
460 return False
461
462 if 'https://myaccount.google.com/' not in check_cookie_results:
463 warn('Unable to log in')
464 return False
465
466 return True
467 '''
468
469 def _initialize_consent(self):
470 cookies = self._get_cookies('https://www.youtube.com/')
471 if cookies.get('__Secure-3PSID'):
472 return
473 consent_id = None
474 consent = cookies.get('CONSENT')
475 if consent:
476 if 'YES' in consent.value:
477 return
478 consent_id = self._search_regex(
479 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
480 if not consent_id:
481 consent_id = random.randint(100, 999)
482 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
483
    def _real_initialize(self):
        # Bypass the consent wall before any further requests are made
        self._initialize_consent()
        if self._downloader is None:
            return
        # _login() never returns a truthy value in its current (live) code paths
        if not self._login():
            return
490
    # Matches the ytInitialData JSON blob assignment in a watch/browse page
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    # Matches the ytInitialPlayerResponse JSON blob assignment
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Tokens that mark the end of an initial-data blob (used to anchor the regexes above)
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
494
495 def _get_default_ytcfg(self, client='web'):
496 return copy.deepcopy(INNERTUBE_CLIENTS[client])
497
498 def _get_innertube_host(self, client='web'):
499 return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
500
501 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
502 # try_get but with fallback to default ytcfg client values when present
503 _func = lambda y: try_get(y, getter, expected_type)
504 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
505
506 def _extract_client_name(self, ytcfg, default_client='web'):
507 return self._ytcfg_get_safe(
508 ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
509 lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
510
511 def _extract_client_version(self, ytcfg, default_client='web'):
512 return self._ytcfg_get_safe(
513 ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
514 lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
515
516 def _extract_api_key(self, ytcfg=None, default_client='web'):
517 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
518
519 def _extract_context(self, ytcfg=None, default_client='web'):
520 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
521 context = _get_context(ytcfg)
522 if context:
523 return context
524
525 context = _get_context(self._get_default_ytcfg(default_client))
526 if not ytcfg:
527 return context
528
529 # Recreate the client context (required)
530 context['client'].update({
531 'clientVersion': self._extract_client_version(ytcfg, default_client),
532 'clientName': self._extract_client_name(ytcfg, default_client),
533 })
534 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
535 if visitor_data:
536 context['client']['visitorData'] = visitor_data
537 return context
538
539 _SAPISID = None
540
    def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
        """Build the 'SAPISIDHASH <time>_<sha1>' Authorization header value.

        Returns None when no SAPISID/__Secure-3PAPISID cookie is available.
        The cookie lookup result is cached in self._SAPISID (False = absent),
        but the hash itself is recomputed per call since it embeds the time.
        """
        time_now = round(time.time())
        if self._SAPISID is None:
            yt_cookies = self._get_cookies('https://www.youtube.com')
            # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
            # See: https://github.com/yt-dlp/yt-dlp/issues/393
            sapisid_cookie = dict_get(
                yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
            if sapisid_cookie and sapisid_cookie.value:
                self._SAPISID = sapisid_cookie.value
                self.write_debug('Extracted SAPISID cookie')
                # SAPISID cookie is required if not already present
                if not yt_cookies.get('SAPISID'):
                    self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                    self._set_cookie(
                        '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
            else:
                self._SAPISID = False
        if not self._SAPISID:
            return None
        # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
        sapisidhash = hashlib.sha1(
            f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
        return f'SAPISIDHASH {time_now}_{sapisidhash}'
565
566 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
567 note='Downloading API JSON', errnote='Unable to download API page',
568 context=None, api_key=None, api_hostname=None, default_client='web'):
569
570 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
571 data.update(query)
572 real_headers = self.generate_api_headers(default_client=default_client)
573 real_headers.update({'content-type': 'application/json'})
574 if headers:
575 real_headers.update(headers)
576 return self._download_json(
577 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
578 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
579 data=json.dumps(data).encode('utf8'), headers=real_headers,
580 query={'key': api_key or self._extract_api_key()})
581
582 def extract_yt_initial_data(self, video_id, webpage):
583 return self._parse_json(
584 self._search_regex(
585 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
586 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
587 video_id)
588
589 @staticmethod
590 def _extract_session_index(*data):
591 """
592 Index of current account in account list.
593 See: https://github.com/yt-dlp/yt-dlp/pull/519
594 """
595 for ytcfg in data:
596 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
597 if session_index is not None:
598 return session_index
599
600 # Deprecated?
601 def _extract_identity_token(self, ytcfg=None, webpage=None):
602 if ytcfg:
603 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
604 if token:
605 return token
606 if webpage:
607 return self._search_regex(
608 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
609 'identity token', default=None, fatal=False)
610
611 @staticmethod
612 def _extract_account_syncid(*args):
613 """
614 Extract syncId required to download private playlists of secondary channels
615 @params response and/or ytcfg
616 """
617 for data in args:
618 # ytcfg includes channel_syncid if on secondary channel
619 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
620 if delegated_sid:
621 return delegated_sid
622 sync_ids = (try_get(
623 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
624 lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
625 if len(sync_ids) >= 2 and sync_ids[1]:
626 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
627 # and just "user_syncid||" for primary channel. We only want the channel_syncid
628 return sync_ids[0]
629
630 @property
631 def is_authenticated(self):
632 return bool(self._generate_sapisidhash_header())
633
634 def extract_ytcfg(self, video_id, webpage):
635 if not webpage:
636 return {}
637 return self._parse_json(
638 self._search_regex(
639 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
640 default='{}'), video_id, fatal=False) or {}
641
642 def generate_api_headers(
643 self, *, ytcfg=None, account_syncid=None, session_index=None,
644 visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
645
646 origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
647 headers = {
648 'X-YouTube-Client-Name': compat_str(
649 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
650 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
651 'Origin': origin,
652 'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
653 'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
654 'X-Goog-Visitor-Id': visitor_data or try_get(
655 self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
656 }
657 if session_index is None:
658 session_index = self._extract_session_index(ytcfg)
659 if account_syncid or session_index is not None:
660 headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
661
662 auth = self._generate_sapisidhash_header(origin)
663 if auth is not None:
664 headers['Authorization'] = auth
665 headers['X-Origin'] = origin
666 return {h: v for h, v in headers.items() if v is not None}
667
668 @staticmethod
669 def _build_api_continuation_query(continuation, ctp=None):
670 query = {
671 'continuation': continuation
672 }
673 # TODO: Inconsistency with clickTrackingParams.
674 # Currently we have a fixed ctp contained within context (from ytcfg)
675 # and a ctp in root query for continuation.
676 if ctp:
677 query['clickTracking'] = {'clickTrackingParams': ctp}
678 return query
679
680 @classmethod
681 def _extract_next_continuation_data(cls, renderer):
682 next_continuation = try_get(
683 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
684 lambda x: x['continuation']['reloadContinuationData']), dict)
685 if not next_continuation:
686 return
687 continuation = next_continuation.get('continuation')
688 if not continuation:
689 return
690 ctp = next_continuation.get('clickTrackingParams')
691 return cls._build_api_continuation_query(continuation, ctp)
692
693 @classmethod
694 def _extract_continuation_ep_data(cls, continuation_ep: dict):
695 if isinstance(continuation_ep, dict):
696 continuation = try_get(
697 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
698 if not continuation:
699 return
700 ctp = continuation_ep.get('clickTrackingParams')
701 return cls._build_api_continuation_query(continuation, ctp)
702
703 @classmethod
704 def _extract_continuation(cls, renderer):
705 next_continuation = cls._extract_next_continuation_data(renderer)
706 if next_continuation:
707 return next_continuation
708
709 contents = []
710 for key in ('contents', 'items'):
711 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
712
713 for content in contents:
714 if not isinstance(content, dict):
715 continue
716 continuation_ep = try_get(
717 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
718 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
719 dict)
720 continuation = cls._extract_continuation_ep_data(continuation_ep)
721 if continuation:
722 return continuation
723
724 @classmethod
725 def _extract_alerts(cls, data):
726 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
727 if not isinstance(alert_dict, dict):
728 continue
729 for alert in alert_dict.values():
730 alert_type = alert.get('type')
731 if not alert_type:
732 continue
733 message = cls._get_text(alert, 'text')
734 if message:
735 yield alert_type, message
736
737 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
738 errors = []
739 warnings = []
740 for alert_type, alert_message in alerts:
741 if alert_type.lower() == 'error' and fatal:
742 errors.append([alert_type, alert_message])
743 else:
744 warnings.append([alert_type, alert_message])
745
746 for alert_type, alert_message in (warnings + errors[:-1]):
747 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
748 if errors:
749 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
750
751 def _extract_and_report_alerts(self, data, *args, **kwargs):
752 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
753
754 def _extract_badges(self, renderer: dict):
755 badges = set()
756 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
757 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
758 if label:
759 badges.add(label.lower())
760 return badges
761
    @staticmethod
    def _get_text(data, *path_list, max_runs=None):
        """Extract a display string from YouTube's text renderer structures.

        Each path in *path_list* (or *data* itself when no paths are given) is
        resolved with traverse_obj; the first object that yields a non-empty
        'simpleText' value or a joinable 'runs' list wins. *max_runs* limits
        how many runs are concatenated. Returns None when nothing matches.
        """
        for path in path_list or [None]:
            if path is None:
                obj = [data]
            else:
                obj = traverse_obj(data, path, default=[])
                # A path with no branching (no ... and no list/tuple keys)
                # resolves to a single object - wrap it for the loop below
                if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                    obj = [obj]
            for item in obj:
                text = try_get(item, lambda x: x['simpleText'], compat_str)
                if text:
                    return text
                runs = try_get(item, lambda x: x['runs'], list) or []
                # A bare list is treated as a runs list itself
                if not runs and isinstance(item, list):
                    runs = item

                runs = runs[:min(len(runs), max_runs or len(runs))]
                text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
                if text:
                    return text
783
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='web'):
        """Call the innertube API via _call_api, retrying intermittent failures.

        check_get_keys: key(s) that must be present in the response for it to
        count as complete; incomplete responses are retried.
        Returns the parsed JSON response, or None when not fatal and all
        retries were exhausted or a non-retryable error occurred.
        """
        response = None
        last_error = None
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
            try:
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, network_exceptions):
                    # Surface any JSON error message YouTube sent alongside the HTTP error
                    if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
                        e.cause.seek(0)
                        yt_error = try_get(
                            self._parse_json(e.cause.read().decode(), item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    # We also want to catch all other network exceptions since errors in later pages can be troublesome
                    # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                        last_error = error_to_compat_str(e.cause or e.msg)
                        if count < retries:
                            continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False, only_once=True)
                except ExtractorError as e:
                    # YouTube servers may return errors we want to retry on in a 200 OK response
                    # See: https://github.com/yt-dlp/yt-dlp/issues/839
                    if 'unknown error' in e.msg.lower():
                        last_error = e.msg
                        continue
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
854
855 @staticmethod
856 def is_music_url(url):
857 return re.match(r'https?://music\.youtube\.com/', url) is not None
858
859 def _extract_video(self, renderer):
860 video_id = renderer.get('videoId')
861 title = self._get_text(renderer, 'title')
862 description = self._get_text(renderer, 'descriptionSnippet')
863 duration = parse_duration(self._get_text(
864 renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
865 view_count_text = self._get_text(renderer, 'viewCountText') or ''
866 view_count = str_to_int(self._search_regex(
867 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
868 'view count', default=None))
869
870 uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
871
872 return {
873 '_type': 'url',
874 'ie_key': YoutubeIE.ie_key(),
875 'id': video_id,
876 'url': f'https://www.youtube.com/watch?v={video_id}',
877 'title': title,
878 'description': description,
879 'duration': duration,
880 'view_count': view_count,
881 'uploader': uploader,
882 }
883
884
885 class YoutubeIE(YoutubeBaseInfoExtractor):
886 IE_DESC = 'YouTube.com'
    # Host-matching regexes for Invidious/Piped front-ends. These are joined
    # with '|' and interpolated into _VALID_URL as %(invidious)s below, so any
    # watch URL on these mirrors is handled by this extractor.
    _INVIDIOUS_SITES = (
        # invidious-redirect websites
        r'(?:www\.)?redirect\.invidious\.io',
        r'(?:(?:www|dev)\.)?invidio\.us',
        # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
        r'(?:www\.)?invidious\.pussthecat\.org',
        r'(?:www\.)?invidious\.zee\.li',
        r'(?:www\.)?invidious\.ethibox\.fr',
        r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
        # youtube-dl invidious instances list
        r'(?:(?:www|no)\.)?invidiou\.sh',
        r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
        r'(?:www\.)?invidious\.kabi\.tk',
        r'(?:www\.)?invidious\.mastodon\.host',
        r'(?:www\.)?invidious\.zapashcanon\.fr',
        r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
        r'(?:www\.)?invidious\.tinfoil-hat\.net',
        r'(?:www\.)?invidious\.himiko\.cloud',
        r'(?:www\.)?invidious\.reallyancient\.tech',
        r'(?:www\.)?invidious\.tube',
        r'(?:www\.)?invidiou\.site',
        r'(?:www\.)?invidious\.site',
        r'(?:www\.)?invidious\.xyz',
        r'(?:www\.)?invidious\.nixnet\.xyz',
        r'(?:www\.)?invidious\.048596\.xyz',
        r'(?:www\.)?invidious\.drycat\.fr',
        r'(?:www\.)?inv\.skyn3t\.in',
        r'(?:www\.)?tube\.poal\.co',
        r'(?:www\.)?tube\.connect\.cafe',
        r'(?:www\.)?vid\.wxzm\.sx',
        r'(?:www\.)?vid\.mint\.lgbt',
        r'(?:www\.)?vid\.puffyan\.us',
        r'(?:www\.)?yewtu\.be',
        r'(?:www\.)?yt\.elukerio\.org',
        r'(?:www\.)?yt\.lelux\.fi',
        r'(?:www\.)?invidious\.ggc-project\.de',
        r'(?:www\.)?yt\.maisputain\.ovh',
        r'(?:www\.)?ytprivate\.com',
        r'(?:www\.)?invidious\.13ad\.de',
        r'(?:www\.)?invidious\.toot\.koeln',
        r'(?:www\.)?invidious\.fdn\.fr',
        r'(?:www\.)?watch\.nettohikari\.com',
        r'(?:www\.)?invidious\.namazso\.eu',
        r'(?:www\.)?invidious\.silkky\.cloud',
        r'(?:www\.)?invidious\.exonip\.de',
        r'(?:www\.)?invidious\.riverside\.rocks',
        r'(?:www\.)?invidious\.blamefran\.net',
        r'(?:www\.)?invidious\.moomoo\.de',
        r'(?:www\.)?ytb\.trom\.tf',
        r'(?:www\.)?yt\.cyberhost\.uk',
        # Tor (.onion) and I2P (.b32.i2p) hidden-service mirrors
        r'(?:www\.)?kgg2m7yk5aybusll\.onion',
        r'(?:www\.)?qklhadlycap4cnod\.onion',
        r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
        r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
        r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
        r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
        r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
        r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
        r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
        r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
        r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
        r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
    )
    # Verbose (?x) regex matching every supported watch-URL shape (youtube.com
    # and friends, youtu.be short links, Invidious mirrors, or a naked 11-char
    # video ID). The inline '#' comments are part of the verbose regex itself.
    # %(invidious)s is filled in from _INVIDIOUS_SITES at class-creation time.
    _VALID_URL = r"""(?x)^
                     (
                         (?:https?://|//)                                    # http(s):// or protocol-independent URL
                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
                            (?:www\.)?deturl\.com/www\.youtube\.com|
                            (?:www\.)?pwnyoutube\.com|
                            (?:www\.)?hooktube\.com|
                            (?:www\.)?yourepeat\.com|
                            tube\.majestyc\.net|
                            %(invidious)s|
                            youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e|shorts)/(?!videoseries))         # v/ or embed/ or e/ or shorts/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
                                 v=
                             )
                         ))
                         |(?:
                            youtu\.be|                                        # just youtu.be/xxxx
                            vid\.plus|                                        # or vid.plus/xxxx
                            zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
                            %(invidious)s
                         )/
                         |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     (?:\#|$)""" % {
        'invidious': '|'.join(_INVIDIOUS_SITES),
    }
    # Regexes (tried in order) that extract a player identifier — named group
    # 'id' — from the URL of YouTube's player JavaScript (e.g. .../base.js).
    _PLAYER_INFO_RE = (
        r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
        r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
        # legacy 'vfl'-style player ids
        r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
    )
    # Hardcoded per-itag format characteristics (container, resolution, codecs,
    # bitrate, protocol). Keys are YouTube itag numbers as strings; the special
    # '_rtmp' key carries protocol info for unnamed RTMP formats. Negative
    # 'preference' values de-prioritise a format during format selection.
    _formats = {
        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
        '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},


        # 3D videos
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

        # Apple HTTP Live Streaming
        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
        '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
        '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

        # Dash webm
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

        # Dash webm audio
        '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
        '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

        # Dash webm audio with opus inside
        '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
        '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
        '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},

        # av01 video only formats sometimes served with "unknown" codecs
        '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
        '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
        '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
        '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
        '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
        '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
        '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
        '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
    }
    # Subtitle serialisation formats this extractor requests.
    # NOTE(review): presumably listed in order of preference — confirm at the
    # subtitle-extraction call site (outside this chunk).
    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

    # Opt out of the framework's default geo-bypass mechanism for this
    # extractor (flag semantics are defined in the base InfoExtractor).
    _GEO_BYPASS = False

    # Short extractor name used for selection and log prefixes
    IE_NAME = 'youtube'
1104 _TESTS = [
1105 {
1106 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1107 'info_dict': {
1108 'id': 'BaW_jenozKc',
1109 'ext': 'mp4',
1110 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1111 'uploader': 'Philipp Hagemeister',
1112 'uploader_id': 'phihag',
1113 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1114 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1115 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1116 'upload_date': '20121002',
1117 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1118 'categories': ['Science & Technology'],
1119 'tags': ['youtube-dl'],
1120 'duration': 10,
1121 'view_count': int,
1122 'like_count': int,
1123 'dislike_count': int,
1124 'start_time': 1,
1125 'end_time': 9,
1126 }
1127 },
1128 {
1129 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1130 'note': 'Embed-only video (#1746)',
1131 'info_dict': {
1132 'id': 'yZIXLfi8CZQ',
1133 'ext': 'mp4',
1134 'upload_date': '20120608',
1135 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1136 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1137 'uploader': 'SET India',
1138 'uploader_id': 'setindia',
1139 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1140 'age_limit': 18,
1141 },
1142 'skip': 'Private video',
1143 },
1144 {
1145 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1146 'note': 'Use the first video ID in the URL',
1147 'info_dict': {
1148 'id': 'BaW_jenozKc',
1149 'ext': 'mp4',
1150 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1151 'uploader': 'Philipp Hagemeister',
1152 'uploader_id': 'phihag',
1153 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1154 'upload_date': '20121002',
1155 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1156 'categories': ['Science & Technology'],
1157 'tags': ['youtube-dl'],
1158 'duration': 10,
1159 'view_count': int,
1160 'like_count': int,
1161 'dislike_count': int,
1162 },
1163 'params': {
1164 'skip_download': True,
1165 },
1166 },
1167 {
1168 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1169 'note': '256k DASH audio (format 141) via DASH manifest',
1170 'info_dict': {
1171 'id': 'a9LDPn-MO4I',
1172 'ext': 'm4a',
1173 'upload_date': '20121002',
1174 'uploader_id': '8KVIDEO',
1175 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1176 'description': '',
1177 'uploader': '8KVIDEO',
1178 'title': 'UHDTV TEST 8K VIDEO.mp4'
1179 },
1180 'params': {
1181 'youtube_include_dash_manifest': True,
1182 'format': '141',
1183 },
1184 'skip': 'format 141 not served anymore',
1185 },
1186 # DASH manifest with encrypted signature
1187 {
1188 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1189 'info_dict': {
1190 'id': 'IB3lcPjvWLA',
1191 'ext': 'm4a',
1192 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1193 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1194 'duration': 244,
1195 'uploader': 'AfrojackVEVO',
1196 'uploader_id': 'AfrojackVEVO',
1197 'upload_date': '20131011',
1198 'abr': 129.495,
1199 },
1200 'params': {
1201 'youtube_include_dash_manifest': True,
1202 'format': '141/bestaudio[ext=m4a]',
1203 },
1204 },
1205 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1206 {
1207 'note': 'Embed allowed age-gate video',
1208 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1209 'info_dict': {
1210 'id': 'HtVdAasjOgU',
1211 'ext': 'mp4',
1212 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1213 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1214 'duration': 142,
1215 'uploader': 'The Witcher',
1216 'uploader_id': 'WitcherGame',
1217 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1218 'upload_date': '20140605',
1219 'age_limit': 18,
1220 },
1221 },
1222 {
1223 'note': 'Age-gate video with embed allowed in public site',
1224 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1225 'info_dict': {
1226 'id': 'HsUATh_Nc2U',
1227 'ext': 'mp4',
1228 'title': 'Godzilla 2 (Official Video)',
1229 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1230 'upload_date': '20200408',
1231 'uploader_id': 'FlyingKitty900',
1232 'uploader': 'FlyingKitty',
1233 'age_limit': 18,
1234 },
1235 },
1236 {
1237 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1238 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1239 'info_dict': {
1240 'id': 'Tq92D6wQ1mg',
1241 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1242 'ext': 'mp4',
1243 'upload_date': '20191227',
1244 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1245 'uploader': 'Projekt Melody',
1246 'description': 'md5:17eccca93a786d51bc67646756894066',
1247 'age_limit': 18,
1248 },
1249 },
1250 {
1251 'note': 'Non-Agegated non-embeddable video',
1252 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1253 'info_dict': {
1254 'id': 'MeJVWBSsPAY',
1255 'ext': 'mp4',
1256 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1257 'uploader': 'Herr Lurik',
1258 'uploader_id': 'st3in234',
1259 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1260 'upload_date': '20130730',
1261 },
1262 },
1263 {
1264 'note': 'Non-bypassable age-gated video',
1265 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1266 'only_matching': True,
1267 },
1268 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1269 # YouTube Red ad is not captured for creator
1270 {
1271 'url': '__2ABJjxzNo',
1272 'info_dict': {
1273 'id': '__2ABJjxzNo',
1274 'ext': 'mp4',
1275 'duration': 266,
1276 'upload_date': '20100430',
1277 'uploader_id': 'deadmau5',
1278 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1279 'creator': 'deadmau5',
1280 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1281 'uploader': 'deadmau5',
1282 'title': 'Deadmau5 - Some Chords (HD)',
1283 'alt_title': 'Some Chords',
1284 },
1285 'expected_warnings': [
1286 'DASH manifest missing',
1287 ]
1288 },
1289 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1290 {
1291 'url': 'lqQg6PlCWgI',
1292 'info_dict': {
1293 'id': 'lqQg6PlCWgI',
1294 'ext': 'mp4',
1295 'duration': 6085,
1296 'upload_date': '20150827',
1297 'uploader_id': 'olympic',
1298 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1299 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1300 'uploader': 'Olympics',
1301 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1302 },
1303 'params': {
1304 'skip_download': 'requires avconv',
1305 }
1306 },
1307 # Non-square pixels
1308 {
1309 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1310 'info_dict': {
1311 'id': '_b-2C3KPAM0',
1312 'ext': 'mp4',
1313 'stretched_ratio': 16 / 9.,
1314 'duration': 85,
1315 'upload_date': '20110310',
1316 'uploader_id': 'AllenMeow',
1317 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1318 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1319 'uploader': '孫ᄋᄅ',
1320 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1321 },
1322 },
1323 # url_encoded_fmt_stream_map is empty string
1324 {
1325 'url': 'qEJwOuvDf7I',
1326 'info_dict': {
1327 'id': 'qEJwOuvDf7I',
1328 'ext': 'webm',
1329 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1330 'description': '',
1331 'upload_date': '20150404',
1332 'uploader_id': 'spbelect',
1333 'uploader': 'Наблюдатели Петербурга',
1334 },
1335 'params': {
1336 'skip_download': 'requires avconv',
1337 },
1338 'skip': 'This live event has ended.',
1339 },
1340 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1341 {
1342 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1343 'info_dict': {
1344 'id': 'FIl7x6_3R5Y',
1345 'ext': 'webm',
1346 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1347 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1348 'duration': 220,
1349 'upload_date': '20150625',
1350 'uploader_id': 'dorappi2000',
1351 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1352 'uploader': 'dorappi2000',
1353 'formats': 'mincount:31',
1354 },
1355 'skip': 'not actual anymore',
1356 },
1357 # DASH manifest with segment_list
1358 {
1359 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1360 'md5': '8ce563a1d667b599d21064e982ab9e31',
1361 'info_dict': {
1362 'id': 'CsmdDsKjzN8',
1363 'ext': 'mp4',
1364 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1365 'uploader': 'Airtek',
1366 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1367 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1368 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1369 },
1370 'params': {
1371 'youtube_include_dash_manifest': True,
1372 'format': '135', # bestvideo
1373 },
1374 'skip': 'This live event has ended.',
1375 },
1376 {
1377 # Multifeed videos (multiple cameras), URL is for Main Camera
1378 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1379 'info_dict': {
1380 'id': 'jvGDaLqkpTg',
1381 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1382 'description': 'md5:e03b909557865076822aa169218d6a5d',
1383 },
1384 'playlist': [{
1385 'info_dict': {
1386 'id': 'jvGDaLqkpTg',
1387 'ext': 'mp4',
1388 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1389 'description': 'md5:e03b909557865076822aa169218d6a5d',
1390 'duration': 10643,
1391 'upload_date': '20161111',
1392 'uploader': 'Team PGP',
1393 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1394 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1395 },
1396 }, {
1397 'info_dict': {
1398 'id': '3AKt1R1aDnw',
1399 'ext': 'mp4',
1400 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1401 'description': 'md5:e03b909557865076822aa169218d6a5d',
1402 'duration': 10991,
1403 'upload_date': '20161111',
1404 'uploader': 'Team PGP',
1405 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1406 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1407 },
1408 }, {
1409 'info_dict': {
1410 'id': 'RtAMM00gpVc',
1411 'ext': 'mp4',
1412 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1413 'description': 'md5:e03b909557865076822aa169218d6a5d',
1414 'duration': 10995,
1415 'upload_date': '20161111',
1416 'uploader': 'Team PGP',
1417 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1418 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1419 },
1420 }, {
1421 'info_dict': {
1422 'id': '6N2fdlP3C5U',
1423 'ext': 'mp4',
1424 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1425 'description': 'md5:e03b909557865076822aa169218d6a5d',
1426 'duration': 10990,
1427 'upload_date': '20161111',
1428 'uploader': 'Team PGP',
1429 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1430 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1431 },
1432 }],
1433 'params': {
1434 'skip_download': True,
1435 },
1436 'skip': 'Not multifeed anymore',
1437 },
1438 {
1439 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1440 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1441 'info_dict': {
1442 'id': 'gVfLd0zydlo',
1443 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1444 },
1445 'playlist_count': 2,
1446 'skip': 'Not multifeed anymore',
1447 },
1448 {
1449 'url': 'https://vid.plus/FlRa-iH7PGw',
1450 'only_matching': True,
1451 },
1452 {
1453 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1454 'only_matching': True,
1455 },
1456 {
1457 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1458 # Also tests cut-off URL expansion in video description (see
1459 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1460 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1461 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1462 'info_dict': {
1463 'id': 'lsguqyKfVQg',
1464 'ext': 'mp4',
1465 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1466 'alt_title': 'Dark Walk',
1467 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1468 'duration': 133,
1469 'upload_date': '20151119',
1470 'uploader_id': 'IronSoulElf',
1471 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1472 'uploader': 'IronSoulElf',
1473 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1474 'track': 'Dark Walk',
1475 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1476 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1477 },
1478 'params': {
1479 'skip_download': True,
1480 },
1481 },
1482 {
1483 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1484 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1485 'only_matching': True,
1486 },
1487 {
1488 # Video with yt:stretch=17:0
1489 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1490 'info_dict': {
1491 'id': 'Q39EVAstoRM',
1492 'ext': 'mp4',
1493 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1494 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1495 'upload_date': '20151107',
1496 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1497 'uploader': 'CH GAMER DROID',
1498 },
1499 'params': {
1500 'skip_download': True,
1501 },
1502 'skip': 'This video does not exist.',
1503 },
1504 {
1505 # Video with incomplete 'yt:stretch=16:'
1506 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1507 'only_matching': True,
1508 },
1509 {
1510 # Video licensed under Creative Commons
1511 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1512 'info_dict': {
1513 'id': 'M4gD1WSo5mA',
1514 'ext': 'mp4',
1515 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1516 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1517 'duration': 721,
1518 'upload_date': '20150127',
1519 'uploader_id': 'BerkmanCenter',
1520 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1521 'uploader': 'The Berkman Klein Center for Internet & Society',
1522 'license': 'Creative Commons Attribution license (reuse allowed)',
1523 },
1524 'params': {
1525 'skip_download': True,
1526 },
1527 },
1528 {
1529 # Channel-like uploader_url
1530 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1531 'info_dict': {
1532 'id': 'eQcmzGIKrzg',
1533 'ext': 'mp4',
1534 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1535 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1536 'duration': 4060,
1537 'upload_date': '20151119',
1538 'uploader': 'Bernie Sanders',
1539 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1540 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1541 'license': 'Creative Commons Attribution license (reuse allowed)',
1542 },
1543 'params': {
1544 'skip_download': True,
1545 },
1546 },
1547 {
1548 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1549 'only_matching': True,
1550 },
1551 {
1552 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1553 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1554 'only_matching': True,
1555 },
1556 {
1557 # Rental video preview
1558 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1559 'info_dict': {
1560 'id': 'uGpuVWrhIzE',
1561 'ext': 'mp4',
1562 'title': 'Piku - Trailer',
1563 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1564 'upload_date': '20150811',
1565 'uploader': 'FlixMatrix',
1566 'uploader_id': 'FlixMatrixKaravan',
1567 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1568 'license': 'Standard YouTube License',
1569 },
1570 'params': {
1571 'skip_download': True,
1572 },
1573 'skip': 'This video is not available.',
1574 },
1575 {
1576 # YouTube Red video with episode data
1577 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1578 'info_dict': {
1579 'id': 'iqKdEhx-dD4',
1580 'ext': 'mp4',
1581 'title': 'Isolation - Mind Field (Ep 1)',
1582 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1583 'duration': 2085,
1584 'upload_date': '20170118',
1585 'uploader': 'Vsauce',
1586 'uploader_id': 'Vsauce',
1587 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1588 'series': 'Mind Field',
1589 'season_number': 1,
1590 'episode_number': 1,
1591 },
1592 'params': {
1593 'skip_download': True,
1594 },
1595 'expected_warnings': [
1596 'Skipping DASH manifest',
1597 ],
1598 },
1599 {
1600 # The following content has been identified by the YouTube community
1601 # as inappropriate or offensive to some audiences.
1602 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1603 'info_dict': {
1604 'id': '6SJNVb0GnPI',
1605 'ext': 'mp4',
1606 'title': 'Race Differences in Intelligence',
1607 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1608 'duration': 965,
1609 'upload_date': '20140124',
1610 'uploader': 'New Century Foundation',
1611 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1612 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1613 },
1614 'params': {
1615 'skip_download': True,
1616 },
1617 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1618 },
1619 {
1620 # itag 212
1621 'url': '1t24XAntNCY',
1622 'only_matching': True,
1623 },
1624 {
1625 # geo restricted to JP
1626 'url': 'sJL6WA-aGkQ',
1627 'only_matching': True,
1628 },
1629 {
1630 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1631 'only_matching': True,
1632 },
1633 {
1634 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1635 'only_matching': True,
1636 },
1637 {
1638 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1639 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1640 'only_matching': True,
1641 },
1642 {
1643 # DRM protected
1644 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1645 'only_matching': True,
1646 },
1647 {
1648 # Video with unsupported adaptive stream type formats
1649 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1650 'info_dict': {
1651 'id': 'Z4Vy8R84T1U',
1652 'ext': 'mp4',
1653 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1654 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1655 'duration': 433,
1656 'upload_date': '20130923',
1657 'uploader': 'Amelia Putri Harwita',
1658 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1659 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1660 'formats': 'maxcount:10',
1661 },
1662 'params': {
1663 'skip_download': True,
1664 'youtube_include_dash_manifest': False,
1665 },
1666 'skip': 'not actual anymore',
1667 },
1668 {
1669 # Youtube Music Auto-generated description
1670 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1671 'info_dict': {
1672 'id': 'MgNrAu2pzNs',
1673 'ext': 'mp4',
1674 'title': 'Voyeur Girl',
1675 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1676 'upload_date': '20190312',
1677 'uploader': 'Stephen - Topic',
1678 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1679 'artist': 'Stephen',
1680 'track': 'Voyeur Girl',
1681 'album': 'it\'s too much love to know my dear',
1682 'release_date': '20190313',
1683 'release_year': 2019,
1684 },
1685 'params': {
1686 'skip_download': True,
1687 },
1688 },
1689 {
1690 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1691 'only_matching': True,
1692 },
1693 {
1694 # invalid -> valid video id redirection
1695 'url': 'DJztXj2GPfl',
1696 'info_dict': {
1697 'id': 'DJztXj2GPfk',
1698 'ext': 'mp4',
1699 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1700 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1701 'upload_date': '20090125',
1702 'uploader': 'Prochorowka',
1703 'uploader_id': 'Prochorowka',
1704 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1705 'artist': 'Panjabi MC',
1706 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1707 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1708 },
1709 'params': {
1710 'skip_download': True,
1711 },
1712 'skip': 'Video unavailable',
1713 },
1714 {
1715 # empty description results in an empty string
1716 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1717 'info_dict': {
1718 'id': 'x41yOUIvK2k',
1719 'ext': 'mp4',
1720 'title': 'IMG 3456',
1721 'description': '',
1722 'upload_date': '20170613',
1723 'uploader_id': 'ElevageOrVert',
1724 'uploader': 'ElevageOrVert',
1725 },
1726 'params': {
1727 'skip_download': True,
1728 },
1729 },
1730 {
1731 # with '};' inside yt initial data (see [1])
1732 # see [2] for an example with '};' inside ytInitialPlayerResponse
1733 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1734 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1735 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1736 'info_dict': {
1737 'id': 'CHqg6qOn4no',
1738 'ext': 'mp4',
1739 'title': 'Part 77 Sort a list of simple types in c#',
1740 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1741 'upload_date': '20130831',
1742 'uploader_id': 'kudvenkat',
1743 'uploader': 'kudvenkat',
1744 },
1745 'params': {
1746 'skip_download': True,
1747 },
1748 },
1749 {
1750 # another example of '};' in ytInitialData
1751 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1752 'only_matching': True,
1753 },
1754 {
1755 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1756 'only_matching': True,
1757 },
1758 {
1759 # https://github.com/ytdl-org/youtube-dl/pull/28094
1760 'url': 'OtqTfy26tG0',
1761 'info_dict': {
1762 'id': 'OtqTfy26tG0',
1763 'ext': 'mp4',
1764 'title': 'Burn Out',
1765 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1766 'upload_date': '20141120',
1767 'uploader': 'The Cinematic Orchestra - Topic',
1768 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1769 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1770 'artist': 'The Cinematic Orchestra',
1771 'track': 'Burn Out',
1772 'album': 'Every Day',
1773 'release_data': None,
1774 'release_year': None,
1775 },
1776 'params': {
1777 'skip_download': True,
1778 },
1779 },
1780 {
1781 # controversial video, only works with bpctr when authenticated with cookies
1782 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1783 'only_matching': True,
1784 },
1785 {
1786 # controversial video, requires bpctr/contentCheckOk
1787 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1788 'info_dict': {
1789 'id': 'SZJvDhaSDnc',
1790 'ext': 'mp4',
1791 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1792 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1793 'uploader': 'CBS This Morning',
1794 'uploader_id': 'CBSThisMorning',
1795 'upload_date': '20140716',
1796 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1797 }
1798 },
1799 {
1800 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1801 'url': 'cBvYw8_A0vQ',
1802 'info_dict': {
1803 'id': 'cBvYw8_A0vQ',
1804 'ext': 'mp4',
1805 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1806 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1807 'upload_date': '20201120',
1808 'uploader': 'Walk around Japan',
1809 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1810 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1811 },
1812 'params': {
1813 'skip_download': True,
1814 },
1815 }, {
1816 # Has multiple audio streams
1817 'url': 'WaOKSUlf4TM',
1818 'only_matching': True
1819 }, {
1820 # Requires Premium: has format 141 when requested using YTM url
1821 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1822 'only_matching': True
1823 }, {
1824 # multiple subtitles with same lang_code
1825 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1826 'only_matching': True,
1827 }, {
1828 # Force use android client fallback
1829 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1830 'info_dict': {
1831 'id': 'YOelRv7fMxY',
1832 'title': 'DIGGING A SECRET TUNNEL Part 1',
1833 'ext': '3gp',
1834 'upload_date': '20210624',
1835 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1836 'uploader': 'colinfurze',
1837 'uploader_id': 'colinfurze',
1838 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1839 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1840 },
1841 'params': {
1842 'format': '17', # 3gp format available on android
1843 'extractor_args': {'youtube': {'player_client': ['android']}},
1844 },
1845 },
1846 {
1847 # Skip download of additional client configs (remix client config in this case)
1848 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1849 'only_matching': True,
1850 'params': {
1851 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1852 },
1853 }, {
1854 # shorts
1855 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1856 'only_matching': True,
1857 },
1858 ]
1859
1860 @classmethod
1861 def suitable(cls, url):
1862 from ..utils import parse_qs
1863
1864 qs = parse_qs(url)
1865 if qs.get('list', [None])[0]:
1866 return False
1867 return super(YoutubeIE, cls).suitable(url)
1868
1869 def __init__(self, *args, **kwargs):
1870 super(YoutubeIE, self).__init__(*args, **kwargs)
1871 self._code_cache = {}
1872 self._player_cache = {}
1873
1874 def _extract_player_url(self, *ytcfgs, webpage=None):
1875 player_url = traverse_obj(
1876 ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
1877 get_all=False, expected_type=compat_str)
1878 if not player_url:
1879 return
1880 if player_url.startswith('//'):
1881 player_url = 'https:' + player_url
1882 elif not re.match(r'https?://', player_url):
1883 player_url = compat_urlparse.urljoin(
1884 'https://www.youtube.com', player_url)
1885 return player_url
1886
1887 def _download_player_url(self, video_id, fatal=False):
1888 res = self._download_webpage(
1889 'https://www.youtube.com/iframe_api',
1890 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
1891 if res:
1892 player_version = self._search_regex(
1893 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
1894 if player_version:
1895 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
1896
1897 def _signature_cache_id(self, example_sig):
1898 """ Return a string representation of a signature """
1899 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1900
1901 @classmethod
1902 def _extract_player_info(cls, player_url):
1903 for player_re in cls._PLAYER_INFO_RE:
1904 id_m = re.search(player_re, player_url)
1905 if id_m:
1906 break
1907 else:
1908 raise ExtractorError('Cannot identify player %r' % player_url)
1909 return id_m.group('id')
1910
1911 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1912 player_id = self._extract_player_info(player_url)
1913 if player_id not in self._code_cache:
1914 code = self._download_webpage(
1915 player_url, video_id, fatal=fatal,
1916 note='Downloading player ' + player_id,
1917 errnote='Download of %s failed' % player_url)
1918 if code:
1919 self._code_cache[player_id] = code
1920 return player_id in self._code_cache
1921
1922 def _extract_signature_function(self, video_id, player_url, example_sig):
1923 player_id = self._extract_player_info(player_url)
1924
1925 # Read from filesystem cache
1926 func_id = 'js_%s_%s' % (
1927 player_id, self._signature_cache_id(example_sig))
1928 assert os.path.basename(func_id) == func_id
1929
1930 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1931 if cache_spec is not None:
1932 return lambda s: ''.join(s[i] for i in cache_spec)
1933
1934 if self._load_player(video_id, player_url):
1935 code = self._code_cache[player_id]
1936 res = self._parse_sig_js(code)
1937
1938 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1939 cache_res = res(test_string)
1940 cache_spec = [ord(c) for c in cache_res]
1941
1942 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1943 return res
1944
    def _print_sig_code(self, func, example_sig):
        """Print Python source equivalent to the extracted signature function.

        Runs *func* on a probe string of distinct characters to recover the
        index permutation it applies, then compresses runs of consecutive
        indices into slice expressions for readability.
        """
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render one slice expression, omitting defaults (0 start, step 1)
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            # NOTE(review): assumes idxs has at least 2 entries; `i` below
            # would be unbound for shorter inputs -- confirm callers
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Inside a +1/-1 run: extend it, or emit the finished slice
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Two consecutive indices start a new run
                    step = i - prev
                    start = prev
                    continue
                else:
                    # Isolated index: plain subscript
                    yield 's[%d]' % prev
            # Flush the final element (or close the trailing run)
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1983
    def _parse_sig_js(self, jscode):
        """Locate and compile the signature-scrambling function in player JS.

        Tries a sequence of regexes (current player layouts first) to find
        the function name, then evaluates it with JSInterpreter.  Returns a
        callable mapping a scrambled signature string to its decrypted form.
        """
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # extract_function expects the argument list as a single sequence
        return lambda s: initial_function([s])
2007
2008 def _decrypt_signature(self, s, video_id, player_url):
2009 """Turn the encrypted s field into a working signature"""
2010
2011 if player_url is None:
2012 raise ExtractorError('Cannot decrypt signature without player_url')
2013
2014 try:
2015 player_id = (player_url, self._signature_cache_id(s))
2016 if player_id not in self._player_cache:
2017 func = self._extract_signature_function(
2018 video_id, player_url, s
2019 )
2020 self._player_cache[player_id] = func
2021 func = self._player_cache[player_id]
2022 if self.get_param('youtube_print_sig_code'):
2023 self._print_sig_code(func, s)
2024 return func(s)
2025 except Exception as e:
2026 tb = traceback.format_exc()
2027 raise ExtractorError(
2028 'Signature extraction failed: ' + tb, cause=e)
2029
2030 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
2031 """
2032 Extract signatureTimestamp (sts)
2033 Required to tell API what sig/player version is in use.
2034 """
2035 sts = None
2036 if isinstance(ytcfg, dict):
2037 sts = int_or_none(ytcfg.get('STS'))
2038
2039 if not sts:
2040 # Attempt to extract from player
2041 if player_url is None:
2042 error_msg = 'Cannot extract signature timestamp without player_url.'
2043 if fatal:
2044 raise ExtractorError(error_msg)
2045 self.report_warning(error_msg)
2046 return
2047 if self._load_player(video_id, player_url, fatal=fatal):
2048 player_id = self._extract_player_info(player_url)
2049 code = self._code_cache[player_id]
2050 sts = int_or_none(self._search_regex(
2051 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
2052 'JS player signature timestamp', group='sts', fatal=fatal))
2053 return sts
2054
2055 def _mark_watched(self, video_id, player_responses):
2056 playback_url = traverse_obj(
2057 player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
2058 expected_type=url_or_none, get_all=False)
2059 if not playback_url:
2060 self.report_warning('Unable to mark watched')
2061 return
2062 parsed_playback_url = compat_urlparse.urlparse(playback_url)
2063 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
2064
2065 # cpn generation algorithm is reverse engineered from base.js.
2066 # In fact it works even with dummy cpn.
2067 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
2068 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
2069
2070 qs.update({
2071 'ver': ['2'],
2072 'cpn': [cpn],
2073 })
2074 playback_url = compat_urlparse.urlunparse(
2075 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
2076
2077 self._download_webpage(
2078 playback_url, video_id, 'Marking watched',
2079 'Unable to mark watched', fatal=False)
2080
    @staticmethod
    def _extract_urls(webpage):
        """Return URLs/ids of YouTube players embedded in an arbitrary webpage."""
        # Embedded YouTube player
        entries = [
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(r'''(?x)
            (?:
                <iframe[^>]+?src=|
                data-video-url=|
                <embed[^>]+?src=|
                embedSWF\(?:\s*|
                <object[^>]+data=|
                new\s+SWFObject\(
            )
            (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
            \1''', webpage)]

        # lazyYT YouTube embed
        entries.extend(list(map(
            unescapeHTML,
            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

        # Wordpress "YouTube Video Importer" plugin
        matches = re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
        # findall returns tuples here; the last group is the video id
        entries.extend(m[-1] for m in matches)

        return entries
2112
2113 @staticmethod
2114 def _extract_url(webpage):
2115 urls = YoutubeIE._extract_urls(webpage)
2116 return urls[0] if urls else None
2117
2118 @classmethod
2119 def extract_id(cls, url):
2120 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2121 if mobj is None:
2122 raise ExtractorError('Invalid URL: %s' % url)
2123 return mobj.group('id')
2124
2125 def _extract_chapters_from_json(self, data, duration):
2126 chapter_list = traverse_obj(
2127 data, (
2128 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2129 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2130 ), expected_type=list)
2131
2132 return self._extract_chapters(
2133 chapter_list,
2134 chapter_time=lambda chapter: float_or_none(
2135 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2136 chapter_title=lambda chapter: traverse_obj(
2137 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2138 duration=duration)
2139
2140 def _extract_chapters_from_engagement_panel(self, data, duration):
2141 content_list = traverse_obj(
2142 data,
2143 ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2144 expected_type=list, default=[])
2145 chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2146 chapter_title = lambda chapter: self._get_text(chapter, 'title')
2147
2148 return next((
2149 filter(None, (
2150 self._extract_chapters(
2151 traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2152 chapter_time, chapter_title, duration)
2153 for contents in content_list
2154 ))), [])
2155
2156 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2157 chapters = []
2158 last_chapter = {'start_time': 0}
2159 for idx, chapter in enumerate(chapter_list or []):
2160 title = chapter_title(chapter)
2161 start_time = chapter_time(chapter)
2162 if start_time is None:
2163 continue
2164 last_chapter['end_time'] = start_time
2165 if start_time < last_chapter['start_time']:
2166 if idx == 1:
2167 chapters.pop()
2168 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2169 else:
2170 self.report_warning(f'Invalid start time for chapter "{title}"')
2171 continue
2172 last_chapter = {'start_time': start_time, 'title': title}
2173 chapters.append(last_chapter)
2174 last_chapter['end_time'] = duration
2175 return chapters
2176
2177 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2178 return self._parse_json(self._search_regex(
2179 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2180 regex), webpage, name, default='{}'), video_id, fatal=False)
2181
2182 @staticmethod
2183 def parse_time_text(time_text):
2184 """
2185 Parse the comment time text
2186 time_text is in the format 'X units ago (edited)'
2187 """
2188 time_text_split = time_text.split(' ')
2189 if len(time_text_split) >= 3:
2190 try:
2191 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2192 except ValueError:
2193 return None
2194
2195 def _extract_comment(self, comment_renderer, parent=None):
2196 comment_id = comment_renderer.get('commentId')
2197 if not comment_id:
2198 return
2199
2200 text = self._get_text(comment_renderer, 'contentText')
2201
2202 # note: timestamp is an estimate calculated from the current time and time_text
2203 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
2204 time_text_dt = self.parse_time_text(time_text)
2205 if isinstance(time_text_dt, datetime.datetime):
2206 timestamp = calendar.timegm(time_text_dt.timetuple())
2207 author = self._get_text(comment_renderer, 'authorText')
2208 author_id = try_get(comment_renderer,
2209 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2210
2211 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2212 lambda x: x['likeCount']), compat_str)) or 0
2213 author_thumbnail = try_get(comment_renderer,
2214 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2215
2216 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2217 is_favorited = 'creatorHeart' in (try_get(
2218 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2219 return {
2220 'id': comment_id,
2221 'text': text,
2222 'timestamp': timestamp,
2223 'time_text': time_text,
2224 'like_count': votes,
2225 'is_favorited': is_favorited,
2226 'author': author,
2227 'author_id': author_id,
2228 'author_thumbnail': author_thumbnail,
2229 'author_is_uploader': author_is_uploader,
2230 'parent': parent or 'root'
2231 }
2232
    def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, comment_counts=None):
        """Yield comments for a video, preceded by an int estimated total.

        Recurses one level for reply threads.  comment_counts is a shared
        mutable list: [comments downloaded, estimated total, reply-thread #].
        """

        def extract_header(contents):
            # Parse the comments-section header: report the estimated total,
            # pick the requested sort order and return its continuation
            _total_comments = 0
            _continuation = None
            for content in contents:
                comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
                expected_comment_count = parse_count(self._get_text(
                    comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

                if expected_comment_count:
                    comment_counts[1] = expected_comment_count
                    self.to_screen('Downloading ~%d comments' % expected_comment_count)
                    _total_comments = comment_counts[1]
                sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
                comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    continue

                sort_text = sort_menu_item.get('title')
                if isinstance(sort_text, compat_str):
                    sort_text = sort_text.lower()
                else:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text)
                break
            return _total_comments, _continuation

        def extract_thread(contents):
            # Yield each comment in a thread, then recurse into its replies
            if not parent:
                comment_counts[2] = 0
            for content in contents:
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                comment_renderer = try_get(
                    comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                    content, (lambda x: x['commentRenderer'], dict))

                if not comment_renderer:
                    continue
                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue
                comment_counts[0] += 1
                yield comment
                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    comment_counts[2] += 1
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, ytcfg, video_id,
                        parent=comment.get('id'), comment_counts=comment_counts)

                    for reply_comment in comment_entries_iter:
                        yield reply_comment

        # YouTube comments have a max depth of 2
        max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
        if max_depth == 1 and parent:
            return
        if not comment_counts:
            # comment so far, est. total comments, current comment thread #
            comment_counts = [0, 0, 0]

        continuation = self._extract_continuation(root_continuation_data)
        # Short tokens belong to the old API and must be regenerated
        if continuation and len(continuation['continuation']) < 27:
            self.write_debug('Detected old API continuation token. Generating new API compatible token.')
            continuation_token = self._generate_comment_continuation(video_id)
            continuation = self._build_api_continuation_query(continuation_token, None)

        visitor_data = None
        is_first_continuation = parent is None

        # Page through the comment continuations until exhausted
        for page_num in itertools.count(0):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
            comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = '    Downloading comment API JSON reply thread %d %s' % (
                        comment_counts[2], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    '       ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
            if not response:
                break
            # Carry visitor data forward so subsequent pages share a session
            visitor_data = try_get(
                response,
                lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
                compat_str) or visitor_data

            continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

            continuation = None
            if isinstance(continuation_contents, list):
                for continuation_section in continuation_contents:
                    if not isinstance(continuation_section, dict):
                        continue
                    continuation_items = try_get(
                        continuation_section,
                        (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                         lambda x: x['appendContinuationItemsAction']['continuationItems']),
                        list) or []
                    if is_first_continuation:
                        total_comments, continuation = extract_header(continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break
                        continue
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_items)):
                        yield entry
                    continuation = self._extract_continuation({'contents': continuation_items})
                    if continuation:
                        # Sometimes YouTube provides a continuation without any comments
                        # In most cases we end up just downloading these with very little comments to come.
                        if count == 0:
                            if not parent:
                                self.report_warning('No comments received - assuming end of comments')
                            continuation = None
                        break

            # Deprecated response structure
            elif isinstance(continuation_contents, dict):
                known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
                for key, continuation_renderer in continuation_contents.items():
                    if key not in known_continuation_renderers:
                        continue
                    if not isinstance(continuation_renderer, dict):
                        continue
                    if is_first_continuation:
                        header_continuation_items = [continuation_renderer.get('header') or {}]
                        total_comments, continuation = extract_header(header_continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break

                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break
2402
2403 @staticmethod
2404 def _generate_comment_continuation(video_id):
2405 """
2406 Generates initial comment section continuation token from given video id
2407 """
2408 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2409 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2410 new_continuation_intlist = list(itertools.chain.from_iterable(
2411 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2412 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2413
    def _extract_comments(self, ytcfg, video_id, contents, webpage):
        """Entry for comment extraction"""
        def _real_comment_extract(contents):
            # The first itemSectionRenderer holds the comments section
            yield from self._comment_entries(
                traverse_obj(contents, (..., 'itemSectionRenderer'), get_all=False), ytcfg, video_id)

        comments = []
        estimated_total = 0
        max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
        # Force English regardless of account setting to prevent parsing issues
        # See: https://github.com/yt-dlp/yt-dlp/issues/532
        ytcfg = copy.deepcopy(ytcfg)
        # NOTE(review): when INNERTUBE_CONTEXT/client is absent, 'hl' is set
        # on the throwaway default dict, silently making this a no-op --
        # presumably acceptable best-effort; confirm before relying on it
        traverse_obj(
            ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})['hl'] = 'en'
        try:
            for comment in _real_comment_extract(contents):
                if len(comments) >= max_comments:
                    break
                # _comment_entries yields the estimated total as a bare int
                if isinstance(comment, int):
                    estimated_total = comment
                    continue
                comments.append(comment)
        except KeyboardInterrupt:
            self.to_screen('Interrupted by user')
        self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
        return {
            'comments': comments,
            'comment_count': len(comments),
        }
2443
2444 @staticmethod
2445 def _get_checkok_params():
2446 return {'contentCheckOk': True, 'racyCheckOk': True}
2447
2448 @classmethod
2449 def _generate_player_context(cls, sts=None):
2450 context = {
2451 'html5Preference': 'HTML5_PREF_WANTS',
2452 }
2453 if sts is not None:
2454 context['signatureTimestamp'] = sts
2455 return {
2456 'playbackContext': {
2457 'contentPlaybackContext': context
2458 },
2459 **cls._get_checkok_params()
2460 }
2461
2462 @staticmethod
2463 def _is_agegated(player_response):
2464 if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
2465 return True
2466
2467 reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2468 AGE_GATE_REASONS = (
2469 'confirm your age', 'age-restricted', 'inappropriate', # reason
2470 'age_verification_required', 'age_check_required', # status
2471 )
2472 return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
2473
2474 @staticmethod
2475 def _is_unplayable(player_response):
2476 return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
2477
2478 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
2479
2480 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2481 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2482 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
2483 headers = self.generate_api_headers(
2484 ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
2485
2486 yt_query = {'videoId': video_id}
2487 yt_query.update(self._generate_player_context(sts))
2488 return self._extract_response(
2489 item_id=video_id, ep='player', query=yt_query,
2490 ytcfg=player_ytcfg, headers=headers, fatal=True,
2491 default_client=client,
2492 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2493 ) or None
2494
2495 def _get_requested_clients(self, url, smuggled_data):
2496 requested_clients = []
2497 allowed_clients = sorted(
2498 [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
2499 key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
2500 for client in self._configuration_arg('player_client'):
2501 if client in allowed_clients:
2502 requested_clients.append(client)
2503 elif client == 'all':
2504 requested_clients.extend(allowed_clients)
2505 else:
2506 self.report_warning(f'Skipping unsupported client {client}')
2507 if not requested_clients:
2508 requested_clients = ['android', 'web']
2509
2510 if smuggled_data.get('is_music_url') or self.is_music_url(url):
2511 requested_clients.extend(
2512 f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
2513
2514 return orderedSet(requested_clients)
2515
2516 def _extract_player_ytcfg(self, client, video_id):
2517 url = {
2518 'web_music': 'https://music.youtube.com',
2519 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2520 }.get(client)
2521 if not url:
2522 return {}
2523 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2524 return self.extract_ytcfg(video_id, webpage) or {}
2525
    def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
        """Fetch player responses for each requested client.

        Returns a tuple (prs, player_url): prs is the list of player-response
        dicts (possibly including the webpage's initial response with its
        formats stripped), player_url is the player JS URL if one was resolved.
        """
        initial_pr = None
        if webpage:
            initial_pr = self._extract_yt_initial_variable(
                webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
                video_id, 'initial player response')

        # clients is consumed as a stack (pop from the end), so reverse it here
        # to preserve the caller's priority order; original_clients is kept so
        # append_client never re-adds an explicitly requested client
        original_clients = clients
        clients = clients[::-1]
        prs = []

        def append_client(client_name):
            # Schedule an extra client only if it exists and was not already requested
            if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
                clients.append(client_name)

        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        if initial_pr:
            pr = dict(initial_pr)
            pr['streamingData'] = None
            prs.append(pr)

        last_error = None
        tried_iframe_fallback = False
        player_url = None
        while clients:
            client = clients.pop()
            # The master ytcfg only applies to the plain web client
            player_ytcfg = master_ytcfg if client == 'web' else {}
            if 'configs' not in self._configuration_arg('player_skip'):
                player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

            player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
            require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
            if 'js' in self._configuration_arg('player_skip'):
                require_js_player = False
                player_url = None

            # Fall back to the embed iframe to locate the player JS (at most once)
            if not player_url and not tried_iframe_fallback and require_js_player:
                player_url = self._download_player_url(video_id)
                tried_iframe_fallback = True

            try:
                # Reuse the webpage's initial response for the web client
                pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
            except ExtractorError as e:
                # Remember the error but keep trying the remaining clients;
                # only the most recent error is raised/reported at the end
                if last_error:
                    self.report_warning(last_error)
                last_error = e
                continue

            if pr:
                prs.append(pr)

            # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
            if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
                append_client(client.replace('_agegate', '_creator'))
            elif self._is_agegated(pr):
                append_client(f'{client}_agegate')

        if last_error:
            # Fatal only when no client produced a usable response
            if not len(prs):
                raise last_error
            self.report_warning(last_error)
        return prs, player_url
2592
    def _extract_formats(self, streaming_data, video_id, player_url, is_live):
        """Yield format dicts from the streamingData of all player responses.

        Handles progressive/adaptive formats (including signatureCipher
        decryption via the player JS), then HLS and DASH manifests, honoring
        the `skip` extractor-arg and the include_*_manifest params.
        """
        itags, stream_ids = [], []
        # Quality maps collected from progressive formats; used later to
        # guess the quality of manifest-only formats
        itag_qualities, res_qualities = {}, {}
        q = qualities([
            # Normally tiny is the smallest video-only formats. But
            # audio-only formats with unknown quality may get tagged as tiny
            'tiny',
            'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
            'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
        ])
        streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

        for fmt in streaming_formats:
            # Skip DRM-protected entries and ones with a target duration
            if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
                continue

            itag = str_or_none(fmt.get('itag'))
            audio_track = fmt.get('audioTrack') or {}
            # itag alone is not unique across audio tracks, so dedupe on both
            stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
            if stream_id in stream_ids:
                continue

            quality = fmt.get('quality')
            height = int_or_none(fmt.get('height'))
            if quality == 'tiny' or not quality:
                quality = fmt.get('audioQuality', '').lower() or quality
            # The 3gp format (17) in android client has a quality of "small",
            # but is actually worse than other formats
            if itag == '17':
                quality = 'tiny'
            if quality:
                if itag:
                    itag_qualities[itag] = quality
                if height:
                    res_qualities[height] = quality
            # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
            # (adding `&sq=0` to the URL) and parsing emsg box to determine the
            # number of fragment that would subsequently requested with (`&sq=N`)
            if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
                continue

            fmt_url = fmt.get('url')
            if not fmt_url:
                # No plain URL: the stream URL is signature-ciphered and must
                # be reassembled after decrypting 's' with the player JS
                sc = compat_parse_qs(fmt.get('signatureCipher'))
                fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
                encrypted_sig = try_get(sc, lambda x: x['s'][0])
                if not (sc and fmt_url and encrypted_sig):
                    continue
                # Cannot decrypt without the player JS
                if not player_url:
                    continue
                signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
                sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
                fmt_url += '&' + sp + '=' + signature

            if itag:
                itags.append(itag)
                stream_ids.append(stream_id)

            tbr = float_or_none(
                fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
            # NOTE(review): quality can still be falsy here when neither
            # 'quality' nor 'audioQuality' is set; the format_note fallback
            # below assumes it is a str — confirm this cannot occur in practice
            dct = {
                'asr': int_or_none(fmt.get('audioSampleRate')),
                'filesize': int_or_none(fmt.get('contentLength')),
                'format_id': itag,
                'format_note': ', '.join(filter(None, (
                    '%s%s' % (audio_track.get('displayName') or '',
                              ' (default)' if audio_track.get('audioIsDefault') else ''),
                    fmt.get('qualityLabel') or quality.replace('audio_quality_', '')))),
                'fps': int_or_none(fmt.get('fps')),
                'height': height,
                'quality': q(quality),
                'tbr': tbr,
                'url': fmt_url,
                'width': int_or_none(fmt.get('width')),
                'language': audio_track.get('id', '').split('.')[0],
                'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
            }
            mime_mobj = re.match(
                r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
            if mime_mobj:
                dct['ext'] = mimetype2ext(mime_mobj.group(1))
                dct.update(parse_codecs(mime_mobj.group(2)))
            no_audio = dct.get('acodec') == 'none'
            no_video = dct.get('vcodec') == 'none'
            if no_audio:
                dct['vbr'] = tbr
            if no_video:
                dct['abr'] = tbr
            if no_audio or no_video:
                dct['downloader_options'] = {
                    # Youtube throttles chunks >~10M
                    'http_chunk_size': 10485760,
                }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
            yield dct

        skip_manifests = self._configuration_arg('skip')
        get_dash = (
            (not is_live or self._configuration_arg('include_live_dash'))
            and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
        get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

        def guess_quality(f):
            # Infer a manifest format's quality from the itag/height maps
            # collected from the progressive formats above
            for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)):
                if val in qdict:
                    return q(qdict[val])
            return -1

        for sd in streaming_data:
            hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
            if hls_manifest_url:
                for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                    itag = self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)
                    # Skip formats already yielded from streamingData
                    if itag in itags:
                        continue
                    if itag:
                        f['format_id'] = itag
                        itags.append(itag)
                    f['quality'] = guess_quality(f)
                    yield f

            dash_manifest_url = get_dash and sd.get('dashManifestUrl')
            if dash_manifest_url:
                for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                    itag = f['format_id']
                    if itag in itags:
                        continue
                    if itag:
                        itags.append(itag)
                    f['quality'] = guess_quality(f)
                    # The MPD itself lacks filesize; recover it from the clen
                    # component of the fragment/stream URL when present
                    filesize = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url')
                        or f['url'], 'file size', default=None))
                    if filesize:
                        f['filesize'] = filesize
                    yield f
2731
2732 def _real_extract(self, url):
2733 url, smuggled_data = unsmuggle_url(url, {})
2734 video_id = self._match_id(url)
2735
2736 base_url = self.http_scheme() + '//www.youtube.com/'
2737 webpage_url = base_url + 'watch?v=' + video_id
2738 webpage = None
2739 if 'webpage' not in self._configuration_arg('player_skip'):
2740 webpage = self._download_webpage(
2741 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2742
2743 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2744
2745 player_responses, player_url = self._extract_player_responses(
2746 self._get_requested_clients(url, smuggled_data),
2747 video_id, webpage, master_ytcfg)
2748
2749 get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
2750
2751 playability_statuses = traverse_obj(
2752 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2753
2754 trailer_video_id = get_first(
2755 playability_statuses,
2756 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2757 expected_type=str)
2758 if trailer_video_id:
2759 return self.url_result(
2760 trailer_video_id, self.ie_key(), trailer_video_id)
2761
2762 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2763 if webpage else (lambda x: None))
2764
2765 video_details = traverse_obj(
2766 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2767 microformats = traverse_obj(
2768 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2769 expected_type=dict, default=[])
2770 video_title = (
2771 get_first(video_details, 'title')
2772 or self._get_text(microformats, (..., 'title'))
2773 or search_meta(['og:title', 'twitter:title', 'title']))
2774 video_description = get_first(video_details, 'shortDescription')
2775
2776 if not smuggled_data.get('force_singlefeed', False):
2777 if not self.get_param('noplaylist'):
2778 multifeed_metadata_list = get_first(
2779 player_responses,
2780 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2781 expected_type=str)
2782 if multifeed_metadata_list:
2783 entries = []
2784 feed_ids = []
2785 for feed in multifeed_metadata_list.split(','):
2786 # Unquote should take place before split on comma (,) since textual
2787 # fields may contain comma as well (see
2788 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2789 feed_data = compat_parse_qs(
2790 compat_urllib_parse_unquote_plus(feed))
2791
2792 def feed_entry(name):
2793 return try_get(
2794 feed_data, lambda x: x[name][0], compat_str)
2795
2796 feed_id = feed_entry('id')
2797 if not feed_id:
2798 continue
2799 feed_title = feed_entry('title')
2800 title = video_title
2801 if feed_title:
2802 title += ' (%s)' % feed_title
2803 entries.append({
2804 '_type': 'url_transparent',
2805 'ie_key': 'Youtube',
2806 'url': smuggle_url(
2807 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2808 {'force_singlefeed': True}),
2809 'title': title,
2810 })
2811 feed_ids.append(feed_id)
2812 self.to_screen(
2813 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2814 % (', '.join(feed_ids), video_id))
2815 return self.playlist_result(
2816 entries, video_id, video_title, video_description)
2817 else:
2818 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2819
2820 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
2821 is_live = get_first(video_details, 'isLive')
2822 if is_live is None:
2823 is_live = get_first(live_broadcast_details, 'isLiveNow')
2824
2825 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2826 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2827
2828 if not formats:
2829 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2830 self.report_drm(video_id)
2831 pemr = get_first(
2832 playability_statuses,
2833 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2834 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2835 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2836 if subreason:
2837 if subreason == 'The uploader has not made this video available in your country.':
2838 countries = get_first(microformats, 'availableCountries')
2839 if not countries:
2840 regions_allowed = search_meta('regionsAllowed')
2841 countries = regions_allowed.split(',') if regions_allowed else None
2842 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2843 reason += f'. {subreason}'
2844 if reason:
2845 self.raise_no_formats(reason, expected=True)
2846
2847 for f in formats:
2848 if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled
2849 f['source_preference'] = -10
2850 # TODO: this method is not reliable
2851 f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)'
2852
2853 # Source is given priority since formats that throttle are given lower source_preference
2854 # When throttling issue is fully fixed, remove this
2855 self._sort_formats(formats, ('quality', 'res', 'fps', 'source', 'codec:vp9.2', 'lang'))
2856
2857 keywords = get_first(video_details, 'keywords', expected_type=list) or []
2858 if not keywords and webpage:
2859 keywords = [
2860 unescapeHTML(m.group('content'))
2861 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2862 for keyword in keywords:
2863 if keyword.startswith('yt:stretch='):
2864 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2865 if mobj:
2866 # NB: float is intentional for forcing float division
2867 w, h = (float(v) for v in mobj.groups())
2868 if w > 0 and h > 0:
2869 ratio = w / h
2870 for f in formats:
2871 if f.get('vcodec') != 'none':
2872 f['stretched_ratio'] = ratio
2873 break
2874
2875 thumbnails = []
2876 thumbnail_dicts = traverse_obj(
2877 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2878 expected_type=dict, default=[])
2879 for thumbnail in thumbnail_dicts:
2880 thumbnail_url = thumbnail.get('url')
2881 if not thumbnail_url:
2882 continue
2883 # Sometimes youtube gives a wrong thumbnail URL. See:
2884 # https://github.com/yt-dlp/yt-dlp/issues/233
2885 # https://github.com/ytdl-org/youtube-dl/issues/28023
2886 if 'maxresdefault' in thumbnail_url:
2887 thumbnail_url = thumbnail_url.split('?')[0]
2888 thumbnails.append({
2889 'url': thumbnail_url,
2890 'height': int_or_none(thumbnail.get('height')),
2891 'width': int_or_none(thumbnail.get('width')),
2892 })
2893 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2894 if thumbnail_url:
2895 thumbnails.append({
2896 'url': thumbnail_url,
2897 })
2898 # The best resolution thumbnails sometimes does not appear in the webpage
2899 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2900 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2901 hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
2902 # TODO: Test them also? - For some videos, even these don't exist
2903 guaranteed_thumbnail_names = [
2904 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2905 'mqdefault', 'mq1', 'mq2', 'mq3',
2906 'default', '1', '2', '3'
2907 ]
2908 thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2909 n_thumbnail_names = len(thumbnail_names)
2910
2911 thumbnails.extend({
2912 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2913 video_id=video_id, name=name, ext=ext,
2914 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2915 '_test_url': name in hq_thumbnail_names,
2916 } for name in thumbnail_names for ext in ('webp', 'jpg'))
2917 for thumb in thumbnails:
2918 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2919 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2920 self._remove_duplicate_formats(thumbnails)
2921
2922 category = get_first(microformats, 'category') or search_meta('genre')
2923 channel_id = str_or_none(
2924 get_first(video_details, 'channelId')
2925 or get_first(microformats, 'externalChannelId')
2926 or search_meta('channelId'))
2927 duration = int_or_none(
2928 get_first(video_details, 'lengthSeconds')
2929 or get_first(microformats, 'lengthSeconds')
2930 or parse_duration(search_meta('duration'))) or None
2931 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2932
2933 live_content = get_first(video_details, 'isLiveContent')
2934 is_upcoming = get_first(video_details, 'isUpcoming')
2935 if is_live is None:
2936 if is_upcoming or live_content is False:
2937 is_live = False
2938 if is_upcoming is None and (live_content or is_live):
2939 is_upcoming = False
2940 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2941 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2942 if not duration and live_endtime and live_starttime:
2943 duration = live_endtime - live_starttime
2944
2945 info = {
2946 'id': video_id,
2947 'title': self._live_title(video_title) if is_live else video_title,
2948 'formats': formats,
2949 'thumbnails': thumbnails,
2950 'description': video_description,
2951 'upload_date': unified_strdate(
2952 get_first(microformats, 'uploadDate')
2953 or search_meta('uploadDate')),
2954 'uploader': get_first(video_details, 'author'),
2955 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2956 'uploader_url': owner_profile_url,
2957 'channel_id': channel_id,
2958 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
2959 'duration': duration,
2960 'view_count': int_or_none(
2961 get_first((video_details, microformats), (..., 'viewCount'))
2962 or search_meta('interactionCount')),
2963 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
2964 'age_limit': 18 if (
2965 get_first(microformats, 'isFamilySafe') is False
2966 or search_meta('isFamilyFriendly') == 'false'
2967 or search_meta('og:restrictions:age') == '18+') else 0,
2968 'webpage_url': webpage_url,
2969 'categories': [category] if category else None,
2970 'tags': keywords,
2971 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
2972 'is_live': is_live,
2973 'was_live': (False if is_live or is_upcoming or live_content is False
2974 else None if is_live is None or is_upcoming is None
2975 else live_content),
2976 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2977 'release_timestamp': live_starttime,
2978 }
2979
2980 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2981 # Converted into dicts to remove duplicates
2982 captions = {
2983 sub.get('baseUrl'): sub
2984 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2985 translation_languages = {
2986 lang.get('languageCode'): lang.get('languageName')
2987 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
2988 subtitles = {}
2989 if pctr:
2990 def process_language(container, base_url, lang_code, sub_name, query):
2991 lang_subs = container.setdefault(lang_code, [])
2992 for fmt in self._SUBTITLE_FORMATS:
2993 query.update({
2994 'fmt': fmt,
2995 })
2996 lang_subs.append({
2997 'ext': fmt,
2998 'url': update_url_query(base_url, query),
2999 'name': sub_name,
3000 })
3001
3002 for base_url, caption_track in captions.items():
3003 if not base_url:
3004 continue
3005 if caption_track.get('kind') != 'asr':
3006 lang_code = (
3007 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
3008 or caption_track.get('languageCode'))
3009 if not lang_code:
3010 continue
3011 process_language(
3012 subtitles, base_url, lang_code,
3013 traverse_obj(caption_track, ('name', 'simpleText'), ('name', 'runs', ..., 'text'), get_all=False),
3014 {})
3015 continue
3016 automatic_captions = {}
3017 for trans_code, trans_name in translation_languages.items():
3018 if not trans_code:
3019 continue
3020 process_language(
3021 automatic_captions, base_url, trans_code,
3022 self._get_text(trans_name, max_runs=1),
3023 {'tlang': trans_code})
3024 info['automatic_captions'] = automatic_captions
3025 info['subtitles'] = subtitles
3026
3027 parsed_url = compat_urllib_parse_urlparse(url)
3028 for component in [parsed_url.fragment, parsed_url.query]:
3029 query = compat_parse_qs(component)
3030 for k, v in query.items():
3031 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
3032 d_k += '_time'
3033 if d_k not in info and k in s_ks:
3034 info[d_k] = parse_duration(query[k][0])
3035
3036 # Youtube Music Auto-generated description
3037 if video_description:
3038 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
3039 if mobj:
3040 release_year = mobj.group('release_year')
3041 release_date = mobj.group('release_date')
3042 if release_date:
3043 release_date = release_date.replace('-', '')
3044 if not release_year:
3045 release_year = release_date[:4]
3046 info.update({
3047 'album': mobj.group('album'.strip()),
3048 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3049 'track': mobj.group('track').strip(),
3050 'release_date': release_date,
3051 'release_year': int_or_none(release_year),
3052 })
3053
3054 initial_data = None
3055 if webpage:
3056 initial_data = self._extract_yt_initial_variable(
3057 webpage, self._YT_INITIAL_DATA_RE, video_id,
3058 'yt initial data')
3059 if not initial_data:
3060 query = {'videoId': video_id}
3061 query.update(self._get_checkok_params())
3062 initial_data = self._extract_response(
3063 item_id=video_id, ep='next', fatal=False,
3064 ytcfg=master_ytcfg, query=query,
3065 headers=self.generate_api_headers(ytcfg=master_ytcfg),
3066 note='Downloading initial data API JSON')
3067
3068 try:
3069 # This will error if there is no livechat
3070 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
3071 info['subtitles']['live_chat'] = [{
3072 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
3073 'video_id': video_id,
3074 'ext': 'json',
3075 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
3076 }]
3077 except (KeyError, IndexError, TypeError):
3078 pass
3079
3080 if initial_data:
3081 info['chapters'] = (
3082 self._extract_chapters_from_json(initial_data, duration)
3083 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3084 or None)
3085
3086 contents = try_get(
3087 initial_data,
3088 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3089 list) or []
3090 for content in contents:
3091 vpir = content.get('videoPrimaryInfoRenderer')
3092 if vpir:
3093 stl = vpir.get('superTitleLink')
3094 if stl:
3095 stl = self._get_text(stl)
3096 if try_get(
3097 vpir,
3098 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3099 info['location'] = stl
3100 else:
3101 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3102 if mobj:
3103 info.update({
3104 'series': mobj.group(1),
3105 'season_number': int(mobj.group(2)),
3106 'episode_number': int(mobj.group(3)),
3107 })
3108 for tlb in (try_get(
3109 vpir,
3110 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3111 list) or []):
3112 tbr = tlb.get('toggleButtonRenderer') or {}
3113 for getter, regex in [(
3114 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3115 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3116 lambda x: x['accessibility'],
3117 lambda x: x['accessibilityData']['accessibilityData'],
3118 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3119 label = (try_get(tbr, getter, dict) or {}).get('label')
3120 if label:
3121 mobj = re.match(regex, label)
3122 if mobj:
3123 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3124 break
3125 sbr_tooltip = try_get(
3126 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3127 if sbr_tooltip:
3128 like_count, dislike_count = sbr_tooltip.split(' / ')
3129 info.update({
3130 'like_count': str_to_int(like_count),
3131 'dislike_count': str_to_int(dislike_count),
3132 })
3133 vsir = content.get('videoSecondaryInfoRenderer')
3134 if vsir:
3135 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
3136 rows = try_get(
3137 vsir,
3138 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3139 list) or []
3140 multiple_songs = False
3141 for row in rows:
3142 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3143 multiple_songs = True
3144 break
3145 for row in rows:
3146 mrr = row.get('metadataRowRenderer') or {}
3147 mrr_title = mrr.get('title')
3148 if not mrr_title:
3149 continue
3150 mrr_title = self._get_text(mrr, 'title')
3151 mrr_contents_text = self._get_text(mrr, ('contents', 0))
3152 if mrr_title == 'License':
3153 info['license'] = mrr_contents_text
3154 elif not multiple_songs:
3155 if mrr_title == 'Album':
3156 info['album'] = mrr_contents_text
3157 elif mrr_title == 'Artist':
3158 info['artist'] = mrr_contents_text
3159 elif mrr_title == 'Song':
3160 info['track'] = mrr_contents_text
3161
3162 fallbacks = {
3163 'channel': 'uploader',
3164 'channel_id': 'uploader_id',
3165 'channel_url': 'uploader_url',
3166 }
3167 for to, frm in fallbacks.items():
3168 if not info.get(to):
3169 info[to] = info.get(frm)
3170
3171 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3172 v = info.get(s_k)
3173 if v:
3174 info[d_k] = v
3175
3176 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3177 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3178 is_membersonly = None
3179 is_premium = None
3180 if initial_data and is_private is not None:
3181 is_membersonly = False
3182 is_premium = False
3183 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3184 badge_labels = set()
3185 for content in contents:
3186 if not isinstance(content, dict):
3187 continue
3188 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3189 for badge_label in badge_labels:
3190 if badge_label.lower() == 'members only':
3191 is_membersonly = True
3192 elif badge_label.lower() == 'premium':
3193 is_premium = True
3194 elif badge_label.lower() == 'unlisted':
3195 is_unlisted = True
3196
3197 info['availability'] = self._availability(
3198 is_private=is_private,
3199 needs_premium=is_premium,
3200 needs_subscription=is_membersonly,
3201 needs_auth=info['age_limit'] >= 18,
3202 is_unlisted=None if is_private is None else is_unlisted)
3203
3204 if self.get_param('getcomments', False):
3205 info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
3206
3207 self.mark_watched(video_id, player_responses)
3208
3209 return info
3210
3211
3212 class YoutubeTabIE(YoutubeBaseInfoExtractor):
3213 IE_DESC = 'YouTube.com tab'
3214 _VALID_URL = r'''(?x)
3215 https?://
3216 (?:\w+\.)?
3217 (?:
3218 youtube(?:kids)?\.com|
3219 invidio\.us
3220 )/
3221 (?:
3222 (?P<channel_type>channel|c|user|browse)/|
3223 (?P<not_channel>
3224 feed/|hashtag/|
3225 (?:playlist|watch)\?.*?\blist=
3226 )|
3227 (?!(?:%s)\b) # Direct URLs
3228 )
3229 (?P<id>[^/?\#&]+)
3230 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3231 IE_NAME = 'youtube:tab'
3232
3233 _TESTS = [{
3234 'note': 'playlists, multipage',
3235 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3236 'playlist_mincount': 94,
3237 'info_dict': {
3238 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3239 'title': 'Игорь Клейнер - Playlists',
3240 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3241 'uploader': 'Игорь Клейнер',
3242 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3243 },
3244 }, {
3245 'note': 'playlists, multipage, different order',
3246 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3247 'playlist_mincount': 94,
3248 'info_dict': {
3249 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3250 'title': 'Игорь Клейнер - Playlists',
3251 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3252 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3253 'uploader': 'Игорь Клейнер',
3254 },
3255 }, {
3256 'note': 'playlists, series',
3257 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3258 'playlist_mincount': 5,
3259 'info_dict': {
3260 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3261 'title': '3Blue1Brown - Playlists',
3262 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3263 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3264 'uploader': '3Blue1Brown',
3265 },
3266 }, {
3267 'note': 'playlists, singlepage',
3268 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3269 'playlist_mincount': 4,
3270 'info_dict': {
3271 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3272 'title': 'ThirstForScience - Playlists',
3273 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3274 'uploader': 'ThirstForScience',
3275 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3276 }
3277 }, {
3278 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3279 'only_matching': True,
3280 }, {
3281 'note': 'basic, single video playlist',
3282 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3283 'info_dict': {
3284 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3285 'uploader': 'Sergey M.',
3286 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3287 'title': 'youtube-dl public playlist',
3288 },
3289 'playlist_count': 1,
3290 }, {
3291 'note': 'empty playlist',
3292 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3293 'info_dict': {
3294 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3295 'uploader': 'Sergey M.',
3296 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3297 'title': 'youtube-dl empty playlist',
3298 },
3299 'playlist_count': 0,
3300 }, {
3301 'note': 'Home tab',
3302 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3303 'info_dict': {
3304 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3305 'title': 'lex will - Home',
3306 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3307 'uploader': 'lex will',
3308 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3309 },
3310 'playlist_mincount': 2,
3311 }, {
3312 'note': 'Videos tab',
3313 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3314 'info_dict': {
3315 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3316 'title': 'lex will - Videos',
3317 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3318 'uploader': 'lex will',
3319 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3320 },
3321 'playlist_mincount': 975,
3322 }, {
3323 'note': 'Videos tab, sorted by popular',
3324 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3325 'info_dict': {
3326 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3327 'title': 'lex will - Videos',
3328 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3329 'uploader': 'lex will',
3330 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3331 },
3332 'playlist_mincount': 199,
3333 }, {
3334 'note': 'Playlists tab',
3335 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3336 'info_dict': {
3337 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3338 'title': 'lex will - Playlists',
3339 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3340 'uploader': 'lex will',
3341 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3342 },
3343 'playlist_mincount': 17,
3344 }, {
3345 'note': 'Community tab',
3346 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3347 'info_dict': {
3348 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3349 'title': 'lex will - Community',
3350 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3351 'uploader': 'lex will',
3352 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3353 },
3354 'playlist_mincount': 18,
3355 }, {
3356 'note': 'Channels tab',
3357 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3358 'info_dict': {
3359 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3360 'title': 'lex will - Channels',
3361 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3362 'uploader': 'lex will',
3363 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3364 },
3365 'playlist_mincount': 12,
3366 }, {
3367 'note': 'Search tab',
3368 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3369 'playlist_mincount': 40,
3370 'info_dict': {
3371 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3372 'title': '3Blue1Brown - Search - linear algebra',
3373 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3374 'uploader': '3Blue1Brown',
3375 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3376 },
3377 }, {
3378 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3379 'only_matching': True,
3380 }, {
3381 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3382 'only_matching': True,
3383 }, {
3384 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3385 'only_matching': True,
3386 }, {
3387 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3388 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3389 'info_dict': {
3390 'title': '29C3: Not my department',
3391 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3392 'uploader': 'Christiaan008',
3393 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3394 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3395 },
3396 'playlist_count': 96,
3397 }, {
3398 'note': 'Large playlist',
3399 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3400 'info_dict': {
3401 'title': 'Uploads from Cauchemar',
3402 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3403 'uploader': 'Cauchemar',
3404 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3405 },
3406 'playlist_mincount': 1123,
3407 }, {
3408 'note': 'even larger playlist, 8832 videos',
3409 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3410 'only_matching': True,
3411 }, {
3412 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3413 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3414 'info_dict': {
3415 'title': 'Uploads from Interstellar Movie',
3416 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3417 'uploader': 'Interstellar Movie',
3418 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3419 },
3420 'playlist_mincount': 21,
3421 }, {
3422 'note': 'Playlist with "show unavailable videos" button',
3423 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3424 'info_dict': {
3425 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3426 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3427 'uploader': 'Phim Siêu Nhân Nhật Bản',
3428 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3429 },
3430 'playlist_mincount': 200,
3431 }, {
3432 'note': 'Playlist with unavailable videos in page 7',
3433 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3434 'info_dict': {
3435 'title': 'Uploads from BlankTV',
3436 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3437 'uploader': 'BlankTV',
3438 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3439 },
3440 'playlist_mincount': 1000,
3441 }, {
3442 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3443 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3444 'info_dict': {
3445 'title': 'Data Analysis with Dr Mike Pound',
3446 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3447 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3448 'uploader': 'Computerphile',
3449 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3450 },
3451 'playlist_mincount': 11,
3452 }, {
3453 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3454 'only_matching': True,
3455 }, {
3456 'note': 'Playlist URL that does not actually serve a playlist',
3457 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3458 'info_dict': {
3459 'id': 'FqZTN594JQw',
3460 'ext': 'webm',
3461 'title': "Smiley's People 01 detective, Adventure Series, Action",
3462 'uploader': 'STREEM',
3463 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3464 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3465 'upload_date': '20150526',
3466 'license': 'Standard YouTube License',
3467 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3468 'categories': ['People & Blogs'],
3469 'tags': list,
3470 'view_count': int,
3471 'like_count': int,
3472 'dislike_count': int,
3473 },
3474 'params': {
3475 'skip_download': True,
3476 },
3477 'skip': 'This video is not available.',
3478 'add_ie': [YoutubeIE.ie_key()],
3479 }, {
3480 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3481 'only_matching': True,
3482 }, {
3483 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3484 'only_matching': True,
3485 }, {
3486 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3487 'info_dict': {
3488 'id': '3yImotZU3tw', # This will keep changing
3489 'ext': 'mp4',
3490 'title': compat_str,
3491 'uploader': 'Sky News',
3492 'uploader_id': 'skynews',
3493 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3494 'upload_date': r're:\d{8}',
3495 'description': compat_str,
3496 'categories': ['News & Politics'],
3497 'tags': list,
3498 'like_count': int,
3499 'dislike_count': int,
3500 },
3501 'params': {
3502 'skip_download': True,
3503 },
3504 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3505 }, {
3506 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3507 'info_dict': {
3508 'id': 'a48o2S1cPoo',
3509 'ext': 'mp4',
3510 'title': 'The Young Turks - Live Main Show',
3511 'uploader': 'The Young Turks',
3512 'uploader_id': 'TheYoungTurks',
3513 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3514 'upload_date': '20150715',
3515 'license': 'Standard YouTube License',
3516 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3517 'categories': ['News & Politics'],
3518 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3519 'like_count': int,
3520 'dislike_count': int,
3521 },
3522 'params': {
3523 'skip_download': True,
3524 },
3525 'only_matching': True,
3526 }, {
3527 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3528 'only_matching': True,
3529 }, {
3530 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3531 'only_matching': True,
3532 }, {
3533 'note': 'A channel that is not live. Should raise error',
3534 'url': 'https://www.youtube.com/user/numberphile/live',
3535 'only_matching': True,
3536 }, {
3537 'url': 'https://www.youtube.com/feed/trending',
3538 'only_matching': True,
3539 }, {
3540 'url': 'https://www.youtube.com/feed/library',
3541 'only_matching': True,
3542 }, {
3543 'url': 'https://www.youtube.com/feed/history',
3544 'only_matching': True,
3545 }, {
3546 'url': 'https://www.youtube.com/feed/subscriptions',
3547 'only_matching': True,
3548 }, {
3549 'url': 'https://www.youtube.com/feed/watch_later',
3550 'only_matching': True,
3551 }, {
3552 'note': 'Recommended - redirects to home page',
3553 'url': 'https://www.youtube.com/feed/recommended',
3554 'only_matching': True,
3555 }, {
3556 'note': 'inline playlist with not always working continuations',
3557 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3558 'only_matching': True,
3559 }, {
3560 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3561 'only_matching': True,
3562 }, {
3563 'url': 'https://www.youtube.com/course',
3564 'only_matching': True,
3565 }, {
3566 'url': 'https://www.youtube.com/zsecurity',
3567 'only_matching': True,
3568 }, {
3569 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3570 'only_matching': True,
3571 }, {
3572 'url': 'https://www.youtube.com/TheYoungTurks/live',
3573 'only_matching': True,
3574 }, {
3575 'url': 'https://www.youtube.com/hashtag/cctv9',
3576 'info_dict': {
3577 'id': 'cctv9',
3578 'title': '#cctv9',
3579 },
3580 'playlist_mincount': 350,
3581 }, {
3582 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3583 'only_matching': True,
3584 }, {
3585 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3586 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3587 'only_matching': True
3588 }, {
3589 'note': '/browse/ should redirect to /channel/',
3590 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3591 'only_matching': True
3592 }, {
3593 'note': 'VLPL, should redirect to playlist?list=PL...',
3594 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3595 'info_dict': {
3596 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3597 'uploader': 'NoCopyrightSounds',
3598 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3599 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3600 'title': 'NCS Releases',
3601 },
3602 'playlist_mincount': 166,
3603 }, {
3604 'note': 'Topic, should redirect to playlist?list=UU...',
3605 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3606 'info_dict': {
3607 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3608 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3609 'title': 'Uploads from Royalty Free Music - Topic',
3610 'uploader': 'Royalty Free Music - Topic',
3611 },
3612 'expected_warnings': [
3613 'A channel/user page was given',
3614 'The URL does not have a videos tab',
3615 ],
3616 'playlist_mincount': 101,
3617 }, {
3618 'note': 'Topic without a UU playlist',
3619 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3620 'info_dict': {
3621 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3622 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3623 },
3624 'expected_warnings': [
3625 'A channel/user page was given',
3626 'The URL does not have a videos tab',
3627 'Falling back to channel URL',
3628 ],
3629 'playlist_mincount': 9,
3630 }, {
3631 'note': 'Youtube music Album',
3632 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3633 'info_dict': {
3634 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3635 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3636 },
3637 'playlist_count': 50,
3638 }, {
3639 'note': 'unlisted single video playlist',
3640 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3641 'info_dict': {
3642 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3643 'uploader': 'colethedj',
3644 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3645 'title': 'yt-dlp unlisted playlist test',
3646 'availability': 'unlisted'
3647 },
3648 'playlist_count': 1,
3649 }]
3650
3651 @classmethod
3652 def suitable(cls, url):
3653 return False if YoutubeIE.suitable(url) else super(
3654 YoutubeTabIE, cls).suitable(url)
3655
3656 def _extract_channel_id(self, webpage):
3657 channel_id = self._html_search_meta(
3658 'channelId', webpage, 'channel id', default=None)
3659 if channel_id:
3660 return channel_id
3661 channel_url = self._html_search_meta(
3662 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3663 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3664 'twitter:app:url:googleplay'), webpage, 'channel url')
3665 return self._search_regex(
3666 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3667 channel_url, 'channel id')
3668
3669 @staticmethod
3670 def _extract_basic_item_renderer(item):
3671 # Modified from _extract_grid_item_renderer
3672 known_basic_renderers = (
3673 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3674 )
3675 for key, renderer in item.items():
3676 if not isinstance(renderer, dict):
3677 continue
3678 elif key in known_basic_renderers:
3679 return renderer
3680 elif key.startswith('grid') and key.endswith('Renderer'):
3681 return renderer
3682
    def _grid_entries(self, grid_renderer):
        """Yield playlist/video/channel url_result entries for each item of a
        gridRenderer, falling back to a generic endpoint URL when the renderer
        carries none of the known id fields."""
        for item in grid_renderer['items']:
            if not isinstance(item, dict):
                continue
            renderer = self._extract_basic_item_renderer(item)
            if not isinstance(renderer, dict):
                continue
            title = self._get_text(renderer, 'title')

            # playlist
            playlist_id = renderer.get('playlistId')
            if playlist_id:
                yield self.url_result(
                    'https://www.youtube.com/playlist?list=%s' % playlist_id,
                    ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                    video_title=title)
                continue
            # video
            video_id = renderer.get('videoId')
            if video_id:
                yield self._extract_video(renderer)
                continue
            # channel
            channel_id = renderer.get('channelId')
            if channel_id:
                yield self.url_result(
                    'https://www.youtube.com/channel/%s' % channel_id,
                    ie=YoutubeTabIE.ie_key(), video_title=title)
                continue
            # generic endpoint URL support
            ep_url = urljoin('https://www.youtube.com/', try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str))
            if ep_url:
                # Hand the URL to the first extractor that recognises it
                for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
                    if ie.suitable(ep_url):
                        yield self.url_result(
                            ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
                        break
3722
3723 def _shelf_entries_from_content(self, shelf_renderer):
3724 content = shelf_renderer.get('content')
3725 if not isinstance(content, dict):
3726 return
3727 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3728 if renderer:
3729 # TODO: add support for nested playlists so each shelf is processed
3730 # as separate playlist
3731 # TODO: this includes only first N items
3732 for entry in self._grid_entries(renderer):
3733 yield entry
3734 renderer = content.get('horizontalListRenderer')
3735 if renderer:
3736 # TODO
3737 pass
3738
3739 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3740 ep = try_get(
3741 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3742 compat_str)
3743 shelf_url = urljoin('https://www.youtube.com', ep)
3744 if shelf_url:
3745 # Skipping links to another channels, note that checking for
3746 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3747 # will not work
3748 if skip_channels and '/channels?' in shelf_url:
3749 return
3750 title = self._get_text(shelf_renderer, 'title')
3751 yield self.url_result(shelf_url, video_title=title)
3752 # Shelf may not contain shelf URL, fallback to extraction from content
3753 for entry in self._shelf_entries_from_content(shelf_renderer):
3754 yield entry
3755
3756 def _playlist_entries(self, video_list_renderer):
3757 for content in video_list_renderer['contents']:
3758 if not isinstance(content, dict):
3759 continue
3760 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3761 if not isinstance(renderer, dict):
3762 continue
3763 video_id = renderer.get('videoId')
3764 if not video_id:
3765 continue
3766 yield self._extract_video(renderer)
3767
3768 def _rich_entries(self, rich_grid_renderer):
3769 renderer = try_get(
3770 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3771 video_id = renderer.get('videoId')
3772 if not video_id:
3773 return
3774 yield self._extract_video(renderer)
3775
3776 def _video_entry(self, video_renderer):
3777 video_id = video_renderer.get('videoId')
3778 if video_id:
3779 return self._extract_video(video_renderer)
3780
    def _post_thread_entries(self, post_thread_renderer):
        """Yield entries for a community post: the attached video and/or
        playlist, plus any YouTube video links inside the post text."""
        post_renderer = try_get(
            post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
        if not post_renderer:
            return
        # video attachment
        video_renderer = try_get(
            post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
        video_id = video_renderer.get('videoId')
        if video_id:
            entry = self._extract_video(video_renderer)
            if entry:
                yield entry
        # playlist attachment
        playlist_id = try_get(
            post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
        # inline video links
        runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
        for run in runs:
            if not isinstance(run, dict):
                continue
            ep_url = try_get(
                run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
            if not ep_url:
                continue
            if not YoutubeIE.suitable(ep_url):
                continue
            ep_video_id = YoutubeIE._match_id(ep_url)
            if video_id == ep_video_id:
                # Skip links that merely duplicate the attached video
                continue
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3816
3817 def _post_thread_continuation_entries(self, post_thread_continuation):
3818 contents = post_thread_continuation.get('contents')
3819 if not isinstance(contents, list):
3820 return
3821 for content in contents:
3822 renderer = content.get('backstagePostThreadRenderer')
3823 if not isinstance(renderer, dict):
3824 continue
3825 for entry in self._post_thread_entries(renderer):
3826 yield entry
3827
3828 r''' # unused
3829 def _rich_grid_entries(self, contents):
3830 for content in contents:
3831 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3832 if video_renderer:
3833 entry = self._video_entry(video_renderer)
3834 if entry:
3835 yield entry
3836 '''
    def _entries(self, tab, item_id, account_syncid, ytcfg):
        """Yield all entries of a tab, following API continuations page by page
        until no further continuation token is produced."""

        def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
            # Also records the next continuation token into continuation_list[0]
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue

                    known_renderers = {
                        'playlistVideoListRenderer': self._playlist_entries,
                        'gridRenderer': self._grid_entries,
                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                        'backstagePostThreadRenderer': self._post_thread_entries,
                        'videoRenderer': lambda x: [self._video_entry(x)],
                    }
                    # Dispatch on the first recognised renderer key
                    for key, renderer in isr_content.items():
                        if key not in known_renderers:
                            continue
                        for entry in known_renderers[key](renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        break

                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        # Single-element list used as a mutable cell (Python 2 does not support nonlocal)
        continuation_list = [None]
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]
        visitor_data = None

        for page_num in itertools.count(1):
            if not continuation:
                break
            headers = self.generate_api_headers(
                ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=continuation, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # Carry visitorData across pages so YouTube keeps serving the same session
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            # Old-style continuation payloads
            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # New-style continuation payloads: the item key tells us how to wrap
            # continuation_items so the existing entry extractors can consume it
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'gridChannelRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            break
3953
3954 @staticmethod
3955 def _extract_selected_tab(tabs):
3956 for tab in tabs:
3957 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3958 if renderer.get('selected') is True:
3959 return renderer
3960 else:
3961 raise ExtractorError('Unable to find selected tab')
3962
3963 @classmethod
3964 def _extract_uploader(cls, data):
3965 uploader = {}
3966 renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3967 owner = try_get(
3968 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3969 if owner:
3970 uploader['uploader'] = owner.get('text')
3971 uploader['uploader_id'] = try_get(
3972 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3973 uploader['uploader_url'] = urljoin(
3974 'https://www.youtube.com/',
3975 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3976 return {k: v for k, v in uploader.items() if v is not None}
3977
    def _extract_from_tabs(self, item_id, webpage, data, tabs):
        """Build a playlist result for a tabbed page (channel/playlist/hashtag).

        Collects title/description/uploader/thumbnail metadata from the
        channel or playlist metadata renderers and delegates entry extraction
        for the selected tab to _entries().
        """
        playlist_id = title = description = channel_url = channel_name = channel_id = None
        thumbnails_list = tags = []

        selected_tab = self._extract_selected_tab(tabs)
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        if renderer:
            channel_name = renderer.get('title')
            channel_url = renderer.get('channelUrl')
            channel_id = renderer.get('externalId')
        else:
            # Fall back to playlist metadata when this is not a channel page
            renderer = try_get(
                data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

        if renderer:
            title = renderer.get('title')
            description = renderer.get('description', '')
            playlist_id = channel_id
            tags = renderer.get('keywords', '').split()
            thumbnails_list = (
                try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
                or try_get(
                    self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                    lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                    list)
                or [])

        thumbnails = []
        for t in thumbnails_list:
            if not isinstance(t, dict):
                continue
            thumbnail_url = url_or_none(t.get('url'))
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(t.get('width')),
                'height': int_or_none(t.get('height')),
            })
        if playlist_id is None:
            playlist_id = item_id
        if title is None:
            # Hashtag pages carry their title in a dedicated header renderer
            title = (
                try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
                or playlist_id)
        # Append tab name, e.g. "lex will - Videos"
        title += format_field(selected_tab, 'title', ' - %s')
        title += format_field(selected_tab, 'expandedText', ' - %s')
        metadata = {
            'playlist_id': playlist_id,
            'playlist_title': title,
            'playlist_description': description,
            'uploader': channel_name,
            'uploader_id': channel_id,
            'uploader_url': channel_url,
            'thumbnails': thumbnails,
            'tags': tags,
        }
        availability = self._extract_availability(data)
        if availability:
            metadata['availability'] = availability
        if not channel_id:
            # No channel metadata renderer: derive uploader info from the sidebar
            metadata.update(self._extract_uploader(data))
        metadata.update({
            'channel': metadata['uploader'],
            'channel_id': metadata['uploader_id'],
            'channel_url': metadata['uploader_url']})
        ytcfg = self.extract_ytcfg(item_id, webpage)
        return self.playlist_result(
            self._entries(
                selected_tab, playlist_id,
                self._extract_account_syncid(ytcfg, data), ytcfg),
            **metadata)
4051
    def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
        """Yield the videos of an "infinite" mix playlist by repeatedly calling
        the 'next' API endpoint, stopping when the mix loops back to its first
        video or stops returning new ones."""
        first_id = last_id = None
        ytcfg = self.extract_ytcfg(playlist_id, webpage)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data))
        for page_num in itertools.count(1):
            videos = list(self._playlist_entries(playlist))
            if not videos:
                return
            # Resume right after the last video yielded from the previous page
            # (pages may overlap); -1 + 1 == 0 when last_id is not present
            start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
            if start >= len(videos):
                return
            for video in videos[start:]:
                if video['id'] == first_id:
                    self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                    return
                yield video
            first_id = first_id or videos[0]['id']
            last_id = videos[-1]['id']
            watch_endpoint = try_get(
                playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
            query = {
                'playlistId': playlist_id,
                'videoId': watch_endpoint.get('videoId') or last_id,
                'index': watch_endpoint.get('index') or len(videos),
                # 'OAE%3D' appears to be the default mix continuation params
                # when the watch endpoint does not provide any — TODO confirm
                'params': watch_endpoint.get('params') or 'OAE%3D'
            }
            response = self._extract_response(
                item_id='%s page %d' % (playlist_id, page_num),
                query=query, ep='next', headers=headers, ytcfg=ytcfg,
                check_get_keys='contents'
            )
            playlist = try_get(
                response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4086
4087 def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
4088 title = playlist.get('title') or try_get(
4089 data, lambda x: x['titleText']['simpleText'], compat_str)
4090 playlist_id = playlist.get('playlistId') or item_id
4091
4092 # Delegating everything except mix playlists to regular tab-based playlist URL
4093 playlist_url = urljoin(url, try_get(
4094 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
4095 compat_str))
4096 if playlist_url and playlist_url != url:
4097 return self.url_result(
4098 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4099 video_title=title)
4100
4101 return self.playlist_result(
4102 self._extract_mix_playlist(playlist, playlist_id, data, webpage),
4103 playlist_id=playlist_id, playlist_title=title)
4104
4105 def _extract_availability(self, data):
4106 """
4107 Gets the availability of a given playlist/tab.
4108 Note: Unless YouTube tells us explicitly, we do not assume it is public
4109 @param data: response
4110 """
4111 is_private = is_unlisted = None
4112 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
4113 badge_labels = self._extract_badges(renderer)
4114
4115 # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
4116 privacy_dropdown_entries = try_get(
4117 renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
4118 for renderer_dict in privacy_dropdown_entries:
4119 is_selected = try_get(
4120 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
4121 if not is_selected:
4122 continue
4123 label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
4124 if label:
4125 badge_labels.add(label.lower())
4126 break
4127
4128 for badge_label in badge_labels:
4129 if badge_label == 'unlisted':
4130 is_unlisted = True
4131 elif badge_label == 'private':
4132 is_private = True
4133 elif badge_label == 'public':
4134 is_unlisted = is_private = False
4135 return self._availability(is_private, False, False, False, is_unlisted)
4136
4137 @staticmethod
4138 def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
4139 sidebar_renderer = try_get(
4140 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
4141 for item in sidebar_renderer:
4142 renderer = try_get(item, lambda x: x[info_renderer], expected_type)
4143 if renderer:
4144 return renderer
4145
    def _reload_with_unavailable_videos(self, item_id, data, webpage):
        """
        Get playlist with unavailable videos if the 'show unavailable videos' button exists.

        @param item_id: playlist id, used for the fallback browseId and logging
        @param data: parsed ytInitialData of the playlist page
        @param webpage: the playlist webpage (for ytcfg extraction)
        @returns the reloaded API response (fatal=False, so possibly None), or
                 None when the sidebar renderer is missing entirely
        """
        browse_id = params = None
        renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
        if not renderer:
            return
        # Look for the 'show unavailable videos' menu entry to get its endpoint
        menu_renderer = try_get(
            renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_renderer:
            if not isinstance(menu_item, dict):
                continue
            nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
            text = try_get(
                nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
            if not text or text.lower() != 'show unavailable videos':
                continue
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id = browse_endpoint.get('browseId')
            params = browse_endpoint.get('params')
            break

        ytcfg = self.extract_ytcfg(item_id, webpage)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
        query = {
            # 'wgYCCAA=' / 'VL<id>' are the defaults used when no explicit
            # button endpoint was found — presumably they request the plain
            # playlist browse; TODO confirm
            'params': params or 'wgYCCAA=',
            'browseId': browse_id or 'VL%s' % item_id
        }
        return self._extract_response(
            item_id=item_id, headers=headers, query=query,
            check_get_keys='contents', fatal=False, ytcfg=ytcfg,
            note='Downloading API JSON with unavailable videos')
4182
4183 def _extract_webpage(self, url, item_id):
4184 retries = self.get_param('extractor_retries', 3)
4185 count = -1
4186 last_error = 'Incomplete yt initial data recieved'
4187 while count < retries:
4188 count += 1
4189 # Sometimes youtube returns a webpage with incomplete ytInitialData
4190 # See: https://github.com/yt-dlp/yt-dlp/issues/116
4191 if count:
4192 self.report_warning('%s. Retrying ...' % last_error)
4193 webpage = self._download_webpage(
4194 url, item_id,
4195 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
4196 data = self.extract_yt_initial_data(item_id, webpage)
4197 if data.get('contents') or data.get('currentVideoEndpoint'):
4198 break
4199 # Extract alerts here only when there is error
4200 self._extract_and_report_alerts(data)
4201 if count >= retries:
4202 raise ExtractorError(last_error)
4203 return webpage, data
4204
4205 @staticmethod
4206 def _smuggle_data(entries, data):
4207 for entry in entries:
4208 if data:
4209 entry['url'] = smuggle_url(entry['url'], data)
4210 yield entry
4211
4212 def _real_extract(self, url):
4213 url, smuggled_data = unsmuggle_url(url, {})
4214 if self.is_music_url(url):
4215 smuggled_data['is_music_url'] = True
4216 info_dict = self.__real_extract(url, smuggled_data)
4217 if info_dict.get('entries'):
4218 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4219 return info_dict
4220
    # Splits a matched URL into (pre)(tab)(post); the optional /<tab> group is
    # only matched when the 'channel_type' group of _VALID_URL participated
    # (conditional pattern (?(channel_type)...))
    _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4222
    def __real_extract(self, url, smuggled_data):
        """
        Main tab/playlist extraction: normalise the URL (host, tab casing,
        music redirects), download the page data, then dispatch to the
        tab / playlist / single-video handlers.
        """
        item_id = self._match_id(url)
        # Force the canonical host; other hosts may return different data
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # Re-match the (possibly rewritten) URL; blank out missing groups
            mobj = self._url_re.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj = get_mobj(url)
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':
                    # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif item_id[:2] == 'MP':
                    # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                    item_id = self._search_regex(
                        r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                        self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                        'playlist id')
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif mobj['channel_type'] == 'browse':
                    # Youtube music /browse/ should be changed to /channel/
                    pre = 'https://www.youtube.com/channel/%s' % item_id
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage, data = self._extract_webpage(url, item_id)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                        pl_id = 'UU%s' % item_id[2:]
                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                        try:
                            pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                            for alert_type, alert_message in self._extract_alerts(pl_data):
                                if alert_type == 'error':
                                    raise ExtractorError('Youtube said: %s' % alert_message)
                            item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                        except ExtractorError:
                            self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

        self.write_debug('Final URL: %s' % url)

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
        self._extract_and_report_alerts(data, only_once=True)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)

        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, webpage)

        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4337
4338
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Defer to YoutubeTabIE for URLs it already claims
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        qs = parse_qs(url)
        # watch?v=... URLs are videos, not playlists
        if qs.get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        # Rebuild a canonical /playlist URL and delegate to YoutubeTabIE
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            # Preserve the music origin across the url_result round trip
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4423
4424
class YoutubeYtBeIE(InfoExtractor):
    # Handles youtu.be short links that also carry a playlist id
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Rebuild a canonical watch URL carrying both the video and playlist ids
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        playlist_id = mobj.group('playlist_id')
        return self.url_result(
            update_url_query('https://www.youtube.com/watch', {
                'v': video_id,
                'list': playlist_id,
                'feature': 'youtu.be',
            }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4463
4464
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Translate the ytuser: shorthand into the canonical /user/ URL and
        # hand it to the tab extractor
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4478
4479
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Liked videos are exposed as the special 'LL' playlist
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
4497
4498
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Optional innertube search 'params' blob; subclasses may override
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to `n` video results for `query`, paging through the search API."""
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        continuation = {}
        for page_num in itertools.count(1):
            data.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # First page nests results under the search renderer; continuation
            # pages deliver them via onResponseReceivedCommands
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for slr_content in slr_contents:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [slr_content]})

                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    if not isinstance(content, dict):
                        continue
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            # No continuation token found on this page: results are exhausted
            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query, query)
4566
4567
class YoutubeSearchDateIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # Search params blob requesting the newest-first ordering described in IE_DESC
    _SEARCH_PARAMS = 'CAI%3D'
4573
4574
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        # Match the plain _VALID_URL above instead of a '<key>N:query' pattern
        return cls._VALID_URL

    def _real_extract(self, url):
        qs = parse_qs(url)
        # _VALID_URL guarantees at least one of search_query/q is present
        query = (qs.get('search_query') or qs.get('q'))[0]
        # Pass the URL's 'sp' filter blob through to the search requests
        self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
4601
4602
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derived name, e.g. 'youtube:history' when _FEED_NAME is 'history'
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        # Delegate to YoutubeTabIE via the canonical feed URL
        return self.url_result(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            ie=YoutubeTabIE.ie_key())
4619
4620
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Watch-later is exposed as the special 'WL' playlist
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
4633
4634
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Also matches the bare youtube.com homepage
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False  # overrides the base class requirement
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4650
4651
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Resolves to https://www.youtube.com/feed/subscriptions via the base class
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4663
4664
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Resolves to https://www.youtube.com/feed/history via the base class
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4673
4674
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch URLs that carry query parameters but no video id -- the
    # typical symptom of an unquoted '&' splitting the real URL in a shell
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
        attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Always an error: tell the user how to quote the URL properly
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
            expected=True)
4722
4723
class YoutubeClipIE(InfoExtractor):
    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        # Clips are unsupported; fall back to the generic extractor, which
        # results in the full source video being downloaded
        self.report_warning('YouTube clips are not currently supported. The entire video will be downloaded instead')
        return self.url_result(url, 'Generic')
4732
4733
class YoutubeTruncatedIDIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Matching this pattern means the id is 1-10 chars, shorter than a
        # full YouTube video id -- report the truncation to the user
        truncated_id = self._match_id(url)
        raise ExtractorError(
            'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url),
            expected=True)