]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/youtube.py
[docs,cleanup] Some minor refactoring and improve docs
[yt-dlp.git] / yt_dlp / extractor / youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import base64
6 import calendar
7 import copy
8 import datetime
9 import hashlib
10 import itertools
11 import json
12 import os.path
13 import random
14 import re
15 import time
16 import traceback
17
18 from .common import InfoExtractor, SearchInfoExtractor
19 from ..compat import (
20 compat_chr,
21 compat_HTTPError,
22 compat_parse_qs,
23 compat_str,
24 compat_urllib_parse_unquote_plus,
25 compat_urllib_parse_urlencode,
26 compat_urllib_parse_urlparse,
27 compat_urlparse,
28 )
29 from ..jsinterp import JSInterpreter
30 from ..utils import (
31 bytes_to_intlist,
32 clean_html,
33 datetime_from_str,
34 dict_get,
35 error_to_compat_str,
36 ExtractorError,
37 float_or_none,
38 format_field,
39 int_or_none,
40 intlist_to_bytes,
41 is_html,
42 mimetype2ext,
43 network_exceptions,
44 orderedSet,
45 parse_codecs,
46 parse_count,
47 parse_duration,
48 parse_iso8601,
49 parse_qs,
50 qualities,
51 remove_end,
52 remove_start,
53 smuggle_url,
54 str_or_none,
55 str_to_int,
56 traverse_obj,
57 try_get,
58 unescapeHTML,
59 unified_strdate,
60 unsmuggle_url,
61 update_url_query,
62 url_or_none,
63 urljoin,
64 variadic,
65 )
66
67
68 # any clients starting with _ cannot be explicity requested by the user
69 INNERTUBE_CLIENTS = {
70 'web': {
71 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
72 'INNERTUBE_CONTEXT': {
73 'client': {
74 'clientName': 'WEB',
75 'clientVersion': '2.20210622.10.00',
76 }
77 },
78 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
79 },
80 'web_embedded': {
81 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
82 'INNERTUBE_CONTEXT': {
83 'client': {
84 'clientName': 'WEB_EMBEDDED_PLAYER',
85 'clientVersion': '1.20210620.0.1',
86 },
87 },
88 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
89 },
90 'web_music': {
91 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
92 'INNERTUBE_HOST': 'music.youtube.com',
93 'INNERTUBE_CONTEXT': {
94 'client': {
95 'clientName': 'WEB_REMIX',
96 'clientVersion': '1.20210621.00.00',
97 }
98 },
99 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
100 },
101 'web_creator': {
102 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
103 'INNERTUBE_CONTEXT': {
104 'client': {
105 'clientName': 'WEB_CREATOR',
106 'clientVersion': '1.20210621.00.00',
107 }
108 },
109 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
110 },
111 'android': {
112 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
113 'INNERTUBE_CONTEXT': {
114 'client': {
115 'clientName': 'ANDROID',
116 'clientVersion': '16.20',
117 }
118 },
119 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
120 'REQUIRE_JS_PLAYER': False
121 },
122 'android_embedded': {
123 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
124 'INNERTUBE_CONTEXT': {
125 'client': {
126 'clientName': 'ANDROID_EMBEDDED_PLAYER',
127 'clientVersion': '16.20',
128 },
129 },
130 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
131 'REQUIRE_JS_PLAYER': False
132 },
133 'android_music': {
134 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
135 'INNERTUBE_HOST': 'music.youtube.com',
136 'INNERTUBE_CONTEXT': {
137 'client': {
138 'clientName': 'ANDROID_MUSIC',
139 'clientVersion': '4.32',
140 }
141 },
142 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
143 'REQUIRE_JS_PLAYER': False
144 },
145 'android_creator': {
146 'INNERTUBE_CONTEXT': {
147 'client': {
148 'clientName': 'ANDROID_CREATOR',
149 'clientVersion': '21.24.100',
150 },
151 },
152 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
153 'REQUIRE_JS_PLAYER': False
154 },
155 # ios has HLS live streams
156 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
157 'ios': {
158 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
159 'INNERTUBE_CONTEXT': {
160 'client': {
161 'clientName': 'IOS',
162 'clientVersion': '16.20',
163 }
164 },
165 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
166 'REQUIRE_JS_PLAYER': False
167 },
168 'ios_embedded': {
169 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
170 'INNERTUBE_CONTEXT': {
171 'client': {
172 'clientName': 'IOS_MESSAGES_EXTENSION',
173 'clientVersion': '16.20',
174 },
175 },
176 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
177 'REQUIRE_JS_PLAYER': False
178 },
179 'ios_music': {
180 'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
181 'INNERTUBE_HOST': 'music.youtube.com',
182 'INNERTUBE_CONTEXT': {
183 'client': {
184 'clientName': 'IOS_MUSIC',
185 'clientVersion': '4.32',
186 },
187 },
188 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
189 'REQUIRE_JS_PLAYER': False
190 },
191 'ios_creator': {
192 'INNERTUBE_CONTEXT': {
193 'client': {
194 'clientName': 'IOS_CREATOR',
195 'clientVersion': '21.24.100',
196 },
197 },
198 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
199 'REQUIRE_JS_PLAYER': False
200 },
201 # mweb has 'ultralow' formats
202 # See: https://github.com/yt-dlp/yt-dlp/pull/557
203 'mweb': {
204 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
205 'INNERTUBE_CONTEXT': {
206 'client': {
207 'clientName': 'MWEB',
208 'clientVersion': '2.20210721.07.00',
209 }
210 },
211 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
212 },
213 }
214
215
def build_innertube_clients():
    """Normalize INNERTUBE_CLIENTS in place.

    Fills in missing per-client defaults, assigns a selection priority to
    every client, and derives an ``<client>_agegate`` variant for each of
    the base clients.
    """
    THIRD_PARTY = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    BASE_CLIENTS = ('android', 'web', 'ios', 'mweb')
    # Higher value = preferred client; reversed so that 'android' ranks first
    priority_of = qualities(BASE_CLIENTS[::-1])

    # Iterate over a snapshot since *_agegate entries are added while looping
    for name, cfg in tuple(INNERTUBE_CLIENTS.items()):
        for key, default in (
                ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
                ('INNERTUBE_HOST', 'www.youtube.com'),
                ('REQUIRE_JS_PLAYER', True)):
            cfg.setdefault(key, default)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
        # Priority is based on the base client name (part before the first '_')
        cfg['priority'] = 10 * priority_of(name.split('_', 1)[0])

        if name in BASE_CLIENTS:
            agegate_cfg = copy.deepcopy(cfg)
            INNERTUBE_CLIENTS[f'{name}_agegate'] = agegate_cfg
            agegate_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            agegate_cfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
            agegate_cfg['priority'] -= 1
        elif name.endswith('_embedded'):
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
            cfg['priority'] -= 2
        else:
            cfg['priority'] -= 3


build_innertube_clients()
243
244
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Path components that can never be a channel/user name in a YouTube URL
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
        r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
        r'browse|oembed|get_video_info|iframe_api|s/player|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    _NETRC_MACHINE = 'youtube'

    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    r''' # Unused since login is broken
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
    '''

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """

        def warn(message):
            self.report_warning(message)

        # username+password login is broken
        if (self._LOGIN_REQUIRED
                and self.get_param('cookiefile') is None
                and self.get_param('cookiesfrombrowser') is None):
            self.raise_login_required(
                'Login details are needed to download this content', method='cookies')
        username, password = self._get_login_info()
        if username:
            warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
        # Unconditional return: everything past this point is unreachable
        return

        # Everything below this is broken!
        r'''
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            # if self.get_param('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
            #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            return

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            return

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            warn(
                'Unable to login: %s' % 'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    warn(
                        'Unable to finish TFA: %s' % 'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True
        '''

    def _initialize_consent(self):
        """Set a cookie accepting YouTube's EU consent page, so that requests
        are not redirected to consent.youtube.com."""
        cookies = self._get_cookies('https://www.youtube.com/')
        if cookies.get('__Secure-3PSID'):
            # Logged-in sessions are not shown the consent interstitial
            return
        consent_id = None
        consent = cookies.get('CONSENT')
        if consent:
            if 'YES' in consent.value:
                # Consent already given
                return
            consent_id = self._search_regex(
                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
        if not consent_id:
            consent_id = random.randint(100, 999)
        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

    def _real_initialize(self):
        """Initialize consent cookie and (if possible) log in before extraction."""
        self._initialize_consent()
        if self._downloader is None:
            return
        if not self._login():
            return

    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _get_default_ytcfg(self, client='web'):
        """Return a deep copy of the built-in config for *client*."""
        return copy.deepcopy(INNERTUBE_CLIENTS[client])

    def _get_innertube_host(self, client='web'):
        """Return the InnerTube API hostname for *client*."""
        return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']

    def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
        # try_get but with fallback to default ytcfg client values when present
        _func = lambda y: try_get(y, getter, expected_type)
        return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))

    def _extract_client_name(self, ytcfg, default_client='web'):
        """Extract the InnerTube client name from ytcfg, falling back to the
        built-in config of *default_client*."""
        return self._ytcfg_get_safe(
            ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
                    lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)

    def _extract_client_version(self, ytcfg, default_client='web'):
        """Extract the InnerTube client version from ytcfg, falling back to the
        built-in config of *default_client*."""
        return self._ytcfg_get_safe(
            ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
                    lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)

    def _extract_api_key(self, ytcfg=None, default_client='web'):
        """Extract the InnerTube API key from ytcfg, with built-in fallback."""
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)

    def _extract_context(self, ytcfg=None, default_client='web'):
        """Build the InnerTube request context from ytcfg, falling back to and
        merging with the built-in config of *default_client*."""
        _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
        context = _get_context(ytcfg)
        if context:
            return context

        context = _get_context(self._get_default_ytcfg(default_client))
        if not ytcfg:
            return context

        # Recreate the client context (required)
        context['client'].update({
            'clientVersion': self._extract_client_version(ytcfg, default_client),
            'clientName': self._extract_client_name(ytcfg, default_client),
        })
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            context['client']['visitorData'] = visitor_data
        return context

    # Cached SAPISID cookie value; None = not yet looked up, False = not available
    _SAPISID = None

    def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
        """Return the 'SAPISIDHASH <ts>_<sha1>' Authorization header value,
        or None if no SAPISID-like cookie is available."""
        time_now = round(time.time())
        if self._SAPISID is None:
            yt_cookies = self._get_cookies('https://www.youtube.com')
            # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
            # See: https://github.com/yt-dlp/yt-dlp/issues/393
            sapisid_cookie = dict_get(
                yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
            if sapisid_cookie and sapisid_cookie.value:
                self._SAPISID = sapisid_cookie.value
                self.write_debug('Extracted SAPISID cookie')
                # SAPISID cookie is required if not already present
                if not yt_cookies.get('SAPISID'):
                    self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                    self._set_cookie(
                        '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
            else:
                self._SAPISID = False
        if not self._SAPISID:
            return None
        # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
        sapisidhash = hashlib.sha1(
            f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
        return f'SAPISIDHASH {time_now}_{sapisidhash}'

    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page',
                  context=None, api_key=None, api_hostname=None, default_client='web'):
        """POST *query* to the InnerTube endpoint *ep* and return the parsed JSON.

        context/api_key/api_hostname default to values derived from *default_client*.
        """
        data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
        data.update(query)
        real_headers = self.generate_api_headers(default_client=default_client)
        real_headers.update({'content-type': 'application/json'})
        if headers:
            real_headers.update(headers)
        return self._download_json(
            'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
            video_id=video_id, fatal=fatal, note=note, errnote=errnote,
            data=json.dumps(data).encode('utf8'), headers=real_headers,
            query={'key': api_key or self._extract_api_key()})

    def extract_yt_initial_data(self, video_id, webpage):
        """Extract and parse the ytInitialData JSON object from *webpage*."""
        return self._parse_json(
            self._search_regex(
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    @staticmethod
    def _extract_session_index(*data):
        """
        Index of current account in account list.
        See: https://github.com/yt-dlp/yt-dlp/pull/519
        """
        for ytcfg in data:
            session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
            if session_index is not None:
                return session_index

    # Deprecated?
    def _extract_identity_token(self, ytcfg=None, webpage=None):
        """Extract ID_TOKEN from ytcfg, or fall back to scraping *webpage*."""
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
                return token
        if webpage:
            return self._search_regex(
                r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
                'identity token', default=None, fatal=False)

    @staticmethod
    def _extract_account_syncid(*args):
        """
        Extract syncId required to download private playlists of secondary channels
        @params response and/or ytcfg
        """
        for data in args:
            # ytcfg includes channel_syncid if on secondary channel
            delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
            if delegated_sid:
                return delegated_sid
            sync_ids = (try_get(
                data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                       lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
            if len(sync_ids) >= 2 and sync_ids[1]:
                # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
                # and just "user_syncid||" for primary channel. We only want the channel_syncid
                return sync_ids[0]

    @property
    def is_authenticated(self):
        # Authenticated iff a SAPISIDHASH header can be generated from cookies
        return bool(self._generate_sapisidhash_header())

    def extract_ytcfg(self, video_id, webpage):
        """Extract the ytcfg.set(...) JSON object from *webpage*; {} on failure."""
        if not webpage:
            return {}
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False) or {}

    def generate_api_headers(
            self, *, ytcfg=None, account_syncid=None, session_index=None,
            visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
        """Build the HTTP headers for an InnerTube API request.

        Headers whose value resolves to None are omitted from the result.
        """
        origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
        headers = {
            'X-YouTube-Client-Name': compat_str(
                self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
            'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
            'Origin': origin,
            'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
            'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
            'X-Goog-Visitor-Id': visitor_data or try_get(
                self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
        }
        if session_index is None:
            session_index = self._extract_session_index(ytcfg)
        if account_syncid or session_index is not None:
            headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0

        auth = self._generate_sapisidhash_header(origin)
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = origin
        # Drop headers for which no value could be determined
        return {h: v for h, v in headers.items() if v is not None}

    @staticmethod
    def _build_api_continuation_query(continuation, ctp=None):
        """Build the query dict for requesting a continuation page."""
        query = {
            'continuation': continuation
        }
        # TODO: Inconsistency with clickTrackingParams.
        # Currently we have a fixed ctp contained within context (from ytcfg)
        # and a ctp in root query for continuation.
        if ctp:
            query['clickTracking'] = {'clickTrackingParams': ctp}
        return query

    @classmethod
    def _extract_next_continuation_data(cls, renderer):
        """Extract a continuation query from the legacy 'continuations' entries
        of *renderer*; None when absent."""
        next_continuation = try_get(
            renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                       lambda x: x['continuation']['reloadContinuationData']), dict)
        if not next_continuation:
            return
        continuation = next_continuation.get('continuation')
        if not continuation:
            return
        ctp = next_continuation.get('clickTrackingParams')
        return cls._build_api_continuation_query(continuation, ctp)

    @classmethod
    def _extract_continuation_ep_data(cls, continuation_ep: dict):
        """Build a continuation query from a continuation endpoint dict;
        None for non-dict input or a missing token."""
        if isinstance(continuation_ep, dict):
            continuation = try_get(
                continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
            if not continuation:
                return
            ctp = continuation_ep.get('clickTrackingParams')
            return cls._build_api_continuation_query(continuation, ctp)

    @classmethod
    def _extract_continuation(cls, renderer):
        """Extract the continuation query for *renderer*, trying the legacy
        format first and then continuationItemRenderer entries."""
        next_continuation = cls._extract_next_continuation_data(renderer)
        if next_continuation:
            return next_continuation

        contents = []
        for key in ('contents', 'items'):
            contents.extend(try_get(renderer, lambda x: x[key], list) or [])

        for content in contents:
            if not isinstance(content, dict):
                continue
            continuation_ep = try_get(
                content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                          lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
                dict)
            continuation = cls._extract_continuation_ep_data(continuation_ep)
            if continuation:
                return continuation

    @classmethod
    def _extract_alerts(cls, data):
        """Yield (alert_type, message) pairs from the 'alerts' of a response."""
        for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert_dict, dict):
                continue
            for alert in alert_dict.values():
                alert_type = alert.get('type')
                if not alert_type:
                    continue
                message = cls._get_text(alert, 'text')
                if message:
                    yield alert_type, message

    def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
        """Report alerts as warnings; raise for the last error when *fatal*."""
        errors = []
        warnings = []
        for alert_type, alert_message in alerts:
            if alert_type.lower() == 'error' and fatal:
                errors.append([alert_type, alert_message])
            else:
                warnings.append([alert_type, alert_message])

        # All but the last error are reported as warnings
        for alert_type, alert_message in (warnings + errors[:-1]):
            self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
        if errors:
            raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

    def _extract_and_report_alerts(self, data, *args, **kwargs):
        """Convenience wrapper combining _extract_alerts and _report_alerts."""
        return self._report_alerts(self._extract_alerts(data), *args, **kwargs)

    def _extract_badges(self, renderer: dict):
        """Return the set of lower-cased badge labels of *renderer*."""
        badges = set()
        for badge in try_get(renderer, lambda x: x['badges'], list) or []:
            label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
            if label:
                badges.add(label.lower())
        return badges

    @staticmethod
    def _get_text(data, *path_list, max_runs=None):
        """Extract a text string ('simpleText' or joined 'runs') from the first
        of *path_list* within *data* that yields one; None when nothing matches.

        max_runs, if given, limits how many 'runs' entries are joined.
        """
        for path in path_list or [None]:
            if path is None:
                obj = [data]
            else:
                obj = traverse_obj(data, path, default=[])
                # Paths without branching (... or list/tuple keys) return a
                # single object, which must be wrapped for uniform iteration
                if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                    obj = [obj]
            for item in obj:
                text = try_get(item, lambda x: x['simpleText'], compat_str)
                if text:
                    return text
                runs = try_get(item, lambda x: x['runs'], list) or []
                if not runs and isinstance(item, list):
                    runs = item

                runs = runs[:min(len(runs), max_runs or len(runs))]
                text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
                if text:
                    return text

    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='web'):
        """Call the InnerTube API with retries.

        Retries on network errors (except HTTP 403/429), on YouTube-reported
        'unknown error' alerts and on responses missing *check_get_keys*.
        Returns the parsed response, or None when not *fatal*.
        """
        response = None
        last_error = None
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
            try:
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, network_exceptions):
                    if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
                        e.cause.seek(0)
                        yt_error = try_get(
                            self._parse_json(e.cause.read().decode(), item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    # We also want to catch all other network exceptions since errors in later pages can be troublesome
                    # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                        last_error = error_to_compat_str(e.cause or e.msg)
                        if count < retries:
                            continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False, only_once=True)
                except ExtractorError as e:
                    # YouTube servers may return errors we want to retry on in a 200 OK response
                    # See: https://github.com/yt-dlp/yt-dlp/issues/839
                    if 'unknown error' in e.msg.lower():
                        last_error = e.msg
                        continue
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response

    @staticmethod
    def is_music_url(url):
        """Whether *url* points to music.youtube.com."""
        return re.match(r'https?://music\.youtube\.com/', url) is not None

    def _extract_video(self, renderer):
        """Build a url-type info dict for a video *renderer* (e.g. from a
        playlist or search result page)."""
        video_id = renderer.get('videoId')
        title = self._get_text(renderer, 'title')
        description = self._get_text(renderer, 'descriptionSnippet')
        duration = parse_duration(self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
        view_count_text = self._get_text(renderer, 'viewCountText') or ''
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))

        uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')

        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
883
884
class YoutubeIE(YoutubeBaseInfoExtractor):
    """Extractor for individual YouTube videos (watch pages and equivalent URLs)."""
    IE_DESC = 'YouTube.com'
    # Hostname patterns for known Invidious instances (alternative YouTube
    # front-ends). These are spliced into _VALID_URL below via the
    # %(invidious)s placeholder so such URLs are handled by this extractor.
    _INVIDIOUS_SITES = (
        # invidious-redirect websites
        r'(?:www\.)?redirect\.invidious\.io',
        r'(?:(?:www|dev)\.)?invidio\.us',
        # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
        r'(?:www\.)?invidious\.pussthecat\.org',
        r'(?:www\.)?invidious\.zee\.li',
        r'(?:www\.)?invidious\.ethibox\.fr',
        r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
        # youtube-dl invidious instances list
        r'(?:(?:www|no)\.)?invidiou\.sh',
        r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
        r'(?:www\.)?invidious\.kabi\.tk',
        r'(?:www\.)?invidious\.mastodon\.host',
        r'(?:www\.)?invidious\.zapashcanon\.fr',
        r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
        r'(?:www\.)?invidious\.tinfoil-hat\.net',
        r'(?:www\.)?invidious\.himiko\.cloud',
        r'(?:www\.)?invidious\.reallyancient\.tech',
        r'(?:www\.)?invidious\.tube',
        r'(?:www\.)?invidiou\.site',
        r'(?:www\.)?invidious\.site',
        r'(?:www\.)?invidious\.xyz',
        r'(?:www\.)?invidious\.nixnet\.xyz',
        r'(?:www\.)?invidious\.048596\.xyz',
        r'(?:www\.)?invidious\.drycat\.fr',
        r'(?:www\.)?inv\.skyn3t\.in',
        r'(?:www\.)?tube\.poal\.co',
        r'(?:www\.)?tube\.connect\.cafe',
        r'(?:www\.)?vid\.wxzm\.sx',
        r'(?:www\.)?vid\.mint\.lgbt',
        r'(?:www\.)?vid\.puffyan\.us',
        r'(?:www\.)?yewtu\.be',
        r'(?:www\.)?yt\.elukerio\.org',
        r'(?:www\.)?yt\.lelux\.fi',
        r'(?:www\.)?invidious\.ggc-project\.de',
        r'(?:www\.)?yt\.maisputain\.ovh',
        r'(?:www\.)?ytprivate\.com',
        r'(?:www\.)?invidious\.13ad\.de',
        r'(?:www\.)?invidious\.toot\.koeln',
        r'(?:www\.)?invidious\.fdn\.fr',
        r'(?:www\.)?watch\.nettohikari\.com',
        r'(?:www\.)?invidious\.namazso\.eu',
        r'(?:www\.)?invidious\.silkky\.cloud',
        r'(?:www\.)?invidious\.exonip\.de',
        r'(?:www\.)?invidious\.riverside\.rocks',
        r'(?:www\.)?invidious\.blamefran\.net',
        r'(?:www\.)?invidious\.moomoo\.de',
        r'(?:www\.)?ytb\.trom\.tf',
        r'(?:www\.)?yt\.cyberhost\.uk',
        # Tor hidden services / I2P addresses of Invidious instances
        r'(?:www\.)?kgg2m7yk5aybusll\.onion',
        r'(?:www\.)?qklhadlycap4cnod\.onion',
        r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
        r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
        r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
        r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
        r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
        r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
        r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
        r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
        r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
        r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
    )
    # Verbose-mode (re.X) pattern matching every supported video-URL shape,
    # including bare 11-character video IDs. NOTE: the '#'-comments below are
    # *inside* the raw string and belong to the (?x) regex itself, not to
    # Python; the %(invidious)s placeholder is filled from _INVIDIOUS_SITES.
    _VALID_URL = r"""(?x)^
                     (
                         (?:https?://|//)                                    # http(s):// or protocol-independent URL
                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
                            (?:www\.)?deturl\.com/www\.youtube\.com|
                            (?:www\.)?pwnyoutube\.com|
                            (?:www\.)?hooktube\.com|
                            (?:www\.)?yourepeat\.com|
                            tube\.majestyc\.net|
                            %(invidious)s|
                            youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e|shorts)/(?!videoseries))         # v/ or embed/ or e/ or shorts/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
                                 v=
                             )
                         ))
                         |(?:
                            youtu\.be|                                        # just youtu.be/xxxx
                            vid\.plus|                                        # or vid.plus/xxxx
                            zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
                            %(invidious)s
                         )/
                         |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     (?:\#|$)""" % {
        'invidious': '|'.join(_INVIDIOUS_SITES),
    }
    # Patterns for extracting a player identifier from the player JS URL,
    # tried in order; the named group <id> captures the identifier (used
    # elsewhere to cache/look up the per-player signature function).
    _PLAYER_INFO_RE = (
        r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
        r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
        r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',  # legacy "vfl..." style ids
    )
    # Static metadata for known YouTube itags (format codes), keyed by the
    # itag as a string. Values supply container/codec/resolution/bitrate
    # hints that the API response may omit; 'preference' carries a negative
    # penalty for less desirable delivery forms (3D, HLS).
    _formats = {
        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
        '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},


        # 3D videos
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

        # Apple HTTP Live Streaming
        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
        '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
        '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

        # Dash webm
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

        # Dash webm audio
        '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
        '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

        # Dash webm audio with opus inside
        '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
        '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
        '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},

        # av01 video only formats sometimes served with "unknown" codecs
        '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
        '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
        '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
        '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
        '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
        '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
        '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
        '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
    }
    # Subtitle formats to request, in order of preference.
    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

    # NOTE(review): presumably opts this extractor out of the base class's
    # generic X-Forwarded-For geo-bypass — confirm against YoutubeBaseInfoExtractor.
    _GEO_BYPASS = False

    IE_NAME = 'youtube'
1104 _TESTS = [
1105 {
1106 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1107 'info_dict': {
1108 'id': 'BaW_jenozKc',
1109 'ext': 'mp4',
1110 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1111 'uploader': 'Philipp Hagemeister',
1112 'uploader_id': 'phihag',
1113 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1114 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1115 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1116 'upload_date': '20121002',
1117 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1118 'categories': ['Science & Technology'],
1119 'tags': ['youtube-dl'],
1120 'duration': 10,
1121 'view_count': int,
1122 'like_count': int,
1123 'dislike_count': int,
1124 'start_time': 1,
1125 'end_time': 9,
1126 }
1127 },
1128 {
1129 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1130 'note': 'Embed-only video (#1746)',
1131 'info_dict': {
1132 'id': 'yZIXLfi8CZQ',
1133 'ext': 'mp4',
1134 'upload_date': '20120608',
1135 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1136 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1137 'uploader': 'SET India',
1138 'uploader_id': 'setindia',
1139 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1140 'age_limit': 18,
1141 },
1142 'skip': 'Private video',
1143 },
1144 {
1145 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1146 'note': 'Use the first video ID in the URL',
1147 'info_dict': {
1148 'id': 'BaW_jenozKc',
1149 'ext': 'mp4',
1150 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1151 'uploader': 'Philipp Hagemeister',
1152 'uploader_id': 'phihag',
1153 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1154 'upload_date': '20121002',
1155 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1156 'categories': ['Science & Technology'],
1157 'tags': ['youtube-dl'],
1158 'duration': 10,
1159 'view_count': int,
1160 'like_count': int,
1161 'dislike_count': int,
1162 },
1163 'params': {
1164 'skip_download': True,
1165 },
1166 },
1167 {
1168 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1169 'note': '256k DASH audio (format 141) via DASH manifest',
1170 'info_dict': {
1171 'id': 'a9LDPn-MO4I',
1172 'ext': 'm4a',
1173 'upload_date': '20121002',
1174 'uploader_id': '8KVIDEO',
1175 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1176 'description': '',
1177 'uploader': '8KVIDEO',
1178 'title': 'UHDTV TEST 8K VIDEO.mp4'
1179 },
1180 'params': {
1181 'youtube_include_dash_manifest': True,
1182 'format': '141',
1183 },
1184 'skip': 'format 141 not served anymore',
1185 },
1186 # DASH manifest with encrypted signature
1187 {
1188 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1189 'info_dict': {
1190 'id': 'IB3lcPjvWLA',
1191 'ext': 'm4a',
1192 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1193 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1194 'duration': 244,
1195 'uploader': 'AfrojackVEVO',
1196 'uploader_id': 'AfrojackVEVO',
1197 'upload_date': '20131011',
1198 'abr': 129.495,
1199 },
1200 'params': {
1201 'youtube_include_dash_manifest': True,
1202 'format': '141/bestaudio[ext=m4a]',
1203 },
1204 },
1205 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1206 {
1207 'note': 'Embed allowed age-gate video',
1208 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1209 'info_dict': {
1210 'id': 'HtVdAasjOgU',
1211 'ext': 'mp4',
1212 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1213 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1214 'duration': 142,
1215 'uploader': 'The Witcher',
1216 'uploader_id': 'WitcherGame',
1217 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1218 'upload_date': '20140605',
1219 'age_limit': 18,
1220 },
1221 },
1222 {
1223 'note': 'Age-gate video with embed allowed in public site',
1224 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1225 'info_dict': {
1226 'id': 'HsUATh_Nc2U',
1227 'ext': 'mp4',
1228 'title': 'Godzilla 2 (Official Video)',
1229 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1230 'upload_date': '20200408',
1231 'uploader_id': 'FlyingKitty900',
1232 'uploader': 'FlyingKitty',
1233 'age_limit': 18,
1234 },
1235 },
1236 {
1237 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1238 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1239 'info_dict': {
1240 'id': 'Tq92D6wQ1mg',
1241 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1242 'ext': 'mp4',
1243 'upload_date': '20191227',
1244 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1245 'uploader': 'Projekt Melody',
1246 'description': 'md5:17eccca93a786d51bc67646756894066',
1247 'age_limit': 18,
1248 },
1249 },
1250 {
1251 'note': 'Non-Agegated non-embeddable video',
1252 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1253 'info_dict': {
1254 'id': 'MeJVWBSsPAY',
1255 'ext': 'mp4',
1256 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1257 'uploader': 'Herr Lurik',
1258 'uploader_id': 'st3in234',
1259 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1260 'upload_date': '20130730',
1261 },
1262 },
1263 {
1264 'note': 'Non-bypassable age-gated video',
1265 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1266 'only_matching': True,
1267 },
1268 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1269 # YouTube Red ad is not captured for creator
1270 {
1271 'url': '__2ABJjxzNo',
1272 'info_dict': {
1273 'id': '__2ABJjxzNo',
1274 'ext': 'mp4',
1275 'duration': 266,
1276 'upload_date': '20100430',
1277 'uploader_id': 'deadmau5',
1278 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1279 'creator': 'deadmau5',
1280 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1281 'uploader': 'deadmau5',
1282 'title': 'Deadmau5 - Some Chords (HD)',
1283 'alt_title': 'Some Chords',
1284 },
1285 'expected_warnings': [
1286 'DASH manifest missing',
1287 ]
1288 },
1289 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1290 {
1291 'url': 'lqQg6PlCWgI',
1292 'info_dict': {
1293 'id': 'lqQg6PlCWgI',
1294 'ext': 'mp4',
1295 'duration': 6085,
1296 'upload_date': '20150827',
1297 'uploader_id': 'olympic',
1298 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1299 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1300 'uploader': 'Olympics',
1301 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1302 },
1303 'params': {
1304 'skip_download': 'requires avconv',
1305 }
1306 },
1307 # Non-square pixels
1308 {
1309 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1310 'info_dict': {
1311 'id': '_b-2C3KPAM0',
1312 'ext': 'mp4',
1313 'stretched_ratio': 16 / 9.,
1314 'duration': 85,
1315 'upload_date': '20110310',
1316 'uploader_id': 'AllenMeow',
1317 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1318 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1319 'uploader': '孫ᄋᄅ',
1320 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1321 },
1322 },
1323 # url_encoded_fmt_stream_map is empty string
1324 {
1325 'url': 'qEJwOuvDf7I',
1326 'info_dict': {
1327 'id': 'qEJwOuvDf7I',
1328 'ext': 'webm',
1329 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1330 'description': '',
1331 'upload_date': '20150404',
1332 'uploader_id': 'spbelect',
1333 'uploader': 'Наблюдатели Петербурга',
1334 },
1335 'params': {
1336 'skip_download': 'requires avconv',
1337 },
1338 'skip': 'This live event has ended.',
1339 },
1340 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1341 {
1342 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1343 'info_dict': {
1344 'id': 'FIl7x6_3R5Y',
1345 'ext': 'webm',
1346 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1347 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1348 'duration': 220,
1349 'upload_date': '20150625',
1350 'uploader_id': 'dorappi2000',
1351 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1352 'uploader': 'dorappi2000',
1353 'formats': 'mincount:31',
1354 },
1355 'skip': 'not actual anymore',
1356 },
1357 # DASH manifest with segment_list
1358 {
1359 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1360 'md5': '8ce563a1d667b599d21064e982ab9e31',
1361 'info_dict': {
1362 'id': 'CsmdDsKjzN8',
1363 'ext': 'mp4',
1364 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1365 'uploader': 'Airtek',
1366 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1367 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1368 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1369 },
1370 'params': {
1371 'youtube_include_dash_manifest': True,
1372 'format': '135', # bestvideo
1373 },
1374 'skip': 'This live event has ended.',
1375 },
1376 {
1377 # Multifeed videos (multiple cameras), URL is for Main Camera
1378 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1379 'info_dict': {
1380 'id': 'jvGDaLqkpTg',
1381 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1382 'description': 'md5:e03b909557865076822aa169218d6a5d',
1383 },
1384 'playlist': [{
1385 'info_dict': {
1386 'id': 'jvGDaLqkpTg',
1387 'ext': 'mp4',
1388 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1389 'description': 'md5:e03b909557865076822aa169218d6a5d',
1390 'duration': 10643,
1391 'upload_date': '20161111',
1392 'uploader': 'Team PGP',
1393 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1394 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1395 },
1396 }, {
1397 'info_dict': {
1398 'id': '3AKt1R1aDnw',
1399 'ext': 'mp4',
1400 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1401 'description': 'md5:e03b909557865076822aa169218d6a5d',
1402 'duration': 10991,
1403 'upload_date': '20161111',
1404 'uploader': 'Team PGP',
1405 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1406 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1407 },
1408 }, {
1409 'info_dict': {
1410 'id': 'RtAMM00gpVc',
1411 'ext': 'mp4',
1412 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1413 'description': 'md5:e03b909557865076822aa169218d6a5d',
1414 'duration': 10995,
1415 'upload_date': '20161111',
1416 'uploader': 'Team PGP',
1417 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1418 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1419 },
1420 }, {
1421 'info_dict': {
1422 'id': '6N2fdlP3C5U',
1423 'ext': 'mp4',
1424 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1425 'description': 'md5:e03b909557865076822aa169218d6a5d',
1426 'duration': 10990,
1427 'upload_date': '20161111',
1428 'uploader': 'Team PGP',
1429 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1430 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1431 },
1432 }],
1433 'params': {
1434 'skip_download': True,
1435 },
1436 'skip': 'Not multifeed anymore',
1437 },
1438 {
1439 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1440 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1441 'info_dict': {
1442 'id': 'gVfLd0zydlo',
1443 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1444 },
1445 'playlist_count': 2,
1446 'skip': 'Not multifeed anymore',
1447 },
1448 {
1449 'url': 'https://vid.plus/FlRa-iH7PGw',
1450 'only_matching': True,
1451 },
1452 {
1453 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1454 'only_matching': True,
1455 },
1456 {
1457 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1458 # Also tests cut-off URL expansion in video description (see
1459 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1460 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1461 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1462 'info_dict': {
1463 'id': 'lsguqyKfVQg',
1464 'ext': 'mp4',
1465 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1466 'alt_title': 'Dark Walk',
1467 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1468 'duration': 133,
1469 'upload_date': '20151119',
1470 'uploader_id': 'IronSoulElf',
1471 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1472 'uploader': 'IronSoulElf',
1473 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1474 'track': 'Dark Walk',
1475 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1476 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1477 },
1478 'params': {
1479 'skip_download': True,
1480 },
1481 },
1482 {
1483 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1484 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1485 'only_matching': True,
1486 },
1487 {
1488 # Video with yt:stretch=17:0
1489 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1490 'info_dict': {
1491 'id': 'Q39EVAstoRM',
1492 'ext': 'mp4',
1493 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1494 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1495 'upload_date': '20151107',
1496 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1497 'uploader': 'CH GAMER DROID',
1498 },
1499 'params': {
1500 'skip_download': True,
1501 },
1502 'skip': 'This video does not exist.',
1503 },
1504 {
1505 # Video with incomplete 'yt:stretch=16:'
1506 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1507 'only_matching': True,
1508 },
1509 {
1510 # Video licensed under Creative Commons
1511 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1512 'info_dict': {
1513 'id': 'M4gD1WSo5mA',
1514 'ext': 'mp4',
1515 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1516 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1517 'duration': 721,
1518 'upload_date': '20150127',
1519 'uploader_id': 'BerkmanCenter',
1520 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1521 'uploader': 'The Berkman Klein Center for Internet & Society',
1522 'license': 'Creative Commons Attribution license (reuse allowed)',
1523 },
1524 'params': {
1525 'skip_download': True,
1526 },
1527 },
1528 {
1529 # Channel-like uploader_url
1530 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1531 'info_dict': {
1532 'id': 'eQcmzGIKrzg',
1533 'ext': 'mp4',
1534 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1535 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1536 'duration': 4060,
1537 'upload_date': '20151119',
1538 'uploader': 'Bernie Sanders',
1539 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1540 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1541 'license': 'Creative Commons Attribution license (reuse allowed)',
1542 },
1543 'params': {
1544 'skip_download': True,
1545 },
1546 },
1547 {
1548 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1549 'only_matching': True,
1550 },
1551 {
1552 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1553 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1554 'only_matching': True,
1555 },
1556 {
1557 # Rental video preview
1558 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1559 'info_dict': {
1560 'id': 'uGpuVWrhIzE',
1561 'ext': 'mp4',
1562 'title': 'Piku - Trailer',
1563 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1564 'upload_date': '20150811',
1565 'uploader': 'FlixMatrix',
1566 'uploader_id': 'FlixMatrixKaravan',
1567 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1568 'license': 'Standard YouTube License',
1569 },
1570 'params': {
1571 'skip_download': True,
1572 },
1573 'skip': 'This video is not available.',
1574 },
1575 {
1576 # YouTube Red video with episode data
1577 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1578 'info_dict': {
1579 'id': 'iqKdEhx-dD4',
1580 'ext': 'mp4',
1581 'title': 'Isolation - Mind Field (Ep 1)',
1582 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1583 'duration': 2085,
1584 'upload_date': '20170118',
1585 'uploader': 'Vsauce',
1586 'uploader_id': 'Vsauce',
1587 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1588 'series': 'Mind Field',
1589 'season_number': 1,
1590 'episode_number': 1,
1591 },
1592 'params': {
1593 'skip_download': True,
1594 },
1595 'expected_warnings': [
1596 'Skipping DASH manifest',
1597 ],
1598 },
1599 {
1600 # The following content has been identified by the YouTube community
1601 # as inappropriate or offensive to some audiences.
1602 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1603 'info_dict': {
1604 'id': '6SJNVb0GnPI',
1605 'ext': 'mp4',
1606 'title': 'Race Differences in Intelligence',
1607 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1608 'duration': 965,
1609 'upload_date': '20140124',
1610 'uploader': 'New Century Foundation',
1611 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1612 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1613 },
1614 'params': {
1615 'skip_download': True,
1616 },
1617 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1618 },
1619 {
1620 # itag 212
1621 'url': '1t24XAntNCY',
1622 'only_matching': True,
1623 },
1624 {
1625 # geo restricted to JP
1626 'url': 'sJL6WA-aGkQ',
1627 'only_matching': True,
1628 },
1629 {
1630 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1631 'only_matching': True,
1632 },
1633 {
1634 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1635 'only_matching': True,
1636 },
1637 {
1638 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1639 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1640 'only_matching': True,
1641 },
1642 {
1643 # DRM protected
1644 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1645 'only_matching': True,
1646 },
1647 {
1648 # Video with unsupported adaptive stream type formats
1649 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1650 'info_dict': {
1651 'id': 'Z4Vy8R84T1U',
1652 'ext': 'mp4',
1653 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1654 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1655 'duration': 433,
1656 'upload_date': '20130923',
1657 'uploader': 'Amelia Putri Harwita',
1658 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1659 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1660 'formats': 'maxcount:10',
1661 },
1662 'params': {
1663 'skip_download': True,
1664 'youtube_include_dash_manifest': False,
1665 },
1666 'skip': 'not actual anymore',
1667 },
1668 {
1669 # Youtube Music Auto-generated description
1670 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1671 'info_dict': {
1672 'id': 'MgNrAu2pzNs',
1673 'ext': 'mp4',
1674 'title': 'Voyeur Girl',
1675 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1676 'upload_date': '20190312',
1677 'uploader': 'Stephen - Topic',
1678 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1679 'artist': 'Stephen',
1680 'track': 'Voyeur Girl',
1681 'album': 'it\'s too much love to know my dear',
1682 'release_date': '20190313',
1683 'release_year': 2019,
1684 },
1685 'params': {
1686 'skip_download': True,
1687 },
1688 },
1689 {
1690 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1691 'only_matching': True,
1692 },
1693 {
1694 # invalid -> valid video id redirection
1695 'url': 'DJztXj2GPfl',
1696 'info_dict': {
1697 'id': 'DJztXj2GPfk',
1698 'ext': 'mp4',
1699 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1700 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1701 'upload_date': '20090125',
1702 'uploader': 'Prochorowka',
1703 'uploader_id': 'Prochorowka',
1704 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1705 'artist': 'Panjabi MC',
1706 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1707 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1708 },
1709 'params': {
1710 'skip_download': True,
1711 },
1712 'skip': 'Video unavailable',
1713 },
1714 {
1715 # empty description results in an empty string
1716 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1717 'info_dict': {
1718 'id': 'x41yOUIvK2k',
1719 'ext': 'mp4',
1720 'title': 'IMG 3456',
1721 'description': '',
1722 'upload_date': '20170613',
1723 'uploader_id': 'ElevageOrVert',
1724 'uploader': 'ElevageOrVert',
1725 },
1726 'params': {
1727 'skip_download': True,
1728 },
1729 },
1730 {
1731 # with '};' inside yt initial data (see [1])
1732 # see [2] for an example with '};' inside ytInitialPlayerResponse
1733 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1734 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1735 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1736 'info_dict': {
1737 'id': 'CHqg6qOn4no',
1738 'ext': 'mp4',
1739 'title': 'Part 77 Sort a list of simple types in c#',
1740 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1741 'upload_date': '20130831',
1742 'uploader_id': 'kudvenkat',
1743 'uploader': 'kudvenkat',
1744 },
1745 'params': {
1746 'skip_download': True,
1747 },
1748 },
1749 {
1750 # another example of '};' in ytInitialData
1751 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1752 'only_matching': True,
1753 },
1754 {
1755 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1756 'only_matching': True,
1757 },
1758 {
1759 # https://github.com/ytdl-org/youtube-dl/pull/28094
1760 'url': 'OtqTfy26tG0',
1761 'info_dict': {
1762 'id': 'OtqTfy26tG0',
1763 'ext': 'mp4',
1764 'title': 'Burn Out',
1765 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1766 'upload_date': '20141120',
1767 'uploader': 'The Cinematic Orchestra - Topic',
1768 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1769 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1770 'artist': 'The Cinematic Orchestra',
1771 'track': 'Burn Out',
1772 'album': 'Every Day',
1773 'release_data': None,
1774 'release_year': None,
1775 },
1776 'params': {
1777 'skip_download': True,
1778 },
1779 },
1780 {
1781 # controversial video, only works with bpctr when authenticated with cookies
1782 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1783 'only_matching': True,
1784 },
1785 {
1786 # controversial video, requires bpctr/contentCheckOk
1787 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1788 'info_dict': {
1789 'id': 'SZJvDhaSDnc',
1790 'ext': 'mp4',
1791 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1792 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1793 'uploader': 'CBS This Morning',
1794 'uploader_id': 'CBSThisMorning',
1795 'upload_date': '20140716',
1796 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1797 }
1798 },
1799 {
1800 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1801 'url': 'cBvYw8_A0vQ',
1802 'info_dict': {
1803 'id': 'cBvYw8_A0vQ',
1804 'ext': 'mp4',
1805 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1806 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1807 'upload_date': '20201120',
1808 'uploader': 'Walk around Japan',
1809 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1810 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1811 },
1812 'params': {
1813 'skip_download': True,
1814 },
1815 }, {
1816 # Has multiple audio streams
1817 'url': 'WaOKSUlf4TM',
1818 'only_matching': True
1819 }, {
1820 # Requires Premium: has format 141 when requested using YTM url
1821 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1822 'only_matching': True
1823 }, {
1824 # multiple subtitles with same lang_code
1825 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1826 'only_matching': True,
1827 }, {
1828 # Force use android client fallback
1829 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1830 'info_dict': {
1831 'id': 'YOelRv7fMxY',
1832 'title': 'DIGGING A SECRET TUNNEL Part 1',
1833 'ext': '3gp',
1834 'upload_date': '20210624',
1835 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1836 'uploader': 'colinfurze',
1837 'uploader_id': 'colinfurze',
1838 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1839 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1840 },
1841 'params': {
1842 'format': '17', # 3gp format available on android
1843 'extractor_args': {'youtube': {'player_client': ['android']}},
1844 },
1845 },
1846 {
1847 # Skip download of additional client configs (remix client config in this case)
1848 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1849 'only_matching': True,
1850 'params': {
1851 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1852 },
1853 }, {
1854 # shorts
1855 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1856 'only_matching': True,
1857 },
1858 ]
1859
1860 @classmethod
1861 def suitable(cls, url):
1862 from ..utils import parse_qs
1863
1864 qs = parse_qs(url)
1865 if qs.get('list', [None])[0]:
1866 return False
1867 return super(YoutubeIE, cls).suitable(url)
1868
1869 def __init__(self, *args, **kwargs):
1870 super(YoutubeIE, self).__init__(*args, **kwargs)
1871 self._code_cache = {}
1872 self._player_cache = {}
1873
1874 def _extract_player_url(self, *ytcfgs, webpage=None):
1875 player_url = traverse_obj(
1876 ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
1877 get_all=False, expected_type=compat_str)
1878 if not player_url:
1879 return
1880 if player_url.startswith('//'):
1881 player_url = 'https:' + player_url
1882 elif not re.match(r'https?://', player_url):
1883 player_url = compat_urlparse.urljoin(
1884 'https://www.youtube.com', player_url)
1885 return player_url
1886
1887 def _download_player_url(self, video_id, fatal=False):
1888 res = self._download_webpage(
1889 'https://www.youtube.com/iframe_api',
1890 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
1891 if res:
1892 player_version = self._search_regex(
1893 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
1894 if player_version:
1895 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
1896
1897 def _signature_cache_id(self, example_sig):
1898 """ Return a string representation of a signature """
1899 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1900
1901 @classmethod
1902 def _extract_player_info(cls, player_url):
1903 for player_re in cls._PLAYER_INFO_RE:
1904 id_m = re.search(player_re, player_url)
1905 if id_m:
1906 break
1907 else:
1908 raise ExtractorError('Cannot identify player %r' % player_url)
1909 return id_m.group('id')
1910
1911 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1912 player_id = self._extract_player_info(player_url)
1913 if player_id not in self._code_cache:
1914 self._code_cache[player_id] = self._download_webpage(
1915 player_url, video_id, fatal=fatal,
1916 note='Downloading player ' + player_id,
1917 errnote='Download of %s failed' % player_url)
1918 return player_id in self._code_cache
1919
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Return a callable that decrypts signatures shaped like example_sig.

        The result is cached on disk as a permutation spec keyed by player id
        and signature shape. Returns None implicitly if the player JS cannot
        be downloaded.
        """
        player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        func_id = 'js_%s_%s' % (
            player_id, self._signature_cache_id(example_sig))
        # func_id is used as a cache filename; ensure it has no path components
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # The cached spec is a list of source indices: a pure permutation
            return lambda s: ''.join(s[i] for i in cache_spec)

        if self._load_player(video_id, player_url):
            code = self._code_cache[player_id]
            res = self._parse_sig_js(code)

            # Run the JS function on a string of distinct characters to
            # record which input index ends up at each output position
            test_string = ''.join(map(compat_chr, range(len(example_sig))))
            cache_res = res(test_string)
            cache_spec = [ord(c) for c in cache_res]

            self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
            return res
1942
    def _print_sig_code(self, func, example_sig):
        """Print equivalent Python code for the extracted signature function.

        Probes `func` with a string of distinct characters, records the
        resulting index permutation and compresses runs with step +/-1 into
        slice expressions.
        """
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render a Python slice expression, omitting defaults
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Inside a run: keep extending while the step matches
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Start of a new +/-1 run
                    step = i - prev
                    start = prev
                    continue
                else:
                    # Isolated index
                    yield 's[%d]' % prev
            # Flush the final element or the still-open run
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1981
    def _parse_sig_js(self, jscode):
        """Locate the signature function in player JS and return a wrapper
        that calls it through the JS interpreter.

        The patterns are tried in order, newest player layouts first; do not
        reorder them.
        """
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             # NOTE(review): the pattern below duplicates the previous one — harmless but redundant
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # The interpreted function takes its arguments as a list
        return lambda s: initial_function([s])
2005
2006 def _decrypt_signature(self, s, video_id, player_url):
2007 """Turn the encrypted s field into a working signature"""
2008
2009 if player_url is None:
2010 raise ExtractorError('Cannot decrypt signature without player_url')
2011
2012 try:
2013 player_id = (player_url, self._signature_cache_id(s))
2014 if player_id not in self._player_cache:
2015 func = self._extract_signature_function(
2016 video_id, player_url, s
2017 )
2018 self._player_cache[player_id] = func
2019 func = self._player_cache[player_id]
2020 if self.get_param('youtube_print_sig_code'):
2021 self._print_sig_code(func, s)
2022 return func(s)
2023 except Exception as e:
2024 tb = traceback.format_exc()
2025 raise ExtractorError(
2026 'Signature extraction failed: ' + tb, cause=e)
2027
2028 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
2029 """
2030 Extract signatureTimestamp (sts)
2031 Required to tell API what sig/player version is in use.
2032 """
2033 sts = None
2034 if isinstance(ytcfg, dict):
2035 sts = int_or_none(ytcfg.get('STS'))
2036
2037 if not sts:
2038 # Attempt to extract from player
2039 if player_url is None:
2040 error_msg = 'Cannot extract signature timestamp without player_url.'
2041 if fatal:
2042 raise ExtractorError(error_msg)
2043 self.report_warning(error_msg)
2044 return
2045 if self._load_player(video_id, player_url, fatal=fatal):
2046 player_id = self._extract_player_info(player_url)
2047 code = self._code_cache[player_id]
2048 sts = int_or_none(self._search_regex(
2049 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
2050 'JS player signature timestamp', group='sts', fatal=fatal))
2051 return sts
2052
2053 def _mark_watched(self, video_id, player_responses):
2054 playback_url = traverse_obj(
2055 player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
2056 expected_type=url_or_none, get_all=False)
2057 if not playback_url:
2058 self.report_warning('Unable to mark watched')
2059 return
2060 parsed_playback_url = compat_urlparse.urlparse(playback_url)
2061 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
2062
2063 # cpn generation algorithm is reverse engineered from base.js.
2064 # In fact it works even with dummy cpn.
2065 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
2066 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
2067
2068 qs.update({
2069 'ver': ['2'],
2070 'cpn': [cpn],
2071 })
2072 playback_url = compat_urlparse.urlunparse(
2073 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
2074
2075 self._download_webpage(
2076 playback_url, video_id, 'Marking watched',
2077 'Unable to mark watched', fatal=False)
2078
2079 @staticmethod
2080 def _extract_urls(webpage):
2081 # Embedded YouTube player
2082 entries = [
2083 unescapeHTML(mobj.group('url'))
2084 for mobj in re.finditer(r'''(?x)
2085 (?:
2086 <iframe[^>]+?src=|
2087 data-video-url=|
2088 <embed[^>]+?src=|
2089 embedSWF\(?:\s*|
2090 <object[^>]+data=|
2091 new\s+SWFObject\(
2092 )
2093 (["\'])
2094 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
2095 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
2096 \1''', webpage)]
2097
2098 # lazyYT YouTube embed
2099 entries.extend(list(map(
2100 unescapeHTML,
2101 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
2102
2103 # Wordpress "YouTube Video Importer" plugin
2104 matches = re.findall(r'''(?x)<div[^>]+
2105 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
2106 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
2107 entries.extend(m[-1] for m in matches)
2108
2109 return entries
2110
2111 @staticmethod
2112 def _extract_url(webpage):
2113 urls = YoutubeIE._extract_urls(webpage)
2114 return urls[0] if urls else None
2115
2116 @classmethod
2117 def extract_id(cls, url):
2118 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2119 if mobj is None:
2120 raise ExtractorError('Invalid URL: %s' % url)
2121 return mobj.group('id')
2122
2123 def _extract_chapters_from_json(self, data, duration):
2124 chapter_list = traverse_obj(
2125 data, (
2126 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2127 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2128 ), expected_type=list)
2129
2130 return self._extract_chapters(
2131 chapter_list,
2132 chapter_time=lambda chapter: float_or_none(
2133 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2134 chapter_title=lambda chapter: traverse_obj(
2135 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2136 duration=duration)
2137
2138 def _extract_chapters_from_engagement_panel(self, data, duration):
2139 content_list = traverse_obj(
2140 data,
2141 ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2142 expected_type=list, default=[])
2143 chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2144 chapter_title = lambda chapter: self._get_text(chapter, 'title')
2145
2146 return next((
2147 filter(None, (
2148 self._extract_chapters(
2149 traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2150 chapter_time, chapter_title, duration)
2151 for contents in content_list
2152 ))), [])
2153
2154 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2155 chapters = []
2156 last_chapter = {'start_time': 0}
2157 for idx, chapter in enumerate(chapter_list or []):
2158 title = chapter_title(chapter)
2159 start_time = chapter_time(chapter)
2160 if start_time is None:
2161 continue
2162 last_chapter['end_time'] = start_time
2163 if start_time < last_chapter['start_time']:
2164 if idx == 1:
2165 chapters.pop()
2166 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2167 else:
2168 self.report_warning(f'Invalid start time for chapter "{title}"')
2169 continue
2170 last_chapter = {'start_time': start_time, 'title': title}
2171 chapters.append(last_chapter)
2172 last_chapter['end_time'] = duration
2173 return chapters
2174
2175 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2176 return self._parse_json(self._search_regex(
2177 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2178 regex), webpage, name, default='{}'), video_id, fatal=False)
2179
2180 @staticmethod
2181 def parse_time_text(time_text):
2182 """
2183 Parse the comment time text
2184 time_text is in the format 'X units ago (edited)'
2185 """
2186 time_text_split = time_text.split(' ')
2187 if len(time_text_split) >= 3:
2188 try:
2189 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2190 except ValueError:
2191 return None
2192
2193 def _extract_comment(self, comment_renderer, parent=None):
2194 comment_id = comment_renderer.get('commentId')
2195 if not comment_id:
2196 return
2197
2198 text = self._get_text(comment_renderer, 'contentText')
2199
2200 # note: timestamp is an estimate calculated from the current time and time_text
2201 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
2202 time_text_dt = self.parse_time_text(time_text)
2203 if isinstance(time_text_dt, datetime.datetime):
2204 timestamp = calendar.timegm(time_text_dt.timetuple())
2205 author = self._get_text(comment_renderer, 'authorText')
2206 author_id = try_get(comment_renderer,
2207 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2208
2209 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2210 lambda x: x['likeCount']), compat_str)) or 0
2211 author_thumbnail = try_get(comment_renderer,
2212 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2213
2214 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2215 is_favorited = 'creatorHeart' in (try_get(
2216 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2217 return {
2218 'id': comment_id,
2219 'text': text,
2220 'timestamp': timestamp,
2221 'time_text': time_text,
2222 'like_count': votes,
2223 'is_favorited': is_favorited,
2224 'author': author,
2225 'author_id': author_id,
2226 'author_thumbnail': author_thumbnail,
2227 'author_is_uploader': author_is_uploader,
2228 'parent': parent or 'root'
2229 }
2230
    def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, comment_counts=None):
        """Generator over comments for a video (or, when `parent` is set, the
        replies of a single comment thread).

        Yields comment info dicts; a top-level call may additionally yield a
        single int first — the estimated total comment count.
        comment_counts is shared, mutable state: [downloaded so far,
        estimated total, current reply-thread index].
        """

        def extract_header(contents):
            # Parse the comments header: total count and the continuation
            # token for the requested sort order. Returns (total, continuation).
            _total_comments = 0
            _continuation = None
            for content in contents:
                comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
                expected_comment_count = parse_count(self._get_text(
                    comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

                if expected_comment_count:
                    comment_counts[1] = expected_comment_count
                    self.to_screen('Downloading ~%d comments' % expected_comment_count)
                    _total_comments = comment_counts[1]
                sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
                comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    continue

                sort_text = sort_menu_item.get('title')
                if isinstance(sort_text, compat_str):
                    sort_text = sort_text.lower()
                else:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text)
                break
            return _total_comments, _continuation

        def extract_thread(contents):
            # Yield each comment in `contents` followed (recursively) by its replies
            if not parent:
                comment_counts[2] = 0
            for content in contents:
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                comment_renderer = try_get(
                    comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                    content, (lambda x: x['commentRenderer'], dict))

                if not comment_renderer:
                    continue
                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue
                comment_counts[0] += 1
                yield comment
                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    comment_counts[2] += 1
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, ytcfg, video_id,
                        parent=comment.get('id'), comment_counts=comment_counts)

                    for reply_comment in comment_entries_iter:
                        yield reply_comment

        # YouTube comments have a max depth of 2
        max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
        if max_depth == 1 and parent:
            return
        if not comment_counts:
            # comment so far, est. total comments, current comment thread #
            comment_counts = [0, 0, 0]

        continuation = self._extract_continuation(root_continuation_data)
        if continuation and len(continuation['continuation']) < 27:
            self.write_debug('Detected old API continuation token. Generating new API compatible token.')
            continuation_token = self._generate_comment_continuation(video_id)
            continuation = self._build_api_continuation_query(continuation_token, None)

        visitor_data = None
        is_first_continuation = parent is None

        # Page through the comment API until no continuation remains
        for page_num in itertools.count(0):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
            comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = '    Downloading comment API JSON reply thread %d %s' % (
                        comment_counts[2], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    '       ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
            if not response:
                break
            # Carry the visitor data forward so subsequent pages stay in session
            visitor_data = try_get(
                response,
                lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
                compat_str) or visitor_data

            continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

            continuation = None
            if isinstance(continuation_contents, list):
                # Current response structure
                for continuation_section in continuation_contents:
                    if not isinstance(continuation_section, dict):
                        continue
                    continuation_items = try_get(
                        continuation_section,
                        (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                         lambda x: x['appendContinuationItemsAction']['continuationItems']),
                        list) or []
                    if is_first_continuation:
                        total_comments, continuation = extract_header(continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break
                        continue
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_items)):
                        yield entry
                    continuation = self._extract_continuation({'contents': continuation_items})
                    if continuation:
                        # Sometimes YouTube provides a continuation without any comments
                        # In most cases we end up just downloading these with very little comments to come.
                        if count == 0:
                            if not parent:
                                self.report_warning('No comments received - assuming end of comments')
                            continuation = None
                        break

            # Deprecated response structure
            elif isinstance(continuation_contents, dict):
                known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
                for key, continuation_renderer in continuation_contents.items():
                    if key not in known_continuation_renderers:
                        continue
                    if not isinstance(continuation_renderer, dict):
                        continue
                    if is_first_continuation:
                        header_continuation_items = [continuation_renderer.get('header') or {}]
                        total_comments, continuation = extract_header(header_continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break

                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break
2400
2401 @staticmethod
2402 def _generate_comment_continuation(video_id):
2403 """
2404 Generates initial comment section continuation token from given video id
2405 """
2406 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2407 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2408 new_continuation_intlist = list(itertools.chain.from_iterable(
2409 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2410 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2411
    def _extract_comments(self, ytcfg, video_id, contents, webpage):
        """Entry for comment extraction"""
        def _real_comment_extract(contents):
            # _comment_entries yields comment dicts, plus possibly one int
            # (the estimated total) as its first item
            yield from self._comment_entries(
                traverse_obj(contents, (..., 'itemSectionRenderer'), get_all=False), ytcfg, video_id)

        comments = []
        estimated_total = 0
        max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
        # Force English regardless of account setting to prevent parsing issues
        # See: https://github.com/yt-dlp/yt-dlp/issues/532
        # deepcopy so the caller's ytcfg is not mutated
        ytcfg = copy.deepcopy(ytcfg)
        traverse_obj(
            ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})['hl'] = 'en'
        try:
            for comment in _real_comment_extract(contents):
                if len(comments) >= max_comments:
                    break
                # An int yield is the estimated total, not a comment
                if isinstance(comment, int):
                    estimated_total = comment
                    continue
                comments.append(comment)
        except KeyboardInterrupt:
            # Deliberate: Ctrl-C stops downloading but keeps what we have
            self.to_screen('Interrupted by user')
        self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
        return {
            'comments': comments,
            'comment_count': len(comments),
        }
2441
2442 @staticmethod
2443 def _get_checkok_params():
2444 return {'contentCheckOk': True, 'racyCheckOk': True}
2445
2446 @classmethod
2447 def _generate_player_context(cls, sts=None):
2448 context = {
2449 'html5Preference': 'HTML5_PREF_WANTS',
2450 }
2451 if sts is not None:
2452 context['signatureTimestamp'] = sts
2453 return {
2454 'playbackContext': {
2455 'contentPlaybackContext': context
2456 },
2457 **cls._get_checkok_params()
2458 }
2459
2460 @staticmethod
2461 def _is_agegated(player_response):
2462 if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
2463 return True
2464
2465 reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2466 AGE_GATE_REASONS = (
2467 'confirm your age', 'age-restricted', 'inappropriate', # reason
2468 'age_verification_required', 'age_check_required', # status
2469 )
2470 return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
2471
2472 @staticmethod
2473 def _is_unplayable(player_response):
2474 return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
2475
2476 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
2477
2478 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2479 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2480 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
2481 headers = self.generate_api_headers(
2482 ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
2483
2484 yt_query = {'videoId': video_id}
2485 yt_query.update(self._generate_player_context(sts))
2486 return self._extract_response(
2487 item_id=video_id, ep='player', query=yt_query,
2488 ytcfg=player_ytcfg, headers=headers, fatal=True,
2489 default_client=client,
2490 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2491 ) or None
2492
2493 def _get_requested_clients(self, url, smuggled_data):
2494 requested_clients = []
2495 allowed_clients = sorted(
2496 [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
2497 key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
2498 for client in self._configuration_arg('player_client'):
2499 if client in allowed_clients:
2500 requested_clients.append(client)
2501 elif client == 'all':
2502 requested_clients.extend(allowed_clients)
2503 else:
2504 self.report_warning(f'Skipping unsupported client {client}')
2505 if not requested_clients:
2506 requested_clients = ['android', 'web']
2507
2508 if smuggled_data.get('is_music_url') or self.is_music_url(url):
2509 requested_clients.extend(
2510 f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
2511
2512 return orderedSet(requested_clients)
2513
2514 def _extract_player_ytcfg(self, client, video_id):
2515 url = {
2516 'web_music': 'https://music.youtube.com',
2517 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2518 }.get(client)
2519 if not url:
2520 return {}
2521 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2522 return self.extract_ytcfg(video_id, webpage) or {}
2523
    def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
        """Query each requested innertube client for a player response.

        Returns (prs, player_url): the collected player-response dicts and the
        JS player URL (needed later for signature decryption). Age-gated or
        unplayable responses queue follow-up clients (*_agegate / *_creator).
        """
        initial_pr = None
        if webpage:
            initial_pr = self._extract_yt_initial_variable(
                webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
                video_id, 'initial player response')

        original_clients = clients
        # `clients` is consumed as a stack (pop from the end), so reverse it
        # to preserve the requested order
        clients = clients[::-1]
        prs = []

        def append_client(client_name):
            # Queue a fallback client unless the user explicitly requested it already
            if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
                clients.append(client_name)

        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        if initial_pr:
            pr = dict(initial_pr)
            pr['streamingData'] = None
            prs.append(pr)

        last_error = None
        tried_iframe_fallback = False
        player_url = None
        while clients:
            client = clients.pop()
            player_ytcfg = master_ytcfg if client == 'web' else {}
            if 'configs' not in self._configuration_arg('player_skip'):
                player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

            # Reuse the first player URL found; only look again while still unset
            player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
            require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
            if 'js' in self._configuration_arg('player_skip'):
                require_js_player = False
                player_url = None

            # One-shot fallback: fetch the player URL from the embed iframe
            if not player_url and not tried_iframe_fallback and require_js_player:
                player_url = self._download_player_url(video_id)
                tried_iframe_fallback = True

            try:
                # The web client can reuse the player response already embedded in the page
                pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
            except ExtractorError as e:
                # Remember the failure but keep trying the remaining clients
                if last_error:
                    self.report_warning(last_error)
                last_error = e
                continue

            if pr:
                prs.append(pr)

            # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
            if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
                append_client(client.replace('_agegate', '_creator'))
            elif self._is_agegated(pr):
                append_client(f'{client}_agegate')

        if last_error:
            # Only fatal when no client produced a usable response at all
            if not len(prs):
                raise last_error
            self.report_warning(last_error)
        return prs, player_url
2590
    def _extract_formats(self, streaming_data, video_id, player_url, is_live):
        """Yield format dicts from the streamingData of all player responses.

        Handles progressive/adaptive formats (including signatureCipher
        decryption via the JS player) and then HLS/DASH manifests where
        enabled, de-duplicating by itag/stream id across clients.
        """
        itags, stream_ids = [], []
        itag_qualities, res_qualities = {}, {}
        q = qualities([
            # Normally tiny is the smallest video-only formats. But
            # audio-only formats with unknown quality may get tagged as tiny
            'tiny',
            'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
            'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
        ])
        streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

        for fmt in streaming_formats:
            # Skip DRM-protected formats and those with a target duration
            if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
                continue

            itag = str_or_none(fmt.get('itag'))
            audio_track = fmt.get('audioTrack') or {}
            stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
            # De-duplicate: the same stream may appear in several player responses
            if stream_id in stream_ids:
                continue

            quality = fmt.get('quality')
            height = int_or_none(fmt.get('height'))
            if quality == 'tiny' or not quality:
                quality = fmt.get('audioQuality', '').lower() or quality
            # The 3gp format (17) in android client has a quality of "small",
            # but is actually worse than other formats
            if itag == '17':
                quality = 'tiny'
            if quality:
                if itag:
                    itag_qualities[itag] = quality
                if height:
                    res_qualities[height] = quality
            # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
            # (adding `&sq=0` to the URL) and parsing emsg box to determine the
            # number of fragment that would subsequently requested with (`&sq=N`)
            if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
                continue

            fmt_url = fmt.get('url')
            if not fmt_url:
                # URL is hidden behind a signatureCipher; needs the JS player to decrypt
                sc = compat_parse_qs(fmt.get('signatureCipher'))
                fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
                encrypted_sig = try_get(sc, lambda x: x['s'][0])
                if not (sc and fmt_url and encrypted_sig):
                    continue
                if not player_url:
                    continue
                signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
                sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
                fmt_url += '&' + sp + '=' + signature

            if itag:
                itags.append(itag)
                stream_ids.append(stream_id)

            tbr = float_or_none(
                fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
            dct = {
                'asr': int_or_none(fmt.get('audioSampleRate')),
                'filesize': int_or_none(fmt.get('contentLength')),
                'format_id': itag,
                'format_note': ', '.join(filter(None, (
                    '%s%s' % (audio_track.get('displayName') or '',
                              ' (default)' if audio_track.get('audioIsDefault') else ''),
                    # NOTE(review): `quality` may still be None here if neither
                    # 'quality' nor 'audioQuality' was set, in which case
                    # .replace() would raise — confirm upstream data guarantees
                    fmt.get('qualityLabel') or quality.replace('audio_quality_', '')))),
                'fps': int_or_none(fmt.get('fps')),
                'height': height,
                'quality': q(quality),
                'tbr': tbr,
                'url': fmt_url,
                'width': int_or_none(fmt.get('width')),
                'language': audio_track.get('id', '').split('.')[0],
                'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
            }
            mime_mobj = re.match(
                r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
            if mime_mobj:
                dct['ext'] = mimetype2ext(mime_mobj.group(1))
                dct.update(parse_codecs(mime_mobj.group(2)))
            no_audio = dct.get('acodec') == 'none'
            no_video = dct.get('vcodec') == 'none'
            if no_audio:
                dct['vbr'] = tbr
            if no_video:
                dct['abr'] = tbr
            if no_audio or no_video:
                dct['downloader_options'] = {
                    # Youtube throttles chunks >~10M
                    'http_chunk_size': 10485760,
                }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
            yield dct

        skip_manifests = self._configuration_arg('skip')
        get_dash = (
            (not is_live or self._configuration_arg('include_live_dash'))
            and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
        get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

        def guess_quality(f):
            # Infer quality for manifest formats from the itag/height maps built above
            for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)):
                if val in qdict:
                    return q(qdict[val])
            return -1

        for sd in streaming_data:
            hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
            if hls_manifest_url:
                for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                    itag = self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)
                    if itag in itags:
                        continue
                    if itag:
                        f['format_id'] = itag
                        itags.append(itag)
                    f['quality'] = guess_quality(f)
                    yield f

            dash_manifest_url = get_dash and sd.get('dashManifestUrl')
            if dash_manifest_url:
                for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                    itag = f['format_id']
                    if itag in itags:
                        continue
                    if itag:
                        itags.append(itag)
                    f['quality'] = guess_quality(f)
                    filesize = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url')
                        or f['url'], 'file size', default=None))
                    if filesize:
                        f['filesize'] = filesize
                    yield f
2729
    def _real_extract(self, url):
        """Extract a single YouTube video: fetch the watch page and player
        responses, build formats/thumbnails/subtitles, and assemble the info dict."""
        url, smuggled_data = unsmuggle_url(url, {})
        video_id = self._match_id(url)

        base_url = self.http_scheme() + '//www.youtube.com/'
        webpage_url = base_url + 'watch?v=' + video_id
        webpage = None
        if 'webpage' not in self._configuration_arg('player_skip'):
            # bpctr/has_verified attempt to bypass content warnings
            webpage = self._download_webpage(
                webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

        master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

        player_responses, player_url = self._extract_player_responses(
            self._get_requested_clients(url, smuggled_data),
            video_id, webpage, master_ytcfg)

        # First matching value across all player responses
        get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)

        playability_statuses = traverse_obj(
            player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

        # Paid content may only expose a trailer; redirect to it
        trailer_video_id = get_first(
            playability_statuses,
            ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
            expected_type=str)
        if trailer_video_id:
            return self.url_result(
                trailer_video_id, self.ie_key(), trailer_video_id)

        search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                       if webpage else (lambda x: None))

        video_details = traverse_obj(
            player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        video_title = (
            get_first(video_details, 'title')
            or self._get_text(microformats, (..., 'title'))
            or search_meta(['og:title', 'twitter:title', 'title']))
        video_description = get_first(video_details, 'shortDescription')

        # Multi-camera events: return a playlist of the individual feeds
        if not smuggled_data.get('force_singlefeed', False):
            if not self.get_param('noplaylist'):
                multifeed_metadata_list = get_first(
                    player_responses,
                    ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
                    expected_type=str)
                if multifeed_metadata_list:
                    entries = []
                    feed_ids = []
                    for feed in multifeed_metadata_list.split(','):
                        # Unquote should take place before split on comma (,) since textual
                        # fields may contain comma as well (see
                        # https://github.com/ytdl-org/youtube-dl/issues/8536)
                        feed_data = compat_parse_qs(
                            compat_urllib_parse_unquote_plus(feed))

                        def feed_entry(name):
                            return try_get(
                                feed_data, lambda x: x[name][0], compat_str)

                        feed_id = feed_entry('id')
                        if not feed_id:
                            continue
                        feed_title = feed_entry('title')
                        title = video_title
                        if feed_title:
                            title += ' (%s)' % feed_title
                        entries.append({
                            '_type': 'url_transparent',
                            'ie_key': 'Youtube',
                            'url': smuggle_url(
                                '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                                {'force_singlefeed': True}),
                            'title': title,
                        })
                        feed_ids.append(feed_id)
                    self.to_screen(
                        'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                        % (', '.join(feed_ids), video_id))
                    return self.playlist_result(
                        entries, video_id, video_title, video_description)
            else:
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

        live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
        is_live = get_first(video_details, 'isLive')
        if is_live is None:
            is_live = get_first(live_broadcast_details, 'isLiveNow')

        streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
        formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))

        # No formats: surface DRM / geo-restriction / playability reason
        if not formats:
            if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
                self.report_drm(video_id)
            pemr = get_first(
                playability_statuses,
                ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
            reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
            subreason = clean_html(self._get_text(pemr, 'subreason') or '')
            if subreason:
                if subreason == 'The uploader has not made this video available in your country.':
                    countries = get_first(microformats, 'availableCountries')
                    if not countries:
                        regions_allowed = search_meta('regionsAllowed')
                        countries = regions_allowed.split(',') if regions_allowed else None
                    self.raise_geo_restricted(subreason, countries, metadata_available=True)
                reason += f'. {subreason}'
            if reason:
                self.raise_no_formats(reason, expected=True)

        for f in formats:
            if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']:  # throttled
                f['source_preference'] = -10
                # TODO: this method is not reliable
                f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)'

        # Source is given priority since formats that throttle are given lower source_preference
        # When throttling issue is fully fixed, remove this
        self._sort_formats(formats, ('quality', 'res', 'fps', 'source', 'codec:vp9.2', 'lang'))

        keywords = get_first(video_details, 'keywords', expected_type=list) or []
        if not keywords and webpage:
            keywords = [
                unescapeHTML(m.group('content'))
                for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
        # yt:stretch=<w>:<h> keyword overrides the aspect ratio of video formats
        for keyword in keywords:
            if keyword.startswith('yt:stretch='):
                mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
                if mobj:
                    # NB: float is intentional for forcing float division
                    w, h = (float(v) for v in mobj.groups())
                    if w > 0 and h > 0:
                        ratio = w / h
                        for f in formats:
                            if f.get('vcodec') != 'none':
                                f['stretched_ratio'] = ratio
                        break

        thumbnails = []
        thumbnail_dicts = traverse_obj(
            (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
            expected_type=dict, default=[])
        for thumbnail in thumbnail_dicts:
            thumbnail_url = thumbnail.get('url')
            if not thumbnail_url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in thumbnail_url:
                thumbnail_url = thumbnail_url.split('?')[0]
            thumbnails.append({
                'url': thumbnail_url,
                'height': int_or_none(thumbnail.get('height')),
                'width': int_or_none(thumbnail.get('width')),
            })
        thumbnail_url = search_meta(['og:image', 'twitter:image'])
        if thumbnail_url:
            thumbnails.append({
                'url': thumbnail_url,
            })
        # The best resolution thumbnails sometimes does not appear in the webpage
        # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
        # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
        hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
        # TODO: Test them also? - For some videos, even these don't exist
        guaranteed_thumbnail_names = [
            'hqdefault', 'hq1', 'hq2', 'hq3', '0',
            'mqdefault', 'mq1', 'mq2', 'mq3',
            'default', '1', '2', '3'
        ]
        thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
        n_thumbnail_names = len(thumbnail_names)

        # Synthesize the known i.ytimg.com thumbnail URLs (webp preferred)
        thumbnails.extend({
            'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
                video_id=video_id, name=name, ext=ext,
                webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
            '_test_url': name in hq_thumbnail_names,
        } for name in thumbnail_names for ext in ('webp', 'jpg'))
        for thumb in thumbnails:
            i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
            thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
        self._remove_duplicate_formats(thumbnails)

        category = get_first(microformats, 'category') or search_meta('genre')
        channel_id = str_or_none(
            get_first(video_details, 'channelId')
            or get_first(microformats, 'externalChannelId')
            or search_meta('channelId'))
        duration = int_or_none(
            get_first(video_details, 'lengthSeconds')
            or get_first(microformats, 'lengthSeconds')
            or parse_duration(search_meta('duration'))) or None
        owner_profile_url = get_first(microformats, 'ownerProfileUrl')

        live_content = get_first(video_details, 'isLiveContent')
        is_upcoming = get_first(video_details, 'isUpcoming')
        if is_live is None:
            if is_upcoming or live_content is False:
                is_live = False
        if is_upcoming is None and (live_content or is_live):
            is_upcoming = False
        live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
        live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
        if not duration and live_endtime and live_starttime:
            duration = live_endtime - live_starttime

        info = {
            'id': video_id,
            'title': self._live_title(video_title) if is_live else video_title,
            'formats': formats,
            'thumbnails': thumbnails,
            'description': video_description,
            'upload_date': unified_strdate(
                get_first(microformats, 'uploadDate')
                or search_meta('uploadDate')),
            'uploader': get_first(video_details, 'author'),
            'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
            'uploader_url': owner_profile_url,
            'channel_id': channel_id,
            'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
            'duration': duration,
            'view_count': int_or_none(
                get_first((video_details, microformats), (..., 'viewCount'))
                or search_meta('interactionCount')),
            'average_rating': float_or_none(get_first(video_details, 'averageRating')),
            'age_limit': 18 if (
                get_first(microformats, 'isFamilySafe') is False
                or search_meta('isFamilyFriendly') == 'false'
                or search_meta('og:restrictions:age') == '18+') else 0,
            'webpage_url': webpage_url,
            'categories': [category] if category else None,
            'tags': keywords,
            'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
            'is_live': is_live,
            'was_live': (False if is_live or is_upcoming or live_content is False
                         else None if is_live is None or is_upcoming is None
                         else live_content),
            'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
            'release_timestamp': live_starttime,
        }

        pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
        # Converted into dicts to remove duplicates
        captions = {
            sub.get('baseUrl'): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
        translation_languages = {
            lang.get('languageCode'): lang.get('languageName')
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
        subtitles = {}
        if pctr:
            def process_language(container, base_url, lang_code, sub_name, query):
                # Add one entry per supported subtitle format for this language
                lang_subs = container.setdefault(lang_code, [])
                for fmt in self._SUBTITLE_FORMATS:
                    query.update({
                        'fmt': fmt,
                    })
                    lang_subs.append({
                        'ext': fmt,
                        'url': update_url_query(base_url, query),
                        'name': sub_name,
                    })

            for base_url, caption_track in captions.items():
                if not base_url:
                    continue
                # Non-ASR tracks are manual subtitles; ASR tracks seed the
                # auto-translated captions below
                if caption_track.get('kind') != 'asr':
                    lang_code = (
                        remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
                        or caption_track.get('languageCode'))
                    if not lang_code:
                        continue
                    process_language(
                        subtitles, base_url, lang_code,
                        traverse_obj(caption_track, ('name', 'simpleText'), ('name', 'runs', ..., 'text'), get_all=False),
                        {})
                    continue
                automatic_captions = {}
                for trans_code, trans_name in translation_languages.items():
                    if not trans_code:
                        continue
                    process_language(
                        automatic_captions, base_url, trans_code,
                        self._get_text(trans_name, max_runs=1),
                        {'tlang': trans_code})
                info['automatic_captions'] = automatic_captions
        info['subtitles'] = subtitles

        # Pick up t/start/end time markers from the URL query or fragment
        parsed_url = compat_urllib_parse_urlparse(url)
        for component in [parsed_url.fragment, parsed_url.query]:
            query = compat_parse_qs(component)
            for k, v in query.items():
                for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                    d_k += '_time'
                    if d_k not in info and k in s_ks:
                        info[d_k] = parse_duration(query[k][0])

        # Youtube Music Auto-generated description
        if video_description:
            mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
            if mobj:
                release_year = mobj.group('release_year')
                release_date = mobj.group('release_date')
                if release_date:
                    release_date = release_date.replace('-', '')
                    if not release_year:
                        release_year = release_date[:4]
                info.update({
                    # NOTE(review): .strip() here is applied to the literal string
                    # 'album' (a no-op), not to the matched text — likely intended
                    # mobj.group('album').strip()
                    'album': mobj.group('album'.strip()),
                    'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                    'track': mobj.group('track').strip(),
                    'release_date': release_date,
                    'release_year': int_or_none(release_year),
                })

        # Watch-next ("initial data") supplies chapters, live chat, counts, etc.
        initial_data = None
        if webpage:
            initial_data = self._extract_yt_initial_variable(
                webpage, self._YT_INITIAL_DATA_RE, video_id,
                'yt initial data')
        if not initial_data:
            query = {'videoId': video_id}
            query.update(self._get_checkok_params())
            initial_data = self._extract_response(
                item_id=video_id, ep='next', fatal=False,
                ytcfg=master_ytcfg, query=query,
                headers=self.generate_api_headers(ytcfg=master_ytcfg),
                note='Downloading initial data API JSON')

        try:
            # This will error if there is no livechat
            initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
            info['subtitles']['live_chat'] = [{
                'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
                'video_id': video_id,
                'ext': 'json',
                'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
            }]
        except (KeyError, IndexError, TypeError):
            pass

        if initial_data:
            info['chapters'] = (
                self._extract_chapters_from_json(initial_data, duration)
                or self._extract_chapters_from_engagement_panel(initial_data, duration)
                or None)

        contents = try_get(
            initial_data,
            lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
            list) or []
        for content in contents:
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = self._get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        # Super title of form "<series> S<season> • E<episode>"
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                # Like/dislike counts are parsed from button accessibility labels
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    like_count, dislike_count = sbr_tooltip.split(' / ')
                    info.update({
                        'like_count': str_to_int(like_count),
                        'dislike_count': str_to_int(dislike_count),
                    })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                # A divider line indicates multiple songs; per-song fields are
                # then ambiguous and skipped below
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = self._get_text(mrr, 'title')
                    mrr_contents_text = self._get_text(mrr, ('contents', 0))
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

        # Fall back to uploader fields when channel fields are missing
        fallbacks = {
            'channel': 'uploader',
            'channel_id': 'uploader_id',
            'channel_url': 'uploader_url',
        }
        for to, frm in fallbacks.items():
            if not info.get(to):
                info[to] = info.get(frm)

        for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
            v = info.get(s_k)
            if v:
                info[d_k] = v

        is_private = get_first(video_details, 'isPrivate', expected_type=bool)
        is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
        is_membersonly = None
        is_premium = None
        if initial_data and is_private is not None:
            is_membersonly = False
            is_premium = False
            contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
            badge_labels = set()
            for content in contents:
                if not isinstance(content, dict):
                    continue
                badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
            for badge_label in badge_labels:
                if badge_label.lower() == 'members only':
                    is_membersonly = True
                elif badge_label.lower() == 'premium':
                    is_premium = True
                elif badge_label.lower() == 'unlisted':
                    is_unlisted = True

        info['availability'] = self._availability(
            is_private=is_private,
            needs_premium=is_premium,
            needs_subscription=is_membersonly,
            needs_auth=info['age_limit'] >= 18,
            is_unlisted=None if is_private is None else is_unlisted)

        if self.get_param('getcomments', False):
            # Deferred: comments are only fetched after the caller opts in
            info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)

        self.mark_watched(video_id, player_responses)

        return info
3208
3209
3210 class YoutubeTabIE(YoutubeBaseInfoExtractor):
3211 IE_DESC = 'YouTube.com tab'
3212 _VALID_URL = r'''(?x)
3213 https?://
3214 (?:\w+\.)?
3215 (?:
3216 youtube(?:kids)?\.com|
3217 invidio\.us
3218 )/
3219 (?:
3220 (?P<channel_type>channel|c|user|browse)/|
3221 (?P<not_channel>
3222 feed/|hashtag/|
3223 (?:playlist|watch)\?.*?\blist=
3224 )|
3225 (?!(?:%s)\b) # Direct URLs
3226 )
3227 (?P<id>[^/?\#&]+)
3228 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3229 IE_NAME = 'youtube:tab'
3230
3231 _TESTS = [{
3232 'note': 'playlists, multipage',
3233 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3234 'playlist_mincount': 94,
3235 'info_dict': {
3236 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3237 'title': 'Игорь Клейнер - Playlists',
3238 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3239 'uploader': 'Игорь Клейнер',
3240 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3241 },
3242 }, {
3243 'note': 'playlists, multipage, different order',
3244 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3245 'playlist_mincount': 94,
3246 'info_dict': {
3247 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3248 'title': 'Игорь Клейнер - Playlists',
3249 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3250 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3251 'uploader': 'Игорь Клейнер',
3252 },
3253 }, {
3254 'note': 'playlists, series',
3255 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3256 'playlist_mincount': 5,
3257 'info_dict': {
3258 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3259 'title': '3Blue1Brown - Playlists',
3260 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3261 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3262 'uploader': '3Blue1Brown',
3263 },
3264 }, {
3265 'note': 'playlists, singlepage',
3266 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3267 'playlist_mincount': 4,
3268 'info_dict': {
3269 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3270 'title': 'ThirstForScience - Playlists',
3271 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3272 'uploader': 'ThirstForScience',
3273 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3274 }
3275 }, {
3276 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3277 'only_matching': True,
3278 }, {
3279 'note': 'basic, single video playlist',
3280 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3281 'info_dict': {
3282 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3283 'uploader': 'Sergey M.',
3284 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3285 'title': 'youtube-dl public playlist',
3286 },
3287 'playlist_count': 1,
3288 }, {
3289 'note': 'empty playlist',
3290 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3291 'info_dict': {
3292 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3293 'uploader': 'Sergey M.',
3294 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3295 'title': 'youtube-dl empty playlist',
3296 },
3297 'playlist_count': 0,
3298 }, {
3299 'note': 'Home tab',
3300 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3301 'info_dict': {
3302 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3303 'title': 'lex will - Home',
3304 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3305 'uploader': 'lex will',
3306 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3307 },
3308 'playlist_mincount': 2,
3309 }, {
3310 'note': 'Videos tab',
3311 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3312 'info_dict': {
3313 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3314 'title': 'lex will - Videos',
3315 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3316 'uploader': 'lex will',
3317 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3318 },
3319 'playlist_mincount': 975,
3320 }, {
3321 'note': 'Videos tab, sorted by popular',
3322 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3323 'info_dict': {
3324 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3325 'title': 'lex will - Videos',
3326 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3327 'uploader': 'lex will',
3328 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3329 },
3330 'playlist_mincount': 199,
3331 }, {
3332 'note': 'Playlists tab',
3333 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3334 'info_dict': {
3335 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3336 'title': 'lex will - Playlists',
3337 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3338 'uploader': 'lex will',
3339 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3340 },
3341 'playlist_mincount': 17,
3342 }, {
3343 'note': 'Community tab',
3344 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3345 'info_dict': {
3346 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3347 'title': 'lex will - Community',
3348 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3349 'uploader': 'lex will',
3350 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3351 },
3352 'playlist_mincount': 18,
3353 }, {
3354 'note': 'Channels tab',
3355 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3356 'info_dict': {
3357 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3358 'title': 'lex will - Channels',
3359 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3360 'uploader': 'lex will',
3361 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3362 },
3363 'playlist_mincount': 12,
3364 }, {
3365 'note': 'Search tab',
3366 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3367 'playlist_mincount': 40,
3368 'info_dict': {
3369 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3370 'title': '3Blue1Brown - Search - linear algebra',
3371 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3372 'uploader': '3Blue1Brown',
3373 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3374 },
3375 }, {
3376 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3377 'only_matching': True,
3378 }, {
3379 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3380 'only_matching': True,
3381 }, {
3382 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3383 'only_matching': True,
3384 }, {
3385 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3386 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3387 'info_dict': {
3388 'title': '29C3: Not my department',
3389 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3390 'uploader': 'Christiaan008',
3391 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3392 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3393 },
3394 'playlist_count': 96,
3395 }, {
3396 'note': 'Large playlist',
3397 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3398 'info_dict': {
3399 'title': 'Uploads from Cauchemar',
3400 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3401 'uploader': 'Cauchemar',
3402 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3403 },
3404 'playlist_mincount': 1123,
3405 }, {
3406 'note': 'even larger playlist, 8832 videos',
3407 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3408 'only_matching': True,
3409 }, {
3410 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3411 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3412 'info_dict': {
3413 'title': 'Uploads from Interstellar Movie',
3414 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3415 'uploader': 'Interstellar Movie',
3416 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3417 },
3418 'playlist_mincount': 21,
3419 }, {
3420 'note': 'Playlist with "show unavailable videos" button',
3421 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3422 'info_dict': {
3423 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3424 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3425 'uploader': 'Phim Siêu Nhân Nhật Bản',
3426 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3427 },
3428 'playlist_mincount': 200,
3429 }, {
3430 'note': 'Playlist with unavailable videos in page 7',
3431 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3432 'info_dict': {
3433 'title': 'Uploads from BlankTV',
3434 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3435 'uploader': 'BlankTV',
3436 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3437 },
3438 'playlist_mincount': 1000,
3439 }, {
3440 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3441 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3442 'info_dict': {
3443 'title': 'Data Analysis with Dr Mike Pound',
3444 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3445 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3446 'uploader': 'Computerphile',
3447 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3448 },
3449 'playlist_mincount': 11,
3450 }, {
3451 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3452 'only_matching': True,
3453 }, {
3454 'note': 'Playlist URL that does not actually serve a playlist',
3455 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3456 'info_dict': {
3457 'id': 'FqZTN594JQw',
3458 'ext': 'webm',
3459 'title': "Smiley's People 01 detective, Adventure Series, Action",
3460 'uploader': 'STREEM',
3461 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3462 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3463 'upload_date': '20150526',
3464 'license': 'Standard YouTube License',
3465 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3466 'categories': ['People & Blogs'],
3467 'tags': list,
3468 'view_count': int,
3469 'like_count': int,
3470 'dislike_count': int,
3471 },
3472 'params': {
3473 'skip_download': True,
3474 },
3475 'skip': 'This video is not available.',
3476 'add_ie': [YoutubeIE.ie_key()],
3477 }, {
3478 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3479 'only_matching': True,
3480 }, {
3481 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3482 'only_matching': True,
3483 }, {
3484 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3485 'info_dict': {
3486 'id': '3yImotZU3tw', # This will keep changing
3487 'ext': 'mp4',
3488 'title': compat_str,
3489 'uploader': 'Sky News',
3490 'uploader_id': 'skynews',
3491 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3492 'upload_date': r're:\d{8}',
3493 'description': compat_str,
3494 'categories': ['News & Politics'],
3495 'tags': list,
3496 'like_count': int,
3497 'dislike_count': int,
3498 },
3499 'params': {
3500 'skip_download': True,
3501 },
3502 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3503 }, {
3504 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3505 'info_dict': {
3506 'id': 'a48o2S1cPoo',
3507 'ext': 'mp4',
3508 'title': 'The Young Turks - Live Main Show',
3509 'uploader': 'The Young Turks',
3510 'uploader_id': 'TheYoungTurks',
3511 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3512 'upload_date': '20150715',
3513 'license': 'Standard YouTube License',
3514 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3515 'categories': ['News & Politics'],
3516 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3517 'like_count': int,
3518 'dislike_count': int,
3519 },
3520 'params': {
3521 'skip_download': True,
3522 },
3523 'only_matching': True,
3524 }, {
3525 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3526 'only_matching': True,
3527 }, {
3528 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3529 'only_matching': True,
3530 }, {
3531 'note': 'A channel that is not live. Should raise error',
3532 'url': 'https://www.youtube.com/user/numberphile/live',
3533 'only_matching': True,
3534 }, {
3535 'url': 'https://www.youtube.com/feed/trending',
3536 'only_matching': True,
3537 }, {
3538 'url': 'https://www.youtube.com/feed/library',
3539 'only_matching': True,
3540 }, {
3541 'url': 'https://www.youtube.com/feed/history',
3542 'only_matching': True,
3543 }, {
3544 'url': 'https://www.youtube.com/feed/subscriptions',
3545 'only_matching': True,
3546 }, {
3547 'url': 'https://www.youtube.com/feed/watch_later',
3548 'only_matching': True,
3549 }, {
3550 'note': 'Recommended - redirects to home page',
3551 'url': 'https://www.youtube.com/feed/recommended',
3552 'only_matching': True,
3553 }, {
3554 'note': 'inline playlist with not always working continuations',
3555 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3556 'only_matching': True,
3557 }, {
3558 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3559 'only_matching': True,
3560 }, {
3561 'url': 'https://www.youtube.com/course',
3562 'only_matching': True,
3563 }, {
3564 'url': 'https://www.youtube.com/zsecurity',
3565 'only_matching': True,
3566 }, {
3567 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3568 'only_matching': True,
3569 }, {
3570 'url': 'https://www.youtube.com/TheYoungTurks/live',
3571 'only_matching': True,
3572 }, {
3573 'url': 'https://www.youtube.com/hashtag/cctv9',
3574 'info_dict': {
3575 'id': 'cctv9',
3576 'title': '#cctv9',
3577 },
3578 'playlist_mincount': 350,
3579 }, {
3580 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3581 'only_matching': True,
3582 }, {
3583 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3584 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3585 'only_matching': True
3586 }, {
3587 'note': '/browse/ should redirect to /channel/',
3588 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3589 'only_matching': True
3590 }, {
3591 'note': 'VLPL, should redirect to playlist?list=PL...',
3592 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3593 'info_dict': {
3594 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3595 'uploader': 'NoCopyrightSounds',
3596 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3597 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3598 'title': 'NCS Releases',
3599 },
3600 'playlist_mincount': 166,
3601 }, {
3602 'note': 'Topic, should redirect to playlist?list=UU...',
3603 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3604 'info_dict': {
3605 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3606 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3607 'title': 'Uploads from Royalty Free Music - Topic',
3608 'uploader': 'Royalty Free Music - Topic',
3609 },
3610 'expected_warnings': [
3611 'A channel/user page was given',
3612 'The URL does not have a videos tab',
3613 ],
3614 'playlist_mincount': 101,
3615 }, {
3616 'note': 'Topic without a UU playlist',
3617 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3618 'info_dict': {
3619 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3620 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3621 },
3622 'expected_warnings': [
3623 'A channel/user page was given',
3624 'The URL does not have a videos tab',
3625 'Falling back to channel URL',
3626 ],
3627 'playlist_mincount': 9,
3628 }, {
3629 'note': 'Youtube music Album',
3630 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3631 'info_dict': {
3632 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3633 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3634 },
3635 'playlist_count': 50,
3636 }, {
3637 'note': 'unlisted single video playlist',
3638 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3639 'info_dict': {
3640 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3641 'uploader': 'colethedj',
3642 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3643 'title': 'yt-dlp unlisted playlist test',
3644 'availability': 'unlisted'
3645 },
3646 'playlist_count': 1,
3647 }]
3648
3649 @classmethod
3650 def suitable(cls, url):
3651 return False if YoutubeIE.suitable(url) else super(
3652 YoutubeTabIE, cls).suitable(url)
3653
3654 def _extract_channel_id(self, webpage):
3655 channel_id = self._html_search_meta(
3656 'channelId', webpage, 'channel id', default=None)
3657 if channel_id:
3658 return channel_id
3659 channel_url = self._html_search_meta(
3660 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3661 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3662 'twitter:app:url:googleplay'), webpage, 'channel url')
3663 return self._search_regex(
3664 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3665 channel_url, 'channel id')
3666
3667 @staticmethod
3668 def _extract_basic_item_renderer(item):
3669 # Modified from _extract_grid_item_renderer
3670 known_basic_renderers = (
3671 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3672 )
3673 for key, renderer in item.items():
3674 if not isinstance(renderer, dict):
3675 continue
3676 elif key in known_basic_renderers:
3677 return renderer
3678 elif key.startswith('grid') and key.endswith('Renderer'):
3679 return renderer
3680
3681 def _grid_entries(self, grid_renderer):
3682 for item in grid_renderer['items']:
3683 if not isinstance(item, dict):
3684 continue
3685 renderer = self._extract_basic_item_renderer(item)
3686 if not isinstance(renderer, dict):
3687 continue
3688 title = self._get_text(renderer, 'title')
3689
3690 # playlist
3691 playlist_id = renderer.get('playlistId')
3692 if playlist_id:
3693 yield self.url_result(
3694 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3695 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3696 video_title=title)
3697 continue
3698 # video
3699 video_id = renderer.get('videoId')
3700 if video_id:
3701 yield self._extract_video(renderer)
3702 continue
3703 # channel
3704 channel_id = renderer.get('channelId')
3705 if channel_id:
3706 yield self.url_result(
3707 'https://www.youtube.com/channel/%s' % channel_id,
3708 ie=YoutubeTabIE.ie_key(), video_title=title)
3709 continue
3710 # generic endpoint URL support
3711 ep_url = urljoin('https://www.youtube.com/', try_get(
3712 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3713 compat_str))
3714 if ep_url:
3715 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3716 if ie.suitable(ep_url):
3717 yield self.url_result(
3718 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3719 break
3720
3721 def _shelf_entries_from_content(self, shelf_renderer):
3722 content = shelf_renderer.get('content')
3723 if not isinstance(content, dict):
3724 return
3725 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3726 if renderer:
3727 # TODO: add support for nested playlists so each shelf is processed
3728 # as separate playlist
3729 # TODO: this includes only first N items
3730 for entry in self._grid_entries(renderer):
3731 yield entry
3732 renderer = content.get('horizontalListRenderer')
3733 if renderer:
3734 # TODO
3735 pass
3736
3737 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3738 ep = try_get(
3739 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3740 compat_str)
3741 shelf_url = urljoin('https://www.youtube.com', ep)
3742 if shelf_url:
3743 # Skipping links to another channels, note that checking for
3744 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3745 # will not work
3746 if skip_channels and '/channels?' in shelf_url:
3747 return
3748 title = self._get_text(shelf_renderer, 'title')
3749 yield self.url_result(shelf_url, video_title=title)
3750 # Shelf may not contain shelf URL, fallback to extraction from content
3751 for entry in self._shelf_entries_from_content(shelf_renderer):
3752 yield entry
3753
3754 def _playlist_entries(self, video_list_renderer):
3755 for content in video_list_renderer['contents']:
3756 if not isinstance(content, dict):
3757 continue
3758 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3759 if not isinstance(renderer, dict):
3760 continue
3761 video_id = renderer.get('videoId')
3762 if not video_id:
3763 continue
3764 yield self._extract_video(renderer)
3765
3766 def _rich_entries(self, rich_grid_renderer):
3767 renderer = try_get(
3768 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3769 video_id = renderer.get('videoId')
3770 if not video_id:
3771 return
3772 yield self._extract_video(renderer)
3773
3774 def _video_entry(self, video_renderer):
3775 video_id = video_renderer.get('videoId')
3776 if video_id:
3777 return self._extract_video(video_renderer)
3778
3779 def _post_thread_entries(self, post_thread_renderer):
3780 post_renderer = try_get(
3781 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3782 if not post_renderer:
3783 return
3784 # video attachment
3785 video_renderer = try_get(
3786 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3787 video_id = video_renderer.get('videoId')
3788 if video_id:
3789 entry = self._extract_video(video_renderer)
3790 if entry:
3791 yield entry
3792 # playlist attachment
3793 playlist_id = try_get(
3794 post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3795 if playlist_id:
3796 yield self.url_result(
3797 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3798 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3799 # inline video links
3800 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3801 for run in runs:
3802 if not isinstance(run, dict):
3803 continue
3804 ep_url = try_get(
3805 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3806 if not ep_url:
3807 continue
3808 if not YoutubeIE.suitable(ep_url):
3809 continue
3810 ep_video_id = YoutubeIE._match_id(ep_url)
3811 if video_id == ep_video_id:
3812 continue
3813 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3814
3815 def _post_thread_continuation_entries(self, post_thread_continuation):
3816 contents = post_thread_continuation.get('contents')
3817 if not isinstance(contents, list):
3818 return
3819 for content in contents:
3820 renderer = content.get('backstagePostThreadRenderer')
3821 if not isinstance(renderer, dict):
3822 continue
3823 for entry in self._post_thread_entries(renderer):
3824 yield entry
3825
3826 r''' # unused
3827 def _rich_grid_entries(self, contents):
3828 for content in contents:
3829 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3830 if video_renderer:
3831 entry = self._video_entry(video_renderer)
3832 if entry:
3833 yield entry
3834 '''
    def _entries(self, tab, item_id, account_syncid, ytcfg):
        """Generate all entries of a tab, following API continuations until
        no further continuation token can be extracted.

        @param tab            the selected tab renderer (see _extract_selected_tab)
        @param item_id        id used in progress messages for API pages
        @param account_syncid forwarded to generate_api_headers for auth'd requests
        @param ytcfg          page ytcfg used for API headers and context
        """

        def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
            # Walks parent_renderer['contents']: rich items are yielded
            # directly; itemSectionRenderers are dispatched by renderer type.
            # As a side effect it stores the next continuation token into
            # continuation_list[0] (mutable cell, see below).
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue

                    # Map of renderer key -> entry generator for that renderer
                    known_renderers = {
                        'playlistVideoListRenderer': self._playlist_entries,
                        'gridRenderer': self._grid_entries,
                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                        'backstagePostThreadRenderer': self._post_thread_entries,
                        'videoRenderer': lambda x: [self._video_entry(x)],
                    }
                    for key, renderer in isr_content.items():
                        if key not in known_renderers:
                            continue
                        for entry in known_renderers[key](renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        break

                # Fall back to the section renderer's own continuation
                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            # Last resort: continuation of the parent renderer itself
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        # One-element list acts as a mutable cell shared with the inner
        # generator above (Python 2 does not support nonlocal)
        continuation_list = [None]
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]
        visitor_data = None

        # Continuation loop: keep requesting API pages until no token remains
        for page_num in itertools.count(1):
            if not continuation:
                break
            headers = self.generate_api_headers(
                ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=continuation, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # Carry the visitorData forward so subsequent requests stay in
            # the same session context
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            # Old-style continuation: response['continuationContents']
            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # New-style continuation: onResponseReceived* -> continuationItems;
            # the items are wrapped back into a fake renderer dict keyed by the
            # attribute name the handler expects ('items' or 'contents')
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'gridChannelRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            break
3951
3952 @staticmethod
3953 def _extract_selected_tab(tabs):
3954 for tab in tabs:
3955 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3956 if renderer.get('selected') is True:
3957 return renderer
3958 else:
3959 raise ExtractorError('Unable to find selected tab')
3960
3961 @classmethod
3962 def _extract_uploader(cls, data):
3963 uploader = {}
3964 renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3965 owner = try_get(
3966 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3967 if owner:
3968 uploader['uploader'] = owner.get('text')
3969 uploader['uploader_id'] = try_get(
3970 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3971 uploader['uploader_url'] = urljoin(
3972 'https://www.youtube.com/',
3973 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3974 return {k: v for k, v in uploader.items() if v is not None}
3975
    def _extract_from_tabs(self, item_id, webpage, data, tabs):
        """Build a playlist result for a tabbed page (channel/playlist/hashtag).

        Collects channel/playlist metadata from the page's metadata renderers,
        then delegates entry extraction of the selected tab to _entries.
        """
        playlist_id = title = description = channel_url = channel_name = channel_id = None
        # Both names start as the same empty list but are only ever rebound,
        # never mutated, so sharing is harmless
        thumbnails_list = tags = []

        selected_tab = self._extract_selected_tab(tabs)
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        if renderer:
            channel_name = renderer.get('title')
            channel_url = renderer.get('channelUrl')
            channel_id = renderer.get('externalId')
        else:
            renderer = try_get(
                data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

        # NOTE: 'renderer' here is either the channel or the playlist metadata
        # renderer, whichever was found above
        if renderer:
            title = renderer.get('title')
            description = renderer.get('description', '')
            # For channel pages this is the channel id; for pure playlist
            # pages channel_id is still None and playlist_id falls back to
            # item_id below
            playlist_id = channel_id
            tags = renderer.get('keywords', '').split()
            thumbnails_list = (
                try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
                or try_get(
                    self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                    lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                    list)
                or [])

        # Normalize raw thumbnail dicts, dropping entries without a valid URL
        thumbnails = []
        for t in thumbnails_list:
            if not isinstance(t, dict):
                continue
            thumbnail_url = url_or_none(t.get('url'))
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(t.get('width')),
                'height': int_or_none(t.get('height')),
            })
        if playlist_id is None:
            playlist_id = item_id
        if title is None:
            # Hashtag pages carry their title in the hashtag header
            title = (
                try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
                or playlist_id)
        # Append the tab name(s), e.g. "channel - Videos"
        title += format_field(selected_tab, 'title', ' - %s')
        title += format_field(selected_tab, 'expandedText', ' - %s')
        metadata = {
            'playlist_id': playlist_id,
            'playlist_title': title,
            'playlist_description': description,
            'uploader': channel_name,
            'uploader_id': channel_id,
            'uploader_url': channel_url,
            'thumbnails': thumbnails,
            'tags': tags,
        }
        availability = self._extract_availability(data)
        if availability:
            metadata['availability'] = availability
        if not channel_id:
            # Playlist pages: take uploader info from the sidebar instead
            metadata.update(self._extract_uploader(data))
        metadata.update({
            'channel': metadata['uploader'],
            'channel_id': metadata['uploader_id'],
            'channel_url': metadata['uploader_url']})
        ytcfg = self.extract_ytcfg(item_id, webpage)
        return self.playlist_result(
            self._entries(
                selected_tab, playlist_id,
                self._extract_account_syncid(ytcfg, data), ytcfg),
            **metadata)
4049
4050 def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
4051 first_id = last_id = None
4052 ytcfg = self.extract_ytcfg(playlist_id, webpage)
4053 headers = self.generate_api_headers(
4054 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data))
4055 for page_num in itertools.count(1):
4056 videos = list(self._playlist_entries(playlist))
4057 if not videos:
4058 return
4059 start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
4060 if start >= len(videos):
4061 return
4062 for video in videos[start:]:
4063 if video['id'] == first_id:
4064 self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
4065 return
4066 yield video
4067 first_id = first_id or videos[0]['id']
4068 last_id = videos[-1]['id']
4069 watch_endpoint = try_get(
4070 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
4071 query = {
4072 'playlistId': playlist_id,
4073 'videoId': watch_endpoint.get('videoId') or last_id,
4074 'index': watch_endpoint.get('index') or len(videos),
4075 'params': watch_endpoint.get('params') or 'OAE%3D'
4076 }
4077 response = self._extract_response(
4078 item_id='%s page %d' % (playlist_id, page_num),
4079 query=query, ep='next', headers=headers, ytcfg=ytcfg,
4080 check_get_keys='contents'
4081 )
4082 playlist = try_get(
4083 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4084
4085 def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
4086 title = playlist.get('title') or try_get(
4087 data, lambda x: x['titleText']['simpleText'], compat_str)
4088 playlist_id = playlist.get('playlistId') or item_id
4089
4090 # Delegating everything except mix playlists to regular tab-based playlist URL
4091 playlist_url = urljoin(url, try_get(
4092 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
4093 compat_str))
4094 if playlist_url and playlist_url != url:
4095 return self.url_result(
4096 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4097 video_title=title)
4098
4099 return self.playlist_result(
4100 self._extract_mix_playlist(playlist, playlist_id, data, webpage),
4101 playlist_id=playlist_id, playlist_title=title)
4102
4103 def _extract_availability(self, data):
4104 """
4105 Gets the availability of a given playlist/tab.
4106 Note: Unless YouTube tells us explicitly, we do not assume it is public
4107 @param data: response
4108 """
4109 is_private = is_unlisted = None
4110 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
4111 badge_labels = self._extract_badges(renderer)
4112
4113 # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
4114 privacy_dropdown_entries = try_get(
4115 renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
4116 for renderer_dict in privacy_dropdown_entries:
4117 is_selected = try_get(
4118 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
4119 if not is_selected:
4120 continue
4121 label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
4122 if label:
4123 badge_labels.add(label.lower())
4124 break
4125
4126 for badge_label in badge_labels:
4127 if badge_label == 'unlisted':
4128 is_unlisted = True
4129 elif badge_label == 'private':
4130 is_private = True
4131 elif badge_label == 'public':
4132 is_unlisted = is_private = False
4133 return self._availability(is_private, False, False, False, is_unlisted)
4134
4135 @staticmethod
4136 def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
4137 sidebar_renderer = try_get(
4138 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
4139 for item in sidebar_renderer:
4140 renderer = try_get(item, lambda x: x[info_renderer], expected_type)
4141 if renderer:
4142 return renderer
4143
4144 def _reload_with_unavailable_videos(self, item_id, data, webpage):
4145 """
4146 Get playlist with unavailable videos if the 'show unavailable videos' button exists.
4147 """
4148 browse_id = params = None
4149 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
4150 if not renderer:
4151 return
4152 menu_renderer = try_get(
4153 renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
4154 for menu_item in menu_renderer:
4155 if not isinstance(menu_item, dict):
4156 continue
4157 nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
4158 text = try_get(
4159 nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
4160 if not text or text.lower() != 'show unavailable videos':
4161 continue
4162 browse_endpoint = try_get(
4163 nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
4164 browse_id = browse_endpoint.get('browseId')
4165 params = browse_endpoint.get('params')
4166 break
4167
4168 ytcfg = self.extract_ytcfg(item_id, webpage)
4169 headers = self.generate_api_headers(
4170 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
4171 visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
4172 query = {
4173 'params': params or 'wgYCCAA=',
4174 'browseId': browse_id or 'VL%s' % item_id
4175 }
4176 return self._extract_response(
4177 item_id=item_id, headers=headers, query=query,
4178 check_get_keys='contents', fatal=False, ytcfg=ytcfg,
4179 note='Downloading API JSON with unavailable videos')
4180
4181 def _extract_webpage(self, url, item_id):
4182 retries = self.get_param('extractor_retries', 3)
4183 count = -1
4184 last_error = 'Incomplete yt initial data recieved'
4185 while count < retries:
4186 count += 1
4187 # Sometimes youtube returns a webpage with incomplete ytInitialData
4188 # See: https://github.com/yt-dlp/yt-dlp/issues/116
4189 if count:
4190 self.report_warning('%s. Retrying ...' % last_error)
4191 webpage = self._download_webpage(
4192 url, item_id,
4193 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
4194 data = self.extract_yt_initial_data(item_id, webpage)
4195 if data.get('contents') or data.get('currentVideoEndpoint'):
4196 break
4197 # Extract alerts here only when there is error
4198 self._extract_and_report_alerts(data)
4199 if count >= retries:
4200 raise ExtractorError(last_error)
4201 return webpage, data
4202
4203 @staticmethod
4204 def _smuggle_data(entries, data):
4205 for entry in entries:
4206 if data:
4207 entry['url'] = smuggle_url(entry['url'], data)
4208 yield entry
4209
4210 def _real_extract(self, url):
4211 url, smuggled_data = unsmuggle_url(url, {})
4212 if self.is_music_url(url):
4213 smuggled_data['is_music_url'] = True
4214 info_dict = self.__real_extract(url, smuggled_data)
4215 if info_dict.get('entries'):
4216 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4217 return info_dict
4218
    # Splits an already-validated URL into pre (base), tab (e.g. '/videos') and
    # post parts; the (?(channel_type)...) conditional group only captures a tab
    # when the channel_type group (defined in _VALID_URL) matched
    _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4220
    def __real_extract(self, url, smuggled_data):
        """
        Resolve a tab/channel/playlist URL and dispatch to the appropriate
        extraction path (browse tabs, watch-page playlist, or a single video).

        smuggled_data may contain 'is_music_url', which triggers the
        music.youtube.com specific URL rewriting below.
        """
        item_id = self._match_id(url)
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        # compat options can disable some of the URL redirections below
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # Re-match the (possibly rewritten) URL; missing groups become ''
            mobj = self._url_re.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj = get_mobj(url)
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':
                    # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif item_id[:2] == 'MP':
                    # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                    item_id = self._search_regex(
                        r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                        self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                        'playlist id')
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif mobj['channel_type'] == 'browse':
                    # Youtube music /browse/ should be changed to /channel/
                    pre = 'https://www.youtube.com/channel/%s' % item_id
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        # Rebuild the URL from the (possibly rewritten) parts and re-parse it
        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage, data = self._extract_webpage(url, item_id)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                        pl_id = 'UU%s' % item_id[2:]
                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                        try:
                            pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                            for alert_type, alert_message in self._extract_alerts(pl_data):
                                if alert_type == 'error':
                                    raise ExtractorError('Youtube said: %s' % alert_message)
                            item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                        except ExtractorError:
                            self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

        self.write_debug('Final URL: %s' % url)

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
        self._extract_and_report_alerts(data, only_once=True)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)

        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, webpage)

        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4335
4336
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Defer to YoutubeTabIE and reject watch URLs carrying a video id."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        # Decide music handling from the original URL before rewriting it
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        query = parse_qs(url) or {'list': playlist_id}
        url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4421
4422
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite youtu.be short links with a list param into full watch URLs."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4461
4462
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite 'ytuser:NAME' into the canonical /user/ channel URL."""
        user_id = self._match_id(url)
        return self.url_result(
            f'https://www.youtube.com/user/{user_id}',
            ie=YoutubeTabIE.ie_key(), video_id=user_id)
4476
4477
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Liked videos are exposed via the special 'LL' playlist
        return self.url_result(
            'https://www.youtube.com/playlist?list=LL', ie=YoutubeTabIE.ie_key())
4495
4496
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Extractor for 'ytsearch' keyword searches via the Innertube search endpoint."""
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None  # extra search 'params' payload; overridden by subclasses
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to n video results for `query`, following continuations."""
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        continuation = {}
        for page_num in itertools.count(1):
            # Merge the continuation token (if any) into the request payload
            data.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # First page and continuation pages nest the results differently
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for slr_content in slr_contents:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [slr_content]})

                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    if not isinstance(content, dict):
                        continue
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            # No continuation token found anywhere => no more pages
            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query, query)
4564
4565
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search extractor variant that returns the newest videos first."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # URL-encoded search params selecting upload-date ordering (see IE_DESC)
    _SEARCH_PARAMS = 'CAI%3D'
4571
4572
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """The search-URL variant matches real URLs, not the 'ytsearch' prefix."""
        return cls._VALID_URL

    def _real_extract(self, url):
        params = parse_qs(url)
        # Either 'search_query' or 'q' may carry the query string
        query = (params.get('search_query') or params.get('q'))[0]
        # Pass through YouTube's own filter/sort parameter when present
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
4599
4600
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        return f'youtube:{self._FEED_NAME}'

    def _real_extract(self, url):
        # All feeds live under /feed/<name>; delegate to the tab extractor
        return self.url_result(
            f'https://www.youtube.com/feed/{self._FEED_NAME}', ie=YoutubeTabIE.ie_key())
4617
4618
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Watch later is exposed via the special 'WL' playlist
        return self.url_result(
            'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())
4631
4632
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Extractor for the recommended-videos feed (':ytrec' / youtube.com home)."""
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False  # unlike other feeds, recommendations work anonymously
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4648
4649
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Extractor for the subscriptions feed (':ytsubs')."""
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4661
4662
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Extractor for the watch-history feed (':ythis')."""
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4671
4672
class YoutubeTruncatedURLIE(InfoExtractor):
    """
    Catches watch/attribution URLs whose video id is missing (typically because
    an unquoted '&' truncated the command line) and raises a helpful error.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Always an error: these URLs can never yield a video
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
            expected=True)
4720
4721
class YoutubeClipIE(InfoExtractor):
    """Stub for /clip/ URLs: warns and downloads the full video via Generic."""
    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        self.report_warning('YouTube clips are not currently supported. The entire video will be downloaded instead')
        return self.url_result(url, 'Generic')
4730
4731
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catches watch URLs whose video id is shorter than the required 11 chars."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    # Matches ids of 1-10 chars; valid ids are exactly 11, so this only fires on truncation
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Always an error: a truncated id can never resolve to a video
        video_id = self._match_id(url)
        raise ExtractorError(
            'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
            expected=True)