]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/youtube.py
[youtube] Improve age-gate detection (#577)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import base64
6 import calendar
7 import copy
8 import datetime
9 import hashlib
10 import itertools
11 import json
12 import os.path
13 import random
14 import re
15 import time
16 import traceback
17
18 from .common import InfoExtractor, SearchInfoExtractor
19 from ..compat import (
20 compat_chr,
21 compat_HTTPError,
22 compat_parse_qs,
23 compat_str,
24 compat_urllib_parse_unquote_plus,
25 compat_urllib_parse_urlencode,
26 compat_urllib_parse_urlparse,
27 compat_urlparse,
28 )
29 from ..jsinterp import JSInterpreter
30 from ..utils import (
31 bytes_to_intlist,
32 clean_html,
33 datetime_from_str,
34 dict_get,
35 error_to_compat_str,
36 ExtractorError,
37 float_or_none,
38 format_field,
39 int_or_none,
40 intlist_to_bytes,
41 mimetype2ext,
42 network_exceptions,
43 orderedSet,
44 parse_codecs,
45 parse_count,
46 parse_duration,
47 parse_iso8601,
48 qualities,
49 remove_start,
50 smuggle_url,
51 str_or_none,
52 str_to_int,
53 traverse_obj,
54 try_get,
55 unescapeHTML,
56 unified_strdate,
57 unsmuggle_url,
58 update_url_query,
59 url_or_none,
60 urlencode_postdata,
61 urljoin,
62 variadic,
63 )
64
65
def parse_qs(url):
    """Return the query string of *url* parsed into a dict of value lists."""
    parsed_url = compat_urlparse.urlparse(url)
    return compat_urlparse.parse_qs(parsed_url.query)
68
69
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Google account login endpoints - only referenced by the disabled
    # username/password flow in _login(); cookie login is the supported path
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # First URL path components that can never be a channel/user name
    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Matches playlist IDs, including the special WL/LL/LM virtual playlists
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
89
90 def _login(self):
91 """
92 Attempt to log in to YouTube.
93 True is returned if successful or skipped.
94 False is returned if login failed.
95
96 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
97 """
98
99 def warn(message):
100 self.report_warning(message)
101
102 # username+password login is broken
103 if (self._LOGIN_REQUIRED
104 and self.get_param('cookiefile') is None
105 and self.get_param('cookiesfrombrowser') is None):
106 self.raise_login_required(
107 'Login details are needed to download this content', method='cookies')
108 username, password = self._get_login_info()
109 if username:
110 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
111 return
112
113 # Everything below this is broken!
114 r'''
115 # No authentication to be performed
116 if username is None:
117 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
118 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
119 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
120 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
121 return True
122
123 login_page = self._download_webpage(
124 self._LOGIN_URL, None,
125 note='Downloading login page',
126 errnote='unable to fetch login page', fatal=False)
127 if login_page is False:
128 return
129
130 login_form = self._hidden_inputs(login_page)
131
132 def req(url, f_req, note, errnote):
133 data = login_form.copy()
134 data.update({
135 'pstMsg': 1,
136 'checkConnection': 'youtube',
137 'checkedDomains': 'youtube',
138 'hl': 'en',
139 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
140 'f.req': json.dumps(f_req),
141 'flowName': 'GlifWebSignIn',
142 'flowEntry': 'ServiceLogin',
143 # TODO: reverse actual botguard identifier generation algo
144 'bgRequest': '["identifier",""]',
145 })
146 return self._download_json(
147 url, None, note=note, errnote=errnote,
148 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
149 fatal=False,
150 data=urlencode_postdata(data), headers={
151 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
152 'Google-Accounts-XSRF': 1,
153 })
154
155 lookup_req = [
156 username,
157 None, [], None, 'US', None, None, 2, False, True,
158 [
159 None, None,
160 [2, 1, None, 1,
161 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
162 None, [], 4],
163 1, [None, None, []], None, None, None, True
164 ],
165 username,
166 ]
167
168 lookup_results = req(
169 self._LOOKUP_URL, lookup_req,
170 'Looking up account info', 'Unable to look up account info')
171
172 if lookup_results is False:
173 return False
174
175 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
176 if not user_hash:
177 warn('Unable to extract user hash')
178 return False
179
180 challenge_req = [
181 user_hash,
182 None, 1, None, [1, None, None, None, [password, None, True]],
183 [
184 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
185 1, [None, None, []], None, None, None, True
186 ]]
187
188 challenge_results = req(
189 self._CHALLENGE_URL, challenge_req,
190 'Logging in', 'Unable to log in')
191
192 if challenge_results is False:
193 return
194
195 login_res = try_get(challenge_results, lambda x: x[0][5], list)
196 if login_res:
197 login_msg = try_get(login_res, lambda x: x[5], compat_str)
198 warn(
199 'Unable to login: %s' % 'Invalid password'
200 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
201 return False
202
203 res = try_get(challenge_results, lambda x: x[0][-1], list)
204 if not res:
205 warn('Unable to extract result entry')
206 return False
207
208 login_challenge = try_get(res, lambda x: x[0][0], list)
209 if login_challenge:
210 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
211 if challenge_str == 'TWO_STEP_VERIFICATION':
212 # SEND_SUCCESS - TFA code has been successfully sent to phone
213 # QUOTA_EXCEEDED - reached the limit of TFA codes
214 status = try_get(login_challenge, lambda x: x[5], compat_str)
215 if status == 'QUOTA_EXCEEDED':
216 warn('Exceeded the limit of TFA codes, try later')
217 return False
218
219 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
220 if not tl:
221 warn('Unable to extract TL')
222 return False
223
224 tfa_code = self._get_tfa_info('2-step verification code')
225
226 if not tfa_code:
227 warn(
228 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
229 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
230 return False
231
232 tfa_code = remove_start(tfa_code, 'G-')
233
234 tfa_req = [
235 user_hash, None, 2, None,
236 [
237 9, None, None, None, None, None, None, None,
238 [None, tfa_code, True, 2]
239 ]]
240
241 tfa_results = req(
242 self._TFA_URL.format(tl), tfa_req,
243 'Submitting TFA code', 'Unable to submit TFA code')
244
245 if tfa_results is False:
246 return False
247
248 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
249 if tfa_res:
250 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
251 warn(
252 'Unable to finish TFA: %s' % 'Invalid TFA code'
253 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
254 return False
255
256 check_cookie_url = try_get(
257 tfa_results, lambda x: x[0][-1][2], compat_str)
258 else:
259 CHALLENGES = {
260 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
261 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
262 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
263 }
264 challenge = CHALLENGES.get(
265 challenge_str,
266 '%s returned error %s.' % (self.IE_NAME, challenge_str))
267 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
268 return False
269 else:
270 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
271
272 if not check_cookie_url:
273 warn('Unable to extract CheckCookie URL')
274 return False
275
276 check_cookie_results = self._download_webpage(
277 check_cookie_url, None, 'Checking cookie', fatal=False)
278
279 if check_cookie_results is False:
280 return False
281
282 if 'https://myaccount.google.com/' not in check_cookie_results:
283 warn('Unable to log in')
284 return False
285
286 return True
287 '''
288
289 def _initialize_consent(self):
290 cookies = self._get_cookies('https://www.youtube.com/')
291 if cookies.get('__Secure-3PSID'):
292 return
293 consent_id = None
294 consent = cookies.get('CONSENT')
295 if consent:
296 if 'YES' in consent.value:
297 return
298 consent_id = self._search_regex(
299 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
300 if not consent_id:
301 consent_id = random.randint(100, 999)
302 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
303
304 def _real_initialize(self):
305 self._initialize_consent()
306 if self._downloader is None:
307 return
308 if not self._login():
309 return
310
    # Regexes locating the JSON configuration blobs embedded in HTML pages
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    # Built-in fallback ytcfg values for each supported Innertube client,
    # used when a real ytcfg cannot be extracted from the page.
    # NOTE: the *_AGEGATE variants differ from their base client only by
    # 'clientScreen': 'EMBED'.
    _YT_DEFAULT_YTCFGS = {
        'WEB': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB',
            'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20210622.10.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 1
        },
        'WEB_AGEGATE': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB',
            'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20210622.10.00',
                    'clientScreen': 'EMBED',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 1
        },
        'WEB_REMIX': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
            'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_REMIX',
                    'clientVersion': '1.20210621.00.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 67
        },
        'WEB_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_EMBEDDED_PLAYER',
                    'clientVersion': '1.20210620.0.1',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 56
        },
        'ANDROID': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 3
        },
        'ANDROID_AGEGATE': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID',
                    'clientVersion': '16.20',
                    'clientScreen': 'EMBED',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 3
        },
        'ANDROID_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_EMBEDDED_PLAYER',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 55
        },
        'ANDROID_MUSIC': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
            'INNERTUBE_CLIENT_VERSION': '4.32',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_MUSIC',
                    'clientVersion': '4.32',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 21
        },
        'IOS': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'IOS',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'IOS',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 5
        },
        'IOS_AGEGATE': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'IOS',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'IOS',
                    'clientVersion': '16.20',
                    'clientScreen': 'EMBED',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 5
        },
        'IOS_MUSIC': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'IOS_MUSIC',
            'INNERTUBE_CLIENT_VERSION': '4.32',
            'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'IOS_MUSIC',
                    'clientVersion': '4.32',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 26
        },
        'IOS_MESSAGES_EXTENSION': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'IOS_MESSAGES_EXTENSION',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'IOS_MESSAGES_EXTENSION',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 66
        },
        'MWEB': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'MWEB',
            'INNERTUBE_CLIENT_VERSION': '2.20210721.07.00',
            'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'MWEB',
                    'clientVersion': '2.20210721.07.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 2
        },
        'MWEB_AGEGATE': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'MWEB',
            'INNERTUBE_CLIENT_VERSION': '2.20210721.07.00',
            'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'MWEB',
                    'clientVersion': '2.20210721.07.00',
                    'clientScreen': 'EMBED',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 2
        },
    }

    # API hostname per client; clients not listed here use the WEB host
    _YT_DEFAULT_INNERTUBE_HOSTS = {
        'DIRECT': 'youtubei.googleapis.com',
        'WEB': 'www.youtube.com',
        'WEB_REMIX': 'music.youtube.com',
        'ANDROID_MUSIC': 'music.youtube.com'
    }

    # clients starting with _ cannot be explicitly requested by the user
    _YT_CLIENTS = {
        'android': 'ANDROID',
        'android_music': 'ANDROID_MUSIC',
        'android_embedded': 'ANDROID_EMBEDDED_PLAYER',
        'android_agegate': 'ANDROID_AGEGATE',
        'ios': 'IOS',
        'ios_music': 'IOS_MUSIC',
        'ios_embedded': 'IOS_MESSAGES_EXTENSION',
        'ios_agegate': 'IOS_AGEGATE',
        'web': 'WEB',
        'web_music': 'WEB_REMIX',
        'web_embedded': 'WEB_EMBEDDED_PLAYER',
        'web_agegate': 'WEB_AGEGATE',
        'mweb': 'MWEB',
        'mweb_agegate': 'MWEB_AGEGATE',
    }
542
543 def _get_default_ytcfg(self, client='WEB'):
544 if client in self._YT_DEFAULT_YTCFGS:
545 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
546 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
547 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
548
549 def _get_innertube_host(self, client='WEB'):
550 return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, (client, 'WEB'))
551
552 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
553 # try_get but with fallback to default ytcfg client values when present
554 _func = lambda y: try_get(y, getter, expected_type)
555 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
556
    def _extract_client_name(self, ytcfg, default_client='WEB'):
        # Client name from ytcfg, falling back to the built-in default ytcfg
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str, default_client)
559
560 @staticmethod
561 def _extract_session_index(*data):
562 for ytcfg in data:
563 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
564 if session_index is not None:
565 return session_index
566
    def _extract_client_version(self, ytcfg, default_client='WEB'):
        # Client version from ytcfg, falling back to the built-in default ytcfg
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str, default_client)
569
    def _extract_api_key(self, ytcfg=None, default_client='WEB'):
        # API key from ytcfg, falling back to the built-in default ytcfg
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
572
    def _extract_context(self, ytcfg=None, default_client='WEB'):
        """Return the INNERTUBE_CONTEXT dict from *ytcfg*, or a default one.

        When ytcfg is present but lacks a full context, the default context
        is patched with the client name/version (and visitorData) that can
        still be extracted from it.
        """
        _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
        context = _get_context(ytcfg)
        if context:
            return context

        context = _get_context(self._get_default_ytcfg(default_client))
        if not ytcfg:
            return context

        # Recreate the client context (required)
        context['client'].update({
            'clientVersion': self._extract_client_version(ytcfg, default_client),
            'clientName': self._extract_client_name(ytcfg, default_client),
        })
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            context['client']['visitorData'] = visitor_data
        return context
592
    def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
        """Build the 'SAPISIDHASH <time>_<sha1>' Authorization header value.

        Returns None when neither the SAPISID nor __Secure-3PAPISID cookie
        is available. As a side effect, copies __Secure-3PAPISID to SAPISID
        when the latter is missing.
        """
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        yt_cookies = self._get_cookies('https://www.youtube.com')
        sapisid_cookie = dict_get(
            yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
        if sapisid_cookie is None or not sapisid_cookie.value:
            return
        time_now = round(time.time())
        # SAPISID cookie is required if not already present
        if not yt_cookies.get('SAPISID'):
            self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie', only_once=True)
            self._set_cookie(
                '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
        self.write_debug('Extracted SAPISID cookie', only_once=True)
        # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
        sapisidhash = hashlib.sha1(
            f'{time_now} {sapisid_cookie.value} {origin}'.encode('utf-8')).hexdigest()
        return f'SAPISIDHASH {time_now}_{sapisidhash}'
612
613 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
614 note='Downloading API JSON', errnote='Unable to download API page',
615 context=None, api_key=None, api_hostname=None, default_client='WEB'):
616
617 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
618 data.update(query)
619 real_headers = self.generate_api_headers(default_client=default_client)
620 real_headers.update({'content-type': 'application/json'})
621 if headers:
622 real_headers.update(headers)
623 return self._download_json(
624 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
625 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
626 data=json.dumps(data).encode('utf8'), headers=real_headers,
627 query={'key': api_key or self._extract_api_key()})
628
    def extract_yt_initial_data(self, video_id, webpage):
        # Parse the ytInitialData JSON blob out of a page. Try the pattern
        # anchored by the boundary regex first, then the bare pattern.
        return self._parse_json(
            self._search_regex(
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)
635
636 def _extract_identity_token(self, webpage, item_id):
637 if not webpage:
638 return None
639 ytcfg = self.extract_ytcfg(item_id, webpage)
640 if ytcfg:
641 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
642 if token:
643 return token
644 return self._search_regex(
645 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
646 'identity token', default=None)
647
    @staticmethod
    def _extract_account_syncid(*args):
        """
        Extract syncId required to download private playlists of secondary channels
        @params response and/or ytcfg

        Returns None when no sync id can be found in any argument.
        """
        for data in args:
            # ytcfg includes channel_syncid if on secondary channel
            delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
            if delegated_sid:
                return delegated_sid
            sync_ids = (try_get(
                data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                       lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
            if len(sync_ids) >= 2 and sync_ids[1]:
                # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
                # and just "user_syncid||" for primary channel. We only want the channel_syncid
                return sync_ids[0]
666
667 def extract_ytcfg(self, video_id, webpage):
668 if not webpage:
669 return {}
670 return self._parse_json(
671 self._search_regex(
672 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
673 default='{}'), video_id, fatal=False) or {}
674
    def generate_api_headers(
            self, ytcfg=None, identity_token=None, account_syncid=None,
            visitor_data=None, api_hostname=None, default_client='WEB', session_index=None):
        """Build the HTTP headers for an Innertube API request.

        Always includes client name/version and Origin; conditionally adds
        identity/account/session/visitor headers and, when cookies allow it,
        the SAPISIDHASH Authorization header.
        """
        origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
        headers = {
            'X-YouTube-Client-Name': compat_str(
                self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
            'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
            'Origin': origin
        }
        # Fall back to the visitorData embedded in the ytcfg context
        if not visitor_data and ytcfg:
            visitor_data = try_get(
                self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
        if identity_token:
            headers['X-Youtube-Identity-Token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
        if session_index is None and ytcfg:
            session_index = self._extract_session_index(ytcfg)
        # X-Goog-AuthUser defaults to 0 when only an account_syncid is known
        if account_syncid or session_index is not None:
            headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
        if visitor_data:
            headers['X-Goog-Visitor-Id'] = visitor_data
        auth = self._generate_sapisidhash_header(origin)
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = origin
        return headers
703
704 @staticmethod
705 def _build_api_continuation_query(continuation, ctp=None):
706 query = {
707 'continuation': continuation
708 }
709 # TODO: Inconsistency with clickTrackingParams.
710 # Currently we have a fixed ctp contained within context (from ytcfg)
711 # and a ctp in root query for continuation.
712 if ctp:
713 query['clickTracking'] = {'clickTrackingParams': ctp}
714 return query
715
716 @classmethod
717 def _extract_next_continuation_data(cls, renderer):
718 next_continuation = try_get(
719 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
720 lambda x: x['continuation']['reloadContinuationData']), dict)
721 if not next_continuation:
722 return
723 continuation = next_continuation.get('continuation')
724 if not continuation:
725 return
726 ctp = next_continuation.get('clickTrackingParams')
727 return cls._build_api_continuation_query(continuation, ctp)
728
729 @classmethod
730 def _extract_continuation_ep_data(cls, continuation_ep: dict):
731 if isinstance(continuation_ep, dict):
732 continuation = try_get(
733 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
734 if not continuation:
735 return
736 ctp = continuation_ep.get('clickTrackingParams')
737 return cls._build_api_continuation_query(continuation, ctp)
738
739 @classmethod
740 def _extract_continuation(cls, renderer):
741 next_continuation = cls._extract_next_continuation_data(renderer)
742 if next_continuation:
743 return next_continuation
744
745 contents = []
746 for key in ('contents', 'items'):
747 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
748
749 for content in contents:
750 if not isinstance(content, dict):
751 continue
752 continuation_ep = try_get(
753 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
754 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
755 dict)
756 continuation = cls._extract_continuation_ep_data(continuation_ep)
757 if continuation:
758 return continuation
759
760 @classmethod
761 def _extract_alerts(cls, data):
762 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
763 if not isinstance(alert_dict, dict):
764 continue
765 for alert in alert_dict.values():
766 alert_type = alert.get('type')
767 if not alert_type:
768 continue
769 message = cls._get_text(alert, 'text')
770 if message:
771 yield alert_type, message
772
773 def _report_alerts(self, alerts, expected=True):
774 errors = []
775 warnings = []
776 for alert_type, alert_message in alerts:
777 if alert_type.lower() == 'error':
778 errors.append([alert_type, alert_message])
779 else:
780 warnings.append([alert_type, alert_message])
781
782 for alert_type, alert_message in (warnings + errors[:-1]):
783 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
784 if errors:
785 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
786
    def _extract_and_report_alerts(self, data, *args, **kwargs):
        # Convenience wrapper: parse alerts from *data*, then warn/raise on them
        return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
789
790 def _extract_badges(self, renderer: dict):
791 badges = set()
792 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
793 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
794 if label:
795 badges.add(label.lower())
796 return badges
797
    @staticmethod
    def _get_text(data, *path_list, max_runs=None):
        """Extract a text string from YouTube's renderer objects.

        Each path in *path_list* (or *data* itself when no paths are given) is
        resolved with traverse_obj; the first object yielding a 'simpleText'
        value or a joinable 'runs' list wins. *max_runs* limits how many run
        fragments are concatenated. Returns None when nothing matches.
        """
        for path in path_list or [None]:
            if path is None:
                obj = [data]
            else:
                obj = traverse_obj(data, path, default=[])
                # Branching paths (Ellipsis or tuple keys) already yield a list
                if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                    obj = [obj]
            for item in obj:
                text = try_get(item, lambda x: x['simpleText'], compat_str)
                if text:
                    return text
                runs = try_get(item, lambda x: x['runs'], list) or []
                if not runs and isinstance(item, list):
                    runs = item

                runs = runs[:min(len(runs), max_runs or len(runs))]
                text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
                if text:
                    return text
819
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='WEB'):
        """Call the Innertube API endpoint *ep* with retries.

        Retries (up to the 'extractor_retries' param, default 3) on network
        errors other than HTTP 403/429, and on responses that contain none of
        *check_get_keys*. Raises when *fatal*, otherwise warns and returns None.
        """
        response = None
        last_error = None
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, network_exceptions):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    # We also want to catch all other network exceptions since errors in later pages can be troublesome
                    # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                        last_error = error_to_compat_str(e.cause or e)
                        if count < retries:
                            continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
878
879 @staticmethod
880 def is_music_url(url):
881 return re.match(r'https?://music\.youtube\.com/', url) is not None
882
883 def _extract_video(self, renderer):
884 video_id = renderer.get('videoId')
885 title = self._get_text(renderer, 'title')
886 description = self._get_text(renderer, 'descriptionSnippet')
887 duration = parse_duration(self._get_text(
888 renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
889 view_count_text = self._get_text(renderer, 'viewCountText') or ''
890 view_count = str_to_int(self._search_regex(
891 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
892 'view count', default=None))
893
894 uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
895
896 return {
897 '_type': 'url',
898 'ie_key': YoutubeIE.ie_key(),
899 'id': video_id,
900 'url': video_id,
901 'title': title,
902 'description': description,
903 'duration': duration,
904 'view_count': view_count,
905 'uploader': uploader,
906 }
907
908
909 class YoutubeIE(YoutubeBaseInfoExtractor):
910 IE_DESC = 'YouTube.com'
911 _INVIDIOUS_SITES = (
912 # invidious-redirect websites
913 r'(?:www\.)?redirect\.invidious\.io',
914 r'(?:(?:www|dev)\.)?invidio\.us',
915 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
916 r'(?:www\.)?invidious\.pussthecat\.org',
917 r'(?:www\.)?invidious\.zee\.li',
918 r'(?:www\.)?invidious\.ethibox\.fr',
919 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
920 # youtube-dl invidious instances list
921 r'(?:(?:www|no)\.)?invidiou\.sh',
922 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
923 r'(?:www\.)?invidious\.kabi\.tk',
924 r'(?:www\.)?invidious\.mastodon\.host',
925 r'(?:www\.)?invidious\.zapashcanon\.fr',
926 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
927 r'(?:www\.)?invidious\.tinfoil-hat\.net',
928 r'(?:www\.)?invidious\.himiko\.cloud',
929 r'(?:www\.)?invidious\.reallyancient\.tech',
930 r'(?:www\.)?invidious\.tube',
931 r'(?:www\.)?invidiou\.site',
932 r'(?:www\.)?invidious\.site',
933 r'(?:www\.)?invidious\.xyz',
934 r'(?:www\.)?invidious\.nixnet\.xyz',
935 r'(?:www\.)?invidious\.048596\.xyz',
936 r'(?:www\.)?invidious\.drycat\.fr',
937 r'(?:www\.)?inv\.skyn3t\.in',
938 r'(?:www\.)?tube\.poal\.co',
939 r'(?:www\.)?tube\.connect\.cafe',
940 r'(?:www\.)?vid\.wxzm\.sx',
941 r'(?:www\.)?vid\.mint\.lgbt',
942 r'(?:www\.)?vid\.puffyan\.us',
943 r'(?:www\.)?yewtu\.be',
944 r'(?:www\.)?yt\.elukerio\.org',
945 r'(?:www\.)?yt\.lelux\.fi',
946 r'(?:www\.)?invidious\.ggc-project\.de',
947 r'(?:www\.)?yt\.maisputain\.ovh',
948 r'(?:www\.)?ytprivate\.com',
949 r'(?:www\.)?invidious\.13ad\.de',
950 r'(?:www\.)?invidious\.toot\.koeln',
951 r'(?:www\.)?invidious\.fdn\.fr',
952 r'(?:www\.)?watch\.nettohikari\.com',
953 r'(?:www\.)?invidious\.namazso\.eu',
954 r'(?:www\.)?invidious\.silkky\.cloud',
955 r'(?:www\.)?invidious\.exonip\.de',
956 r'(?:www\.)?invidious\.riverside\.rocks',
957 r'(?:www\.)?invidious\.blamefran\.net',
958 r'(?:www\.)?invidious\.moomoo\.de',
959 r'(?:www\.)?ytb\.trom\.tf',
960 r'(?:www\.)?yt\.cyberhost\.uk',
961 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
962 r'(?:www\.)?qklhadlycap4cnod\.onion',
963 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
964 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
965 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
966 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
967 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
968 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
969 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
970 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
971 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
972 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
973 )
974 _VALID_URL = r"""(?x)^
975 (
976 (?:https?://|//) # http(s):// or protocol-independent URL
977 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
978 (?:www\.)?deturl\.com/www\.youtube\.com|
979 (?:www\.)?pwnyoutube\.com|
980 (?:www\.)?hooktube\.com|
981 (?:www\.)?yourepeat\.com|
982 tube\.majestyc\.net|
983 %(invidious)s|
984 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
985 (?:.*?\#/)? # handle anchor (#/) redirect urls
986 (?: # the various things that can precede the ID:
987 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
988 |(?: # or the v= param in all its forms
989 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
990 (?:\?|\#!?) # the params delimiter ? or # or #!
991 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
992 v=
993 )
994 ))
995 |(?:
996 youtu\.be| # just youtu.be/xxxx
997 vid\.plus| # or vid.plus/xxxx
998 zwearz\.com/watch| # or zwearz.com/watch/xxxx
999 %(invidious)s
1000 )/
1001 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
1002 )
1003 )? # all until now is optional -> you can pass the naked ID
1004 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
1005 (?(1).+)? # if we found the ID, everything can follow
1006 (?:\#|$)""" % {
1007 'invidious': '|'.join(_INVIDIOUS_SITES),
1008 }
    # Patterns extracting a player id/version (named group "id") from the URL
    # of the player JavaScript file — presumably used to identify the player
    # build for signature handling; usage is outside this chunk, confirm there.
    _PLAYER_INFO_RE = (
        r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
        r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
        r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
    )
1014 _formats = {
1015 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1016 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1017 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
1018 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
1019 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
1020 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1021 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1022 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1023 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
1024 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1025 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1026 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1027 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1028 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1029 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1030 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1031 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1032 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1033
1034
1035 # 3D videos
1036 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1037 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1038 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1039 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1040 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1041 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1042 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1043
1044 # Apple HTTP Live Streaming
1045 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1046 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1047 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1048 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1049 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1050 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1051 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1052 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
1053
1054 # DASH mp4 video
1055 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1056 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1057 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1058 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1059 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
1060 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
1061 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1062 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1063 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1064 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1065 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1066 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
1067
1068 # Dash mp4 audio
1069 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1070 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1071 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1072 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1073 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1074 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1075 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
1076
1077 # Dash webm
1078 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1079 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1080 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1081 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1082 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1083 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1084 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1085 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1086 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1087 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1088 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1089 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1090 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1091 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1092 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1093 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1094 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1095 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1096 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1097 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1098 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1099 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1100
1101 # Dash webm audio
1102 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1103 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1104
1105 # Dash webm audio with opus inside
1106 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1107 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1108 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1109
1110 # RTMP (unnamed)
1111 '_rtmp': {'protocol': 'rtmp'},
1112
1113 # av01 video only formats sometimes served with "unknown" codecs
1114 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1115 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1116 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1117 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1118 }
    # Subtitle/caption formats known to be served by YouTube; how the order is
    # interpreted (e.g. as a preference ranking) is not visible in this chunk.
    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
1120
    # Human-readable playability "reason" strings that indicate an age-gated
    # video (see #577 "Improve age-gate detection").
    _AGE_GATE_REASONS = (
        'Sign in to confirm your age',
        'This video may be inappropriate for some users.',
        'Sorry, this content is age-restricted.',
        'Please confirm your age.')

    # Machine-readable status values that likewise signal an age check;
    # presumably matched against the player response's playability status —
    # confirm against the extraction code (outside this chunk).
    _AGE_GATE_STATUS_REASONS = (
        'AGE_VERIFICATION_REQUIRED',
        'AGE_CHECK_REQUIRED'
    )
1131
    # Disable the generic InfoExtractor geo-bypass machinery for this
    # extractor — NOTE(review): presumably geo handling is done elsewhere
    # in this extractor; confirm before relying on it.
    _GEO_BYPASS = False

    IE_NAME = 'youtube'
1135 _TESTS = [
1136 {
1137 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1138 'info_dict': {
1139 'id': 'BaW_jenozKc',
1140 'ext': 'mp4',
1141 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1142 'uploader': 'Philipp Hagemeister',
1143 'uploader_id': 'phihag',
1144 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1145 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1146 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1147 'upload_date': '20121002',
1148 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1149 'categories': ['Science & Technology'],
1150 'tags': ['youtube-dl'],
1151 'duration': 10,
1152 'view_count': int,
1153 'like_count': int,
1154 'dislike_count': int,
1155 'start_time': 1,
1156 'end_time': 9,
1157 }
1158 },
1159 {
1160 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1161 'note': 'Embed-only video (#1746)',
1162 'info_dict': {
1163 'id': 'yZIXLfi8CZQ',
1164 'ext': 'mp4',
1165 'upload_date': '20120608',
1166 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1167 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1168 'uploader': 'SET India',
1169 'uploader_id': 'setindia',
1170 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1171 'age_limit': 18,
1172 },
1173 'skip': 'Private video',
1174 },
1175 {
1176 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1177 'note': 'Use the first video ID in the URL',
1178 'info_dict': {
1179 'id': 'BaW_jenozKc',
1180 'ext': 'mp4',
1181 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1182 'uploader': 'Philipp Hagemeister',
1183 'uploader_id': 'phihag',
1184 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1185 'upload_date': '20121002',
1186 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1187 'categories': ['Science & Technology'],
1188 'tags': ['youtube-dl'],
1189 'duration': 10,
1190 'view_count': int,
1191 'like_count': int,
1192 'dislike_count': int,
1193 },
1194 'params': {
1195 'skip_download': True,
1196 },
1197 },
1198 {
1199 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1200 'note': '256k DASH audio (format 141) via DASH manifest',
1201 'info_dict': {
1202 'id': 'a9LDPn-MO4I',
1203 'ext': 'm4a',
1204 'upload_date': '20121002',
1205 'uploader_id': '8KVIDEO',
1206 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1207 'description': '',
1208 'uploader': '8KVIDEO',
1209 'title': 'UHDTV TEST 8K VIDEO.mp4'
1210 },
1211 'params': {
1212 'youtube_include_dash_manifest': True,
1213 'format': '141',
1214 },
1215 'skip': 'format 141 not served anymore',
1216 },
1217 # DASH manifest with encrypted signature
1218 {
1219 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1220 'info_dict': {
1221 'id': 'IB3lcPjvWLA',
1222 'ext': 'm4a',
1223 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1224 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1225 'duration': 244,
1226 'uploader': 'AfrojackVEVO',
1227 'uploader_id': 'AfrojackVEVO',
1228 'upload_date': '20131011',
1229 'abr': 129.495,
1230 },
1231 'params': {
1232 'youtube_include_dash_manifest': True,
1233 'format': '141/bestaudio[ext=m4a]',
1234 },
1235 },
1236 # Normal age-gate video (embed allowed)
1237 {
1238 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1239 'info_dict': {
1240 'id': 'HtVdAasjOgU',
1241 'ext': 'mp4',
1242 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1243 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1244 'duration': 142,
1245 'uploader': 'The Witcher',
1246 'uploader_id': 'WitcherGame',
1247 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1248 'upload_date': '20140605',
1249 'age_limit': 18,
1250 },
1251 },
1252 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1253 # YouTube Red ad is not captured for creator
1254 {
1255 'url': '__2ABJjxzNo',
1256 'info_dict': {
1257 'id': '__2ABJjxzNo',
1258 'ext': 'mp4',
1259 'duration': 266,
1260 'upload_date': '20100430',
1261 'uploader_id': 'deadmau5',
1262 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1263 'creator': 'deadmau5',
1264 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1265 'uploader': 'deadmau5',
1266 'title': 'Deadmau5 - Some Chords (HD)',
1267 'alt_title': 'Some Chords',
1268 },
1269 'expected_warnings': [
1270 'DASH manifest missing',
1271 ]
1272 },
1273 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1274 {
1275 'url': 'lqQg6PlCWgI',
1276 'info_dict': {
1277 'id': 'lqQg6PlCWgI',
1278 'ext': 'mp4',
1279 'duration': 6085,
1280 'upload_date': '20150827',
1281 'uploader_id': 'olympic',
1282 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1283 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1284 'uploader': 'Olympics',
1285 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1286 },
1287 'params': {
1288 'skip_download': 'requires avconv',
1289 }
1290 },
1291 # Non-square pixels
1292 {
1293 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1294 'info_dict': {
1295 'id': '_b-2C3KPAM0',
1296 'ext': 'mp4',
1297 'stretched_ratio': 16 / 9.,
1298 'duration': 85,
1299 'upload_date': '20110310',
1300 'uploader_id': 'AllenMeow',
1301 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1302 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1303 'uploader': '孫ᄋᄅ',
1304 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1305 },
1306 },
1307 # url_encoded_fmt_stream_map is empty string
1308 {
1309 'url': 'qEJwOuvDf7I',
1310 'info_dict': {
1311 'id': 'qEJwOuvDf7I',
1312 'ext': 'webm',
1313 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1314 'description': '',
1315 'upload_date': '20150404',
1316 'uploader_id': 'spbelect',
1317 'uploader': 'Наблюдатели Петербурга',
1318 },
1319 'params': {
1320 'skip_download': 'requires avconv',
1321 },
1322 'skip': 'This live event has ended.',
1323 },
1324 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1325 {
1326 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1327 'info_dict': {
1328 'id': 'FIl7x6_3R5Y',
1329 'ext': 'webm',
1330 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1331 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1332 'duration': 220,
1333 'upload_date': '20150625',
1334 'uploader_id': 'dorappi2000',
1335 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1336 'uploader': 'dorappi2000',
1337 'formats': 'mincount:31',
1338 },
1339 'skip': 'not actual anymore',
1340 },
1341 # DASH manifest with segment_list
1342 {
1343 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1344 'md5': '8ce563a1d667b599d21064e982ab9e31',
1345 'info_dict': {
1346 'id': 'CsmdDsKjzN8',
1347 'ext': 'mp4',
1348 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1349 'uploader': 'Airtek',
1350 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1351 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1352 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1353 },
1354 'params': {
1355 'youtube_include_dash_manifest': True,
1356 'format': '135', # bestvideo
1357 },
1358 'skip': 'This live event has ended.',
1359 },
1360 {
1361 # Multifeed videos (multiple cameras), URL is for Main Camera
1362 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1363 'info_dict': {
1364 'id': 'jvGDaLqkpTg',
1365 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1366 'description': 'md5:e03b909557865076822aa169218d6a5d',
1367 },
1368 'playlist': [{
1369 'info_dict': {
1370 'id': 'jvGDaLqkpTg',
1371 'ext': 'mp4',
1372 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1373 'description': 'md5:e03b909557865076822aa169218d6a5d',
1374 'duration': 10643,
1375 'upload_date': '20161111',
1376 'uploader': 'Team PGP',
1377 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1378 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1379 },
1380 }, {
1381 'info_dict': {
1382 'id': '3AKt1R1aDnw',
1383 'ext': 'mp4',
1384 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1385 'description': 'md5:e03b909557865076822aa169218d6a5d',
1386 'duration': 10991,
1387 'upload_date': '20161111',
1388 'uploader': 'Team PGP',
1389 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1390 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1391 },
1392 }, {
1393 'info_dict': {
1394 'id': 'RtAMM00gpVc',
1395 'ext': 'mp4',
1396 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1397 'description': 'md5:e03b909557865076822aa169218d6a5d',
1398 'duration': 10995,
1399 'upload_date': '20161111',
1400 'uploader': 'Team PGP',
1401 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1402 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1403 },
1404 }, {
1405 'info_dict': {
1406 'id': '6N2fdlP3C5U',
1407 'ext': 'mp4',
1408 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1409 'description': 'md5:e03b909557865076822aa169218d6a5d',
1410 'duration': 10990,
1411 'upload_date': '20161111',
1412 'uploader': 'Team PGP',
1413 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1414 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1415 },
1416 }],
1417 'params': {
1418 'skip_download': True,
1419 },
1420 },
1421 {
1422 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1423 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1424 'info_dict': {
1425 'id': 'gVfLd0zydlo',
1426 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1427 },
1428 'playlist_count': 2,
1429 'skip': 'Not multifeed anymore',
1430 },
1431 {
1432 'url': 'https://vid.plus/FlRa-iH7PGw',
1433 'only_matching': True,
1434 },
1435 {
1436 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1437 'only_matching': True,
1438 },
1439 {
1440 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1441 # Also tests cut-off URL expansion in video description (see
1442 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1443 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1444 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1445 'info_dict': {
1446 'id': 'lsguqyKfVQg',
1447 'ext': 'mp4',
1448 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1449 'alt_title': 'Dark Walk',
1450 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1451 'duration': 133,
1452 'upload_date': '20151119',
1453 'uploader_id': 'IronSoulElf',
1454 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1455 'uploader': 'IronSoulElf',
1456 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1457 'track': 'Dark Walk',
1458 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1459 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1460 },
1461 'params': {
1462 'skip_download': True,
1463 },
1464 },
1465 {
1466 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1467 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1468 'only_matching': True,
1469 },
1470 {
1471 # Video with yt:stretch=17:0
1472 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1473 'info_dict': {
1474 'id': 'Q39EVAstoRM',
1475 'ext': 'mp4',
1476 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1477 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1478 'upload_date': '20151107',
1479 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1480 'uploader': 'CH GAMER DROID',
1481 },
1482 'params': {
1483 'skip_download': True,
1484 },
1485 'skip': 'This video does not exist.',
1486 },
1487 {
1488 # Video with incomplete 'yt:stretch=16:'
1489 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1490 'only_matching': True,
1491 },
1492 {
1493 # Video licensed under Creative Commons
1494 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1495 'info_dict': {
1496 'id': 'M4gD1WSo5mA',
1497 'ext': 'mp4',
1498 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1499 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1500 'duration': 721,
1501 'upload_date': '20150127',
1502 'uploader_id': 'BerkmanCenter',
1503 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1504 'uploader': 'The Berkman Klein Center for Internet & Society',
1505 'license': 'Creative Commons Attribution license (reuse allowed)',
1506 },
1507 'params': {
1508 'skip_download': True,
1509 },
1510 },
1511 {
1512 # Channel-like uploader_url
1513 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1514 'info_dict': {
1515 'id': 'eQcmzGIKrzg',
1516 'ext': 'mp4',
1517 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1518 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1519 'duration': 4060,
1520 'upload_date': '20151119',
1521 'uploader': 'Bernie Sanders',
1522 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1523 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1524 'license': 'Creative Commons Attribution license (reuse allowed)',
1525 },
1526 'params': {
1527 'skip_download': True,
1528 },
1529 },
1530 {
1531 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1532 'only_matching': True,
1533 },
1534 {
1535 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1536 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1537 'only_matching': True,
1538 },
1539 {
1540 # Rental video preview
1541 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1542 'info_dict': {
1543 'id': 'uGpuVWrhIzE',
1544 'ext': 'mp4',
1545 'title': 'Piku - Trailer',
1546 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1547 'upload_date': '20150811',
1548 'uploader': 'FlixMatrix',
1549 'uploader_id': 'FlixMatrixKaravan',
1550 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1551 'license': 'Standard YouTube License',
1552 },
1553 'params': {
1554 'skip_download': True,
1555 },
1556 'skip': 'This video is not available.',
1557 },
1558 {
1559 # YouTube Red video with episode data
1560 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1561 'info_dict': {
1562 'id': 'iqKdEhx-dD4',
1563 'ext': 'mp4',
1564 'title': 'Isolation - Mind Field (Ep 1)',
1565 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1566 'duration': 2085,
1567 'upload_date': '20170118',
1568 'uploader': 'Vsauce',
1569 'uploader_id': 'Vsauce',
1570 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1571 'series': 'Mind Field',
1572 'season_number': 1,
1573 'episode_number': 1,
1574 },
1575 'params': {
1576 'skip_download': True,
1577 },
1578 'expected_warnings': [
1579 'Skipping DASH manifest',
1580 ],
1581 },
1582 {
1583 # The following content has been identified by the YouTube community
1584 # as inappropriate or offensive to some audiences.
1585 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1586 'info_dict': {
1587 'id': '6SJNVb0GnPI',
1588 'ext': 'mp4',
1589 'title': 'Race Differences in Intelligence',
1590 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1591 'duration': 965,
1592 'upload_date': '20140124',
1593 'uploader': 'New Century Foundation',
1594 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1595 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1596 },
1597 'params': {
1598 'skip_download': True,
1599 },
1600 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1601 },
1602 {
1603 # itag 212
1604 'url': '1t24XAntNCY',
1605 'only_matching': True,
1606 },
1607 {
1608 # geo restricted to JP
1609 'url': 'sJL6WA-aGkQ',
1610 'only_matching': True,
1611 },
1612 {
1613 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1614 'only_matching': True,
1615 },
1616 {
1617 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1618 'only_matching': True,
1619 },
1620 {
1621 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1622 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1623 'only_matching': True,
1624 },
1625 {
1626 # DRM protected
1627 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1628 'only_matching': True,
1629 },
1630 {
1631 # Video with unsupported adaptive stream type formats
1632 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1633 'info_dict': {
1634 'id': 'Z4Vy8R84T1U',
1635 'ext': 'mp4',
1636 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1637 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1638 'duration': 433,
1639 'upload_date': '20130923',
1640 'uploader': 'Amelia Putri Harwita',
1641 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1642 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1643 'formats': 'maxcount:10',
1644 },
1645 'params': {
1646 'skip_download': True,
1647 'youtube_include_dash_manifest': False,
1648 },
1649 'skip': 'not actual anymore',
1650 },
1651 {
1652 # Youtube Music Auto-generated description
1653 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1654 'info_dict': {
1655 'id': 'MgNrAu2pzNs',
1656 'ext': 'mp4',
1657 'title': 'Voyeur Girl',
1658 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1659 'upload_date': '20190312',
1660 'uploader': 'Stephen - Topic',
1661 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1662 'artist': 'Stephen',
1663 'track': 'Voyeur Girl',
1664 'album': 'it\'s too much love to know my dear',
1665 'release_date': '20190313',
1666 'release_year': 2019,
1667 },
1668 'params': {
1669 'skip_download': True,
1670 },
1671 },
1672 {
1673 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1674 'only_matching': True,
1675 },
1676 {
1677 # invalid -> valid video id redirection
1678 'url': 'DJztXj2GPfl',
1679 'info_dict': {
1680 'id': 'DJztXj2GPfk',
1681 'ext': 'mp4',
1682 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1683 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1684 'upload_date': '20090125',
1685 'uploader': 'Prochorowka',
1686 'uploader_id': 'Prochorowka',
1687 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1688 'artist': 'Panjabi MC',
1689 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1690 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1691 },
1692 'params': {
1693 'skip_download': True,
1694 },
1695 'skip': 'Video unavailable',
1696 },
1697 {
1698 # empty description results in an empty string
1699 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1700 'info_dict': {
1701 'id': 'x41yOUIvK2k',
1702 'ext': 'mp4',
1703 'title': 'IMG 3456',
1704 'description': '',
1705 'upload_date': '20170613',
1706 'uploader_id': 'ElevageOrVert',
1707 'uploader': 'ElevageOrVert',
1708 },
1709 'params': {
1710 'skip_download': True,
1711 },
1712 },
1713 {
1714 # with '};' inside yt initial data (see [1])
1715 # see [2] for an example with '};' inside ytInitialPlayerResponse
1716 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1717 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1718 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1719 'info_dict': {
1720 'id': 'CHqg6qOn4no',
1721 'ext': 'mp4',
1722 'title': 'Part 77 Sort a list of simple types in c#',
1723 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1724 'upload_date': '20130831',
1725 'uploader_id': 'kudvenkat',
1726 'uploader': 'kudvenkat',
1727 },
1728 'params': {
1729 'skip_download': True,
1730 },
1731 },
1732 {
1733 # another example of '};' in ytInitialData
1734 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1735 'only_matching': True,
1736 },
1737 {
1738 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1739 'only_matching': True,
1740 },
1741 {
1742 # https://github.com/ytdl-org/youtube-dl/pull/28094
1743 'url': 'OtqTfy26tG0',
1744 'info_dict': {
1745 'id': 'OtqTfy26tG0',
1746 'ext': 'mp4',
1747 'title': 'Burn Out',
1748 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1749 'upload_date': '20141120',
1750 'uploader': 'The Cinematic Orchestra - Topic',
1751 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1752 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1753 'artist': 'The Cinematic Orchestra',
1754 'track': 'Burn Out',
1755 'album': 'Every Day',
1756 'release_data': None,
1757 'release_year': None,
1758 },
1759 'params': {
1760 'skip_download': True,
1761 },
1762 },
1763 {
1764 # controversial video, only works with bpctr when authenticated with cookies
1765 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1766 'only_matching': True,
1767 },
1768 {
1769 # controversial video, requires bpctr/contentCheckOk
1770 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1771 'info_dict': {
1772 'id': 'SZJvDhaSDnc',
1773 'ext': 'mp4',
1774 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1775 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1776 'uploader': 'CBS This Morning',
1777 'uploader_id': 'CBSThisMorning',
1778 'upload_date': '20140716',
1779 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1780 }
1781 },
1782 {
1783 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1784 'url': 'cBvYw8_A0vQ',
1785 'info_dict': {
1786 'id': 'cBvYw8_A0vQ',
1787 'ext': 'mp4',
1788 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1789 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1790 'upload_date': '20201120',
1791 'uploader': 'Walk around Japan',
1792 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1793 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1794 },
1795 'params': {
1796 'skip_download': True,
1797 },
1798 }, {
1799 # Has multiple audio streams
1800 'url': 'WaOKSUlf4TM',
1801 'only_matching': True
1802 }, {
1803 # Requires Premium: has format 141 when requested using YTM url
1804 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1805 'only_matching': True
1806 }, {
1807 # multiple subtitles with same lang_code
1808 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1809 'only_matching': True,
1810 }, {
1811 # Force use android client fallback
1812 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1813 'info_dict': {
1814 'id': 'YOelRv7fMxY',
1815 'title': 'DIGGING A SECRET TUNNEL Part 1',
1816 'ext': '3gp',
1817 'upload_date': '20210624',
1818 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1819 'uploader': 'colinfurze',
1820 'uploader_id': 'colinfurze',
1821 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1822 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1823 },
1824 'params': {
1825 'format': '17', # 3gp format available on android
1826 'extractor_args': {'youtube': {'player_client': ['android']}},
1827 },
1828 },
1829 {
1830 # Skip download of additional client configs (remix client config in this case)
1831 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1832 'only_matching': True,
1833 'params': {
1834 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1835 },
1836 }
1837 ]
1838
1839 @classmethod
1840 def suitable(cls, url):
1841 # Hack for lazy extractors until more generic solution is implemented
1842 # (see #28780)
1843 from .youtube import parse_qs
1844 qs = parse_qs(url)
1845 if qs.get('list', [None])[0]:
1846 return False
1847 return super(YoutubeIE, cls).suitable(url)
1848
    def __init__(self, *args, **kwargs):
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # player id -> downloaded player JS source (filled by _load_player)
        self._code_cache = {}
        # (player_url, signature cache id) -> signature decrypt function
        # (filled by _decrypt_signature)
        self._player_cache = {}
1853
1854 def _extract_player_url(self, ytcfg=None, webpage=None):
1855 player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
1856 if not player_url and webpage:
1857 player_url = self._search_regex(
1858 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1859 webpage, 'player URL', fatal=False)
1860 if not player_url:
1861 return None
1862 if player_url.startswith('//'):
1863 player_url = 'https:' + player_url
1864 elif not re.match(r'https?://', player_url):
1865 player_url = compat_urlparse.urljoin(
1866 'https://www.youtube.com', player_url)
1867 return player_url
1868
1869 def _signature_cache_id(self, example_sig):
1870 """ Return a string representation of a signature """
1871 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1872
1873 @classmethod
1874 def _extract_player_info(cls, player_url):
1875 for player_re in cls._PLAYER_INFO_RE:
1876 id_m = re.search(player_re, player_url)
1877 if id_m:
1878 break
1879 else:
1880 raise ExtractorError('Cannot identify player %r' % player_url)
1881 return id_m.group('id')
1882
1883 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1884 player_id = self._extract_player_info(player_url)
1885 if player_id not in self._code_cache:
1886 self._code_cache[player_id] = self._download_webpage(
1887 player_url, video_id, fatal=fatal,
1888 note='Downloading player ' + player_id,
1889 errnote='Download of %s failed' % player_url)
1890 return player_id in self._code_cache
1891
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """
        Return a callable that descrambles a signature of the same shape as
        *example_sig*, using the filesystem cache when possible.

        NOTE(review): if _load_player fails this falls through and implicitly
        returns None; the caller (_decrypt_signature) then crashes inside its
        try block and reports the failure - confirm this is intended.
        """
        player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        # cache key encodes player id + signature shape (part lengths)
        func_id = 'js_%s_%s' % (
            player_id, self._signature_cache_id(example_sig))
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # cache_spec is a list of source indices: the descramble is a pure
            # character permutation/selection, so it can be replayed directly
            return lambda s: ''.join(s[i] for i in cache_spec)

        if self._load_player(video_id, player_url):
            code = self._code_cache[player_id]
            res = self._parse_sig_js(code)

            # Run the JS function once on a string of distinct known chars to
            # record which input index ends up at each output position
            test_string = ''.join(map(compat_chr, range(len(example_sig))))
            cache_res = res(test_string)
            cache_spec = [ord(c) for c in cache_res]

            self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
            return res
1914
    def _print_sig_code(self, func, example_sig):
        """
        Print a compact Python expression equivalent to the extracted
        signature function (for the youtube_print_sig_code option).
        """
        def gen_sig_code(idxs):
            # Compress the index list into slice expressions where consecutive
            # indices form runs with step +1/-1, single indexing otherwise
            def _genslice(start, end, step):
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        continue
                    # run ended: emit the accumulated slice
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # flush the final element or pending run
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Record the permutation by running func on a string of distinct chars
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                ' return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1953
    def _parse_sig_js(self, jscode):
        """
        Find the signature-descrambling function in player JS and return a
        callable wrapping it via JSInterpreter. Patterns are tried in order,
        newest player layouts first; the trailing group is obsolete layouts.
        """
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # The JS function takes its argument as a one-element list
        return lambda s: initial_function([s])
1977
1978 def _decrypt_signature(self, s, video_id, player_url):
1979 """Turn the encrypted s field into a working signature"""
1980
1981 if player_url is None:
1982 raise ExtractorError('Cannot decrypt signature without player_url')
1983
1984 try:
1985 player_id = (player_url, self._signature_cache_id(s))
1986 if player_id not in self._player_cache:
1987 func = self._extract_signature_function(
1988 video_id, player_url, s
1989 )
1990 self._player_cache[player_id] = func
1991 func = self._player_cache[player_id]
1992 if self.get_param('youtube_print_sig_code'):
1993 self._print_sig_code(func, s)
1994 return func(s)
1995 except Exception as e:
1996 tb = traceback.format_exc()
1997 raise ExtractorError(
1998 'Signature extraction failed: ' + tb, cause=e)
1999
2000 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
2001 """
2002 Extract signatureTimestamp (sts)
2003 Required to tell API what sig/player version is in use.
2004 """
2005 sts = None
2006 if isinstance(ytcfg, dict):
2007 sts = int_or_none(ytcfg.get('STS'))
2008
2009 if not sts:
2010 # Attempt to extract from player
2011 if player_url is None:
2012 error_msg = 'Cannot extract signature timestamp without player_url.'
2013 if fatal:
2014 raise ExtractorError(error_msg)
2015 self.report_warning(error_msg)
2016 return
2017 if self._load_player(video_id, player_url, fatal=fatal):
2018 player_id = self._extract_player_info(player_url)
2019 code = self._code_cache[player_id]
2020 sts = int_or_none(self._search_regex(
2021 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
2022 'JS player signature timestamp', group='sts', fatal=fatal))
2023 return sts
2024
2025 def _mark_watched(self, video_id, player_responses):
2026 playback_url = traverse_obj(
2027 player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
2028 expected_type=url_or_none, get_all=False)
2029 if not playback_url:
2030 self.report_warning('Unable to mark watched')
2031 return
2032 parsed_playback_url = compat_urlparse.urlparse(playback_url)
2033 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
2034
2035 # cpn generation algorithm is reverse engineered from base.js.
2036 # In fact it works even with dummy cpn.
2037 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
2038 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
2039
2040 qs.update({
2041 'ver': ['2'],
2042 'cpn': [cpn],
2043 })
2044 playback_url = compat_urlparse.urlunparse(
2045 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
2046
2047 self._download_webpage(
2048 playback_url, video_id, 'Marking watched',
2049 'Unable to mark watched', fatal=False)
2050
    @staticmethod
    def _extract_urls(webpage):
        """Return all YouTube video URLs/ids embedded in *webpage* (iframes, embeds, plugins)."""
        # Embedded YouTube player
        entries = [
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(r'''(?x)
            (?:
                <iframe[^>]+?src=|
                data-video-url=|
                <embed[^>]+?src=|
                embedSWF\(?:\s*|
                <object[^>]+data=|
                new\s+SWFObject\(
            )
            (["\'])
                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
                (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
            \1''', webpage)]

        # lazyYT YouTube embed
        entries.extend(list(map(
            unescapeHTML,
            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

        # Wordpress "YouTube Video Importer" plugin
        # NOTE: this one yields bare video ids, not full URLs
        matches = re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
        entries.extend(m[-1] for m in matches)

        return entries
2082
2083 @staticmethod
2084 def _extract_url(webpage):
2085 urls = YoutubeIE._extract_urls(webpage)
2086 return urls[0] if urls else None
2087
2088 @classmethod
2089 def extract_id(cls, url):
2090 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2091 if mobj is None:
2092 raise ExtractorError('Invalid URL: %s' % url)
2093 video_id = mobj.group(2)
2094 return video_id
2095
2096 def _extract_chapters_from_json(self, data, duration):
2097 chapter_list = traverse_obj(
2098 data, (
2099 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2100 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2101 ), expected_type=list)
2102
2103 return self._extract_chapters(
2104 chapter_list,
2105 chapter_time=lambda chapter: float_or_none(
2106 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2107 chapter_title=lambda chapter: traverse_obj(
2108 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2109 duration=duration)
2110
2111 def _extract_chapters_from_engagement_panel(self, data, duration):
2112 content_list = traverse_obj(
2113 data,
2114 ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2115 expected_type=list, default=[])
2116 chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2117 chapter_title = lambda chapter: self._get_text(chapter, 'title')
2118
2119 return next((
2120 filter(None, (
2121 self._extract_chapters(
2122 traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2123 chapter_time, chapter_title, duration)
2124 for contents in content_list
2125 ))), [])
2126
2127 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2128 chapters = []
2129 last_chapter = {'start_time': 0}
2130 for idx, chapter in enumerate(chapter_list or []):
2131 title = chapter_title(chapter)
2132 start_time = chapter_time(chapter)
2133 if start_time is None:
2134 continue
2135 last_chapter['end_time'] = start_time
2136 if start_time < last_chapter['start_time']:
2137 if idx == 1:
2138 chapters.pop()
2139 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2140 else:
2141 self.report_warning(f'Invalid start time for chapter "{title}"')
2142 continue
2143 last_chapter = {'start_time': start_time, 'title': title}
2144 chapters.append(last_chapter)
2145 last_chapter['end_time'] = duration
2146 return chapters
2147
2148 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2149 return self._parse_json(self._search_regex(
2150 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2151 regex), webpage, name, default='{}'), video_id, fatal=False)
2152
2153 @staticmethod
2154 def parse_time_text(time_text):
2155 """
2156 Parse the comment time text
2157 time_text is in the format 'X units ago (edited)'
2158 """
2159 time_text_split = time_text.split(' ')
2160 if len(time_text_split) >= 3:
2161 try:
2162 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2163 except ValueError:
2164 return None
2165
2166 def _extract_comment(self, comment_renderer, parent=None):
2167 comment_id = comment_renderer.get('commentId')
2168 if not comment_id:
2169 return
2170
2171 text = self._get_text(comment_renderer, 'contentText')
2172
2173 # note: timestamp is an estimate calculated from the current time and time_text
2174 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
2175 time_text_dt = self.parse_time_text(time_text)
2176 if isinstance(time_text_dt, datetime.datetime):
2177 timestamp = calendar.timegm(time_text_dt.timetuple())
2178 author = self._get_text(comment_renderer, 'authorText')
2179 author_id = try_get(comment_renderer,
2180 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2181
2182 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2183 lambda x: x['likeCount']), compat_str)) or 0
2184 author_thumbnail = try_get(comment_renderer,
2185 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2186
2187 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2188 is_favorited = 'creatorHeart' in (try_get(
2189 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2190 return {
2191 'id': comment_id,
2192 'text': text,
2193 'timestamp': timestamp,
2194 'time_text': time_text,
2195 'like_count': votes,
2196 'is_favorited': is_favorited,
2197 'author': author,
2198 'author_id': author_id,
2199 'author_thumbnail': author_thumbnail,
2200 'author_is_uploader': author_is_uploader,
2201 'parent': parent or 'root'
2202 }
2203
    def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                         ytcfg, video_id, parent=None, comment_counts=None):
        """
        Generator over a comment (or reply) continuation.

        Yields an int (the estimated total comment count, parsed from the
        section header) once at most, then comment info dicts. Recurses into
        reply threads with parent set; comment_counts is a shared mutable
        list [downloaded so far, estimated total, current reply thread #].
        """

        def extract_header(contents):
            # Parse the comments section header: total count + the
            # continuation matching the requested sort order.
            _total_comments = 0
            _continuation = None
            for content in contents:
                comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
                expected_comment_count = parse_count(self._get_text(
                    comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

                if expected_comment_count:
                    comment_counts[1] = expected_comment_count
                    self.to_screen('Downloading ~%d comments' % expected_comment_count)
                    _total_comments = comment_counts[1]
                sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
                comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    continue

                sort_text = sort_menu_item.get('title')
                if isinstance(sort_text, compat_str):
                    sort_text = sort_text.lower()
                else:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text)
                break
            return _total_comments, _continuation

        def extract_thread(contents):
            # Yield the comments in *contents*, recursing into each reply
            # thread (depth is capped by YouTube itself at 2).
            if not parent:
                comment_counts[2] = 0
            for content in contents:
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                comment_renderer = try_get(
                    comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                    content, (lambda x: x['commentRenderer'], dict))

                if not comment_renderer:
                    continue
                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue
                comment_counts[0] += 1
                yield comment
                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    comment_counts[2] += 1
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, identity_token, account_syncid, ytcfg,
                        video_id, parent=comment.get('id'), comment_counts=comment_counts)

                    for reply_comment in comment_entries_iter:
                        yield reply_comment

        # YouTube comments have a max depth of 2
        max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
        if max_depth == 1 and parent:
            return
        if not comment_counts:
            # comment so far, est. total comments, current comment thread #
            comment_counts = [0, 0, 0]

        continuation = self._extract_continuation(root_continuation_data)
        # Tokens shorter than 27 chars come from the old API; synthesize a
        # token the new API understands from the video id instead.
        if continuation and len(continuation['continuation']) < 27:
            self.write_debug('Detected old API continuation token. Generating new API compatible token.')
            continuation_token = self._generate_comment_continuation(video_id)
            continuation = self._build_api_continuation_query(continuation_token, None)

        visitor_data = None
        is_first_continuation = parent is None

        # Page through the continuation chain until exhausted
        for page_num in itertools.count(0):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                        comment_counts[2], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    ' ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
            if not response:
                break
            # Carry visitorData forward so subsequent pages share the session
            visitor_data = try_get(
                response,
                lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
                compat_str) or visitor_data

            continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

            continuation = None
            # Current response structure: a list of endpoint commands
            if isinstance(continuation_contents, list):
                for continuation_section in continuation_contents:
                    if not isinstance(continuation_section, dict):
                        continue
                    continuation_items = try_get(
                        continuation_section,
                        (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                         lambda x: x['appendContinuationItemsAction']['continuationItems']),
                        list) or []
                    if is_first_continuation:
                        total_comments, continuation = extract_header(continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break
                        continue
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_items)):
                        yield entry
                    continuation = self._extract_continuation({'contents': continuation_items})
                    if continuation:
                        # Sometimes YouTube provides a continuation without any comments
                        # In most cases we end up just downloading these with very little comments to come.
                        if count == 0:
                            if not parent:
                                self.report_warning('No comments received - assuming end of comments')
                            continuation = None
                        break

            # Deprecated response structure
            elif isinstance(continuation_contents, dict):
                known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
                for key, continuation_renderer in continuation_contents.items():
                    if key not in known_continuation_renderers:
                        continue
                    if not isinstance(continuation_renderer, dict):
                        continue
                    if is_first_continuation:
                        header_continuation_items = [continuation_renderer.get('header') or {}]
                        total_comments, continuation = extract_header(header_continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break

                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break
2374
2375 @staticmethod
2376 def _generate_comment_continuation(video_id):
2377 """
2378 Generates initial comment section continuation token from given video id
2379 """
2380 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2381 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2382 new_continuation_intlist = list(itertools.chain.from_iterable(
2383 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2384 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2385
    def _extract_comments(self, ytcfg, video_id, contents, webpage):
        """Entry for comment extraction"""
        def _real_comment_extract(contents):
            # Walk the top-level renderers; only the first matching
            # itemSectionRenderer per entry is used (note the break)
            if isinstance(contents, list):
                for entry in contents:
                    for key, renderer in entry.items():
                        if key not in known_entry_comment_renderers:
                            continue
                        yield from self._comment_entries(
                            renderer, video_id=video_id, ytcfg=ytcfg,
                            identity_token=self._extract_identity_token(webpage, item_id=video_id),
                            account_syncid=self._extract_account_syncid(ytcfg))
                        break
        comments = []
        known_entry_comment_renderers = ('itemSectionRenderer',)
        estimated_total = 0
        max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
        # Force English regardless of account setting to prevent parsing issues
        # See: https://github.com/yt-dlp/yt-dlp/issues/532
        # deepcopy so the caller's ytcfg is not mutated
        # NOTE(review): if INNERTUBE_CONTEXT.client is absent, 'hl' is set on
        # the throwaway default dict, i.e. silently does nothing - confirm
        ytcfg = copy.deepcopy(ytcfg)
        traverse_obj(
            ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})['hl'] = 'en'
        try:
            # _comment_entries yields the estimated total (an int) before the
            # comment dicts; separate the two here
            for comment in _real_comment_extract(contents):
                if len(comments) >= max_comments:
                    break
                if isinstance(comment, int):
                    estimated_total = comment
                    continue
                comments.append(comment)
        except KeyboardInterrupt:
            # Allow the user to abort and still keep what was downloaded
            self.to_screen('Interrupted by user')
        self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
        return {
            'comments': comments,
            'comment_count': len(comments),
        }
2423
2424 @staticmethod
2425 def _generate_player_context(sts=None):
2426 context = {
2427 'html5Preference': 'HTML5_PREF_WANTS',
2428 }
2429 if sts is not None:
2430 context['signatureTimestamp'] = sts
2431 return {
2432 'playbackContext': {
2433 'contentPlaybackContext': context
2434 },
2435 'contentCheckOk': True,
2436 'racyCheckOk': True
2437 }
2438
2439 def _is_agegated(self, player_response):
2440 reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2441 for reason in reasons:
2442 if reason in self._AGE_GATE_REASONS + self._AGE_GATE_STATUS_REASONS:
2443 return True
2444 if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')) is not None:
2445 return True
2446 return False
2447
2448 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
2449
2450 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2451 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2452 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
2453 headers = self.generate_api_headers(
2454 player_ytcfg, identity_token, syncid,
2455 default_client=self._YT_CLIENTS[client], session_index=session_index)
2456
2457 yt_query = {'videoId': video_id}
2458 yt_query.update(self._generate_player_context(sts))
2459 return self._extract_response(
2460 item_id=video_id, ep='player', query=yt_query,
2461 ytcfg=player_ytcfg, headers=headers, fatal=False,
2462 default_client=self._YT_CLIENTS[client],
2463 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2464 ) or None
2465
2466 def _get_requested_clients(self, url, smuggled_data):
2467 requested_clients = []
2468 allowed_clients = [client for client in self._YT_CLIENTS.keys() if client[:1] != '_']
2469 for client in self._configuration_arg('player_client'):
2470 if client in allowed_clients:
2471 requested_clients.append(client)
2472 elif client == 'all':
2473 requested_clients.extend(allowed_clients)
2474 else:
2475 self.report_warning(f'Skipping unsupported client {client}')
2476 if not requested_clients:
2477 requested_clients = ['android', 'web']
2478
2479 if smuggled_data.get('is_music_url') or self.is_music_url(url):
2480 requested_clients.extend(
2481 f'{client}_music' for client in requested_clients if not client.endswith('_music'))
2482
2483 return orderedSet(requested_clients)
2484
2485 def _extract_player_ytcfg(self, client, video_id):
2486 url = {
2487 'web_music': 'https://music.youtube.com',
2488 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2489 }.get(client)
2490 if not url:
2491 return {}
2492 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2493 return self.extract_ytcfg(video_id, webpage) or {}
2494
    def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
        """
        Yield a player response per requested client, queueing the matching
        *_agegate client whenever a response is detected as age-gated.
        """
        initial_pr = None
        if webpage:
            initial_pr = self._extract_yt_initial_variable(
                webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
                video_id, 'initial player response')

        # Process clients in requested order; reversed so pop() takes from
        # the front while agegate fallbacks can be appended mid-loop
        original_clients = clients
        clients = clients[::-1]
        while clients:
            client = clients.pop()
            player_ytcfg = master_ytcfg if client == 'web' else {}
            if 'configs' not in self._configuration_arg('player_skip'):
                player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

            # The web client's response is already embedded in the page
            pr = (
                initial_pr if client == 'web' and initial_pr
                else self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr))
            if pr:
                yield pr

            # Retry age-gated videos once per client via its *_agegate variant
            if self._is_agegated(pr):
                client = f'{client}_agegate'
                if client in self._YT_CLIENTS and client not in original_clients:
                    clients.append(client)

        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        if initial_pr and 'web' not in original_clients:
            initial_pr['streamingData'] = None
            yield initial_pr
2529
    def _extract_formats(self, streaming_data, video_id, player_url, is_live):
        """Yield format dicts from the streamingData of all player responses.

        First processes progressive/adaptive formats (including
        signature-cipher decryption via the player JS), then formats from
        HLS and DASH manifests, deduplicating by itag/stream id across
        clients.
        """
        itags, stream_ids = [], []
        itag_qualities, res_qualities = {}, {}
        q = qualities([
            # Normally tiny is the smallest video-only formats. But
            # audio-only formats with unknown quality may get tagged as tiny
            'tiny',
            'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
            'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
        ])
        streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

        for fmt in streaming_formats:
            # Skip formats that declare DRM or a segment target duration
            if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
                continue

            itag = str_or_none(fmt.get('itag'))
            audio_track = fmt.get('audioTrack') or {}
            stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
            if stream_id in stream_ids:
                continue

            quality = fmt.get('quality')
            height = int_or_none(fmt.get('height'))
            if quality == 'tiny' or not quality:
                quality = fmt.get('audioQuality', '').lower() or quality
            # The 3gp format (17) in android client has a quality of "small",
            # but is actually worse than other formats
            if itag == '17':
                quality = 'tiny'
            if quality:
                if itag:
                    itag_qualities[itag] = quality
                if height:
                    res_qualities[height] = quality
            # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
            # (adding `&sq=0` to the URL) and parsing emsg box to determine the
            # number of fragment that would subsequently requested with (`&sq=N`)
            if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
                continue

            fmt_url = fmt.get('url')
            if not fmt_url:
                # No direct URL: the stream is protected by a signature cipher
                # which must be decrypted using the player JS
                sc = compat_parse_qs(fmt.get('signatureCipher'))
                fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
                encrypted_sig = try_get(sc, lambda x: x['s'][0])
                if not (sc and fmt_url and encrypted_sig):
                    continue
                if not player_url:
                    continue
                signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
                sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
                fmt_url += '&' + sp + '=' + signature

            if itag:
                itags.append(itag)
                stream_ids.append(stream_id)

            tbr = float_or_none(
                fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
            dct = {
                'asr': int_or_none(fmt.get('audioSampleRate')),
                'filesize': int_or_none(fmt.get('contentLength')),
                'format_id': itag,
                # NOTE(review): if 'qualityLabel' is absent and quality is
                # still None here, .replace would raise — presumably the
                # player response always sets one of them; confirm
                'format_note': ', '.join(filter(None, (
                    audio_track.get('displayName'),
                    fmt.get('qualityLabel') or quality.replace('audio_quality_', '')))),
                'fps': int_or_none(fmt.get('fps')),
                'height': height,
                'quality': q(quality),
                'tbr': tbr,
                'url': fmt_url,
                'width': int_or_none(fmt.get('width')),
                'language': audio_track.get('id', '').split('.')[0],
            }
            mime_mobj = re.match(
                r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
            if mime_mobj:
                dct['ext'] = mimetype2ext(mime_mobj.group(1))
                dct.update(parse_codecs(mime_mobj.group(2)))
            no_audio = dct.get('acodec') == 'none'
            no_video = dct.get('vcodec') == 'none'
            if no_audio:
                dct['vbr'] = tbr
            if no_video:
                dct['abr'] = tbr
            if no_audio or no_video:
                dct['downloader_options'] = {
                    # Youtube throttles chunks >~10M
                    'http_chunk_size': 10485760,
                }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
            yield dct

        skip_manifests = self._configuration_arg('skip')
        get_dash = not is_live and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
        get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

        def guess_quality(f):
            # Map a manifest-derived format back to a known quality via its
            # itag or height; -1 when neither is known
            for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)):
                if val in qdict:
                    return q(qdict[val])
            return -1

        for sd in streaming_data:
            hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
            if hls_manifest_url:
                for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                    itag = self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)
                    if itag in itags:
                        continue
                    if itag:
                        f['format_id'] = itag
                        itags.append(itag)
                    f['quality'] = guess_quality(f)
                    yield f

            dash_manifest_url = get_dash and sd.get('dashManifestUrl')
            if dash_manifest_url:
                for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                    itag = f['format_id']
                    if itag in itags:
                        continue
                    if itag:
                        itags.append(itag)
                    f['quality'] = guess_quality(f)
                    filesize = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url')
                        or f['url'], 'file size', default=None))
                    if filesize:
                        f['filesize'] = filesize
                    yield f
2664
2665 def _real_extract(self, url):
2666 url, smuggled_data = unsmuggle_url(url, {})
2667 video_id = self._match_id(url)
2668
2669 base_url = self.http_scheme() + '//www.youtube.com/'
2670 webpage_url = base_url + 'watch?v=' + video_id
2671 webpage = self._download_webpage(
2672 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2673
2674 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2675 player_url = self._extract_player_url(master_ytcfg, webpage)
2676 identity_token = self._extract_identity_token(webpage, video_id)
2677
2678 player_responses = list(self._extract_player_responses(
2679 self._get_requested_clients(url, smuggled_data),
2680 video_id, webpage, master_ytcfg, player_url, identity_token))
2681
2682 get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
2683
2684 playability_statuses = traverse_obj(
2685 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2686
2687 trailer_video_id = get_first(
2688 playability_statuses,
2689 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2690 expected_type=str)
2691 if trailer_video_id:
2692 return self.url_result(
2693 trailer_video_id, self.ie_key(), trailer_video_id)
2694
2695 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2696 if webpage else (lambda x: None))
2697
2698 video_details = traverse_obj(
2699 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2700 microformats = traverse_obj(
2701 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2702 expected_type=dict, default=[])
2703 video_title = (
2704 get_first(video_details, 'title')
2705 or self._get_text(microformats, (..., 'title'))
2706 or search_meta(['og:title', 'twitter:title', 'title']))
2707 video_description = get_first(video_details, 'shortDescription')
2708
2709 if not smuggled_data.get('force_singlefeed', False):
2710 if not self.get_param('noplaylist'):
2711 multifeed_metadata_list = get_first(
2712 player_responses,
2713 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2714 expected_type=str)
2715 if multifeed_metadata_list:
2716 entries = []
2717 feed_ids = []
2718 for feed in multifeed_metadata_list.split(','):
2719 # Unquote should take place before split on comma (,) since textual
2720 # fields may contain comma as well (see
2721 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2722 feed_data = compat_parse_qs(
2723 compat_urllib_parse_unquote_plus(feed))
2724
2725 def feed_entry(name):
2726 return try_get(
2727 feed_data, lambda x: x[name][0], compat_str)
2728
2729 feed_id = feed_entry('id')
2730 if not feed_id:
2731 continue
2732 feed_title = feed_entry('title')
2733 title = video_title
2734 if feed_title:
2735 title += ' (%s)' % feed_title
2736 entries.append({
2737 '_type': 'url_transparent',
2738 'ie_key': 'Youtube',
2739 'url': smuggle_url(
2740 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2741 {'force_singlefeed': True}),
2742 'title': title,
2743 })
2744 feed_ids.append(feed_id)
2745 self.to_screen(
2746 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2747 % (', '.join(feed_ids), video_id))
2748 return self.playlist_result(
2749 entries, video_id, video_title, video_description)
2750 else:
2751 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2752
2753 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
2754 is_live = get_first(video_details, 'isLive')
2755 if is_live is None:
2756 is_live = get_first(live_broadcast_details, 'isLiveNow')
2757
2758 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2759 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2760
2761 if not formats:
2762 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2763 self.raise_no_formats(
2764 'This video is DRM protected.', expected=True)
2765 pemr = get_first(
2766 playability_statuses,
2767 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2768 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2769 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2770 if subreason:
2771 if subreason == 'The uploader has not made this video available in your country.':
2772 countries = get_first(microformats, 'availableCountries')
2773 if not countries:
2774 regions_allowed = search_meta('regionsAllowed')
2775 countries = regions_allowed.split(',') if regions_allowed else None
2776 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2777 reason += f'. {subreason}'
2778 if reason:
2779 self.raise_no_formats(reason, expected=True)
2780
2781 for f in formats:
2782 if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled
2783 f['source_preference'] = -10
2784 note = f.get('format_note')
2785 f['format_note'] = f'{note} (throttled)' if note else '(throttled)'
2786
2787 # Source is given priority since formats that throttle are given lower source_preference
2788 # When throttling issue is fully fixed, remove this
2789 self._sort_formats(formats, ('quality', 'height', 'fps', 'source'))
2790
2791 keywords = get_first(video_details, 'keywords', expected_type=list) or []
2792 if not keywords and webpage:
2793 keywords = [
2794 unescapeHTML(m.group('content'))
2795 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2796 for keyword in keywords:
2797 if keyword.startswith('yt:stretch='):
2798 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2799 if mobj:
2800 # NB: float is intentional for forcing float division
2801 w, h = (float(v) for v in mobj.groups())
2802 if w > 0 and h > 0:
2803 ratio = w / h
2804 for f in formats:
2805 if f.get('vcodec') != 'none':
2806 f['stretched_ratio'] = ratio
2807 break
2808
2809 thumbnails = []
2810 thumbnail_dicts = traverse_obj(
2811 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2812 expected_type=dict, default=[])
2813 for thumbnail in thumbnail_dicts:
2814 thumbnail_url = thumbnail.get('url')
2815 if not thumbnail_url:
2816 continue
2817 # Sometimes youtube gives a wrong thumbnail URL. See:
2818 # https://github.com/yt-dlp/yt-dlp/issues/233
2819 # https://github.com/ytdl-org/youtube-dl/issues/28023
2820 if 'maxresdefault' in thumbnail_url:
2821 thumbnail_url = thumbnail_url.split('?')[0]
2822 thumbnails.append({
2823 'url': thumbnail_url,
2824 'height': int_or_none(thumbnail.get('height')),
2825 'width': int_or_none(thumbnail.get('width')),
2826 })
2827 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2828 if thumbnail_url:
2829 thumbnails.append({
2830 'url': thumbnail_url,
2831 })
2832 # The best resolution thumbnails sometimes does not appear in the webpage
2833 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2834 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2835 hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
2836 # TODO: Test them also? - For some videos, even these don't exist
2837 guaranteed_thumbnail_names = [
2838 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2839 'mqdefault', 'mq1', 'mq2', 'mq3',
2840 'default', '1', '2', '3'
2841 ]
2842 thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2843 n_thumbnail_names = len(thumbnail_names)
2844
2845 thumbnails.extend({
2846 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2847 video_id=video_id, name=name, ext=ext,
2848 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2849 '_test_url': name in hq_thumbnail_names,
2850 } for name in thumbnail_names for ext in ('webp', 'jpg'))
2851 for thumb in thumbnails:
2852 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2853 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2854 self._remove_duplicate_formats(thumbnails)
2855
2856 category = get_first(microformats, 'category') or search_meta('genre')
2857 channel_id = str_or_none(
2858 get_first(video_details, 'channelId')
2859 or get_first(microformats, 'externalChannelId')
2860 or search_meta('channelId'))
2861 duration = int_or_none(
2862 get_first(video_details, 'lengthSeconds')
2863 or get_first(microformats, 'lengthSeconds')
2864 or parse_duration(search_meta('duration'))) or None
2865 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2866
2867 live_content = get_first(video_details, 'isLiveContent')
2868 is_upcoming = get_first(video_details, 'isUpcoming')
2869 if is_live is None:
2870 if is_upcoming or live_content is False:
2871 is_live = False
2872 if is_upcoming is None and (live_content or is_live):
2873 is_upcoming = False
2874 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2875 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2876 if not duration and live_endtime and live_starttime:
2877 duration = live_endtime - live_starttime
2878
2879 info = {
2880 'id': video_id,
2881 'title': self._live_title(video_title) if is_live else video_title,
2882 'formats': formats,
2883 'thumbnails': thumbnails,
2884 'description': video_description,
2885 'upload_date': unified_strdate(
2886 get_first(microformats, 'uploadDate')
2887 or search_meta('uploadDate')),
2888 'uploader': get_first(video_details, 'author'),
2889 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2890 'uploader_url': owner_profile_url,
2891 'channel_id': channel_id,
2892 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
2893 'duration': duration,
2894 'view_count': int_or_none(
2895 get_first((video_details, microformats), (..., 'viewCount'))
2896 or search_meta('interactionCount')),
2897 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
2898 'age_limit': 18 if (
2899 get_first(microformats, 'isFamilySafe') is False
2900 or search_meta('isFamilyFriendly') == 'false'
2901 or search_meta('og:restrictions:age') == '18+') else 0,
2902 'webpage_url': webpage_url,
2903 'categories': [category] if category else None,
2904 'tags': keywords,
2905 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
2906 'is_live': is_live,
2907 'was_live': (False if is_live or is_upcoming or live_content is False
2908 else None if is_live is None or is_upcoming is None
2909 else live_content),
2910 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2911 'release_timestamp': live_starttime,
2912 }
2913
2914 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2915 # Converted into dicts to remove duplicates
2916 captions = {
2917 sub.get('baseUrl'): sub
2918 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2919 translation_languages = {
2920 lang.get('languageCode'): lang.get('languageName')
2921 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
2922 subtitles = {}
2923 if pctr:
2924 def process_language(container, base_url, lang_code, sub_name, query):
2925 lang_subs = container.setdefault(lang_code, [])
2926 for fmt in self._SUBTITLE_FORMATS:
2927 query.update({
2928 'fmt': fmt,
2929 })
2930 lang_subs.append({
2931 'ext': fmt,
2932 'url': update_url_query(base_url, query),
2933 'name': sub_name,
2934 })
2935
2936 for base_url, caption_track in captions.items():
2937 if not base_url:
2938 continue
2939 if caption_track.get('kind') != 'asr':
2940 lang_code = (
2941 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2942 or caption_track.get('languageCode'))
2943 if not lang_code:
2944 continue
2945 process_language(
2946 subtitles, base_url, lang_code,
2947 traverse_obj(caption_track, ('name', 'simpleText')),
2948 {})
2949 continue
2950 automatic_captions = {}
2951 for trans_code, trans_name in translation_languages.items():
2952 if not trans_code:
2953 continue
2954 process_language(
2955 automatic_captions, base_url, trans_code,
2956 self._get_text(trans_name, max_runs=1),
2957 {'tlang': trans_code})
2958 info['automatic_captions'] = automatic_captions
2959 info['subtitles'] = subtitles
2960
2961 parsed_url = compat_urllib_parse_urlparse(url)
2962 for component in [parsed_url.fragment, parsed_url.query]:
2963 query = compat_parse_qs(component)
2964 for k, v in query.items():
2965 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2966 d_k += '_time'
2967 if d_k not in info and k in s_ks:
2968 info[d_k] = parse_duration(query[k][0])
2969
2970 # Youtube Music Auto-generated description
2971 if video_description:
2972 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2973 if mobj:
2974 release_year = mobj.group('release_year')
2975 release_date = mobj.group('release_date')
2976 if release_date:
2977 release_date = release_date.replace('-', '')
2978 if not release_year:
2979 release_year = release_date[:4]
2980 info.update({
2981 'album': mobj.group('album'.strip()),
2982 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2983 'track': mobj.group('track').strip(),
2984 'release_date': release_date,
2985 'release_year': int_or_none(release_year),
2986 })
2987
2988 initial_data = None
2989 if webpage:
2990 initial_data = self._extract_yt_initial_variable(
2991 webpage, self._YT_INITIAL_DATA_RE, video_id,
2992 'yt initial data')
2993 if not initial_data:
2994 headers = self.generate_api_headers(
2995 master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
2996 session_index=self._extract_session_index(master_ytcfg))
2997
2998 initial_data = self._extract_response(
2999 item_id=video_id, ep='next', fatal=False,
3000 ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
3001 note='Downloading initial data API JSON')
3002
3003 try:
3004 # This will error if there is no livechat
3005 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
3006 info['subtitles']['live_chat'] = [{
3007 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
3008 'video_id': video_id,
3009 'ext': 'json',
3010 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
3011 }]
3012 except (KeyError, IndexError, TypeError):
3013 pass
3014
3015 if initial_data:
3016 info['chapters'] = (
3017 self._extract_chapters_from_json(initial_data, duration)
3018 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3019 or None)
3020
3021 contents = try_get(
3022 initial_data,
3023 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3024 list) or []
3025 for content in contents:
3026 vpir = content.get('videoPrimaryInfoRenderer')
3027 if vpir:
3028 stl = vpir.get('superTitleLink')
3029 if stl:
3030 stl = self._get_text(stl)
3031 if try_get(
3032 vpir,
3033 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3034 info['location'] = stl
3035 else:
3036 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3037 if mobj:
3038 info.update({
3039 'series': mobj.group(1),
3040 'season_number': int(mobj.group(2)),
3041 'episode_number': int(mobj.group(3)),
3042 })
3043 for tlb in (try_get(
3044 vpir,
3045 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3046 list) or []):
3047 tbr = tlb.get('toggleButtonRenderer') or {}
3048 for getter, regex in [(
3049 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3050 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3051 lambda x: x['accessibility'],
3052 lambda x: x['accessibilityData']['accessibilityData'],
3053 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3054 label = (try_get(tbr, getter, dict) or {}).get('label')
3055 if label:
3056 mobj = re.match(regex, label)
3057 if mobj:
3058 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3059 break
3060 sbr_tooltip = try_get(
3061 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3062 if sbr_tooltip:
3063 like_count, dislike_count = sbr_tooltip.split(' / ')
3064 info.update({
3065 'like_count': str_to_int(like_count),
3066 'dislike_count': str_to_int(dislike_count),
3067 })
3068 vsir = content.get('videoSecondaryInfoRenderer')
3069 if vsir:
3070 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
3071 rows = try_get(
3072 vsir,
3073 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3074 list) or []
3075 multiple_songs = False
3076 for row in rows:
3077 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3078 multiple_songs = True
3079 break
3080 for row in rows:
3081 mrr = row.get('metadataRowRenderer') or {}
3082 mrr_title = mrr.get('title')
3083 if not mrr_title:
3084 continue
3085 mrr_title = self._get_text(mrr, 'title')
3086 mrr_contents_text = self._get_text(mrr, ('contents', 0))
3087 if mrr_title == 'License':
3088 info['license'] = mrr_contents_text
3089 elif not multiple_songs:
3090 if mrr_title == 'Album':
3091 info['album'] = mrr_contents_text
3092 elif mrr_title == 'Artist':
3093 info['artist'] = mrr_contents_text
3094 elif mrr_title == 'Song':
3095 info['track'] = mrr_contents_text
3096
3097 fallbacks = {
3098 'channel': 'uploader',
3099 'channel_id': 'uploader_id',
3100 'channel_url': 'uploader_url',
3101 }
3102 for to, frm in fallbacks.items():
3103 if not info.get(to):
3104 info[to] = info.get(frm)
3105
3106 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3107 v = info.get(s_k)
3108 if v:
3109 info[d_k] = v
3110
3111 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3112 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3113 is_membersonly = None
3114 is_premium = None
3115 if initial_data and is_private is not None:
3116 is_membersonly = False
3117 is_premium = False
3118 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3119 badge_labels = set()
3120 for content in contents:
3121 if not isinstance(content, dict):
3122 continue
3123 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3124 for badge_label in badge_labels:
3125 if badge_label.lower() == 'members only':
3126 is_membersonly = True
3127 elif badge_label.lower() == 'premium':
3128 is_premium = True
3129 elif badge_label.lower() == 'unlisted':
3130 is_unlisted = True
3131
3132 info['availability'] = self._availability(
3133 is_private=is_private,
3134 needs_premium=is_premium,
3135 needs_subscription=is_membersonly,
3136 needs_auth=info['age_limit'] >= 18,
3137 is_unlisted=None if is_private is None else is_unlisted)
3138
3139 # get xsrf for annotations or comments
3140 get_annotations = self.get_param('writeannotations', False)
3141 get_comments = self.get_param('getcomments', False)
3142 if get_annotations or get_comments:
3143 xsrf_token = None
3144 if master_ytcfg:
3145 xsrf_token = try_get(master_ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
3146 if not xsrf_token:
3147 xsrf_token = self._search_regex(
3148 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
3149 webpage, 'xsrf token', group='xsrf_token', fatal=False)
3150
3151 # annotations
3152 if get_annotations:
3153 invideo_url = get_first(
3154 player_responses,
3155 ('annotations', 0, 'playerAnnotationsUrlsRenderer', 'invideoUrl'),
3156 expected_type=str)
3157 if xsrf_token and invideo_url:
3158 xsrf_field_name = None
3159 if master_ytcfg:
3160 xsrf_field_name = try_get(master_ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
3161 if not xsrf_field_name:
3162 xsrf_field_name = self._search_regex(
3163 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
3164 webpage, 'xsrf field name',
3165 group='xsrf_field_name', default='session_token')
3166 info['annotations'] = self._download_webpage(
3167 self._proto_relative_url(invideo_url),
3168 video_id, note='Downloading annotations',
3169 errnote='Unable to download video annotations', fatal=False,
3170 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
3171
3172 if get_comments:
3173 info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
3174
3175 self.mark_watched(video_id, player_responses)
3176
3177 return info
3178
3179
3180 class YoutubeTabIE(YoutubeBaseInfoExtractor):
3181 IE_DESC = 'YouTube.com tab'
3182 _VALID_URL = r'''(?x)
3183 https?://
3184 (?:\w+\.)?
3185 (?:
3186 youtube(?:kids)?\.com|
3187 invidio\.us
3188 )/
3189 (?:
3190 (?P<channel_type>channel|c|user|browse)/|
3191 (?P<not_channel>
3192 feed/|hashtag/|
3193 (?:playlist|watch)\?.*?\blist=
3194 )|
3195 (?!(?:%s)\b) # Direct URLs
3196 )
3197 (?P<id>[^/?\#&]+)
3198 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3199 IE_NAME = 'youtube:tab'
3200
3201 _TESTS = [{
3202 'note': 'playlists, multipage',
3203 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3204 'playlist_mincount': 94,
3205 'info_dict': {
3206 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3207 'title': 'Игорь Клейнер - Playlists',
3208 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3209 'uploader': 'Игорь Клейнер',
3210 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3211 },
3212 }, {
3213 'note': 'playlists, multipage, different order',
3214 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3215 'playlist_mincount': 94,
3216 'info_dict': {
3217 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3218 'title': 'Игорь Клейнер - Playlists',
3219 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3220 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3221 'uploader': 'Игорь Клейнер',
3222 },
3223 }, {
3224 'note': 'playlists, series',
3225 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3226 'playlist_mincount': 5,
3227 'info_dict': {
3228 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3229 'title': '3Blue1Brown - Playlists',
3230 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3231 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3232 'uploader': '3Blue1Brown',
3233 },
3234 }, {
3235 'note': 'playlists, singlepage',
3236 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3237 'playlist_mincount': 4,
3238 'info_dict': {
3239 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3240 'title': 'ThirstForScience - Playlists',
3241 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3242 'uploader': 'ThirstForScience',
3243 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3244 }
3245 }, {
3246 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3247 'only_matching': True,
3248 }, {
3249 'note': 'basic, single video playlist',
3250 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3251 'info_dict': {
3252 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3253 'uploader': 'Sergey M.',
3254 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3255 'title': 'youtube-dl public playlist',
3256 },
3257 'playlist_count': 1,
3258 }, {
3259 'note': 'empty playlist',
3260 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3261 'info_dict': {
3262 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3263 'uploader': 'Sergey M.',
3264 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3265 'title': 'youtube-dl empty playlist',
3266 },
3267 'playlist_count': 0,
3268 }, {
3269 'note': 'Home tab',
3270 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3271 'info_dict': {
3272 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3273 'title': 'lex will - Home',
3274 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3275 'uploader': 'lex will',
3276 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3277 },
3278 'playlist_mincount': 2,
3279 }, {
3280 'note': 'Videos tab',
3281 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3282 'info_dict': {
3283 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3284 'title': 'lex will - Videos',
3285 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3286 'uploader': 'lex will',
3287 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3288 },
3289 'playlist_mincount': 975,
3290 }, {
3291 'note': 'Videos tab, sorted by popular',
3292 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3293 'info_dict': {
3294 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3295 'title': 'lex will - Videos',
3296 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3297 'uploader': 'lex will',
3298 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3299 },
3300 'playlist_mincount': 199,
3301 }, {
3302 'note': 'Playlists tab',
3303 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3304 'info_dict': {
3305 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3306 'title': 'lex will - Playlists',
3307 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3308 'uploader': 'lex will',
3309 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3310 },
3311 'playlist_mincount': 17,
3312 }, {
3313 'note': 'Community tab',
3314 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3315 'info_dict': {
3316 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3317 'title': 'lex will - Community',
3318 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3319 'uploader': 'lex will',
3320 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3321 },
3322 'playlist_mincount': 18,
3323 }, {
3324 'note': 'Channels tab',
3325 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3326 'info_dict': {
3327 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3328 'title': 'lex will - Channels',
3329 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3330 'uploader': 'lex will',
3331 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3332 },
3333 'playlist_mincount': 12,
3334 }, {
3335 'note': 'Search tab',
3336 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3337 'playlist_mincount': 40,
3338 'info_dict': {
3339 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3340 'title': '3Blue1Brown - Search - linear algebra',
3341 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3342 'uploader': '3Blue1Brown',
3343 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3344 },
3345 }, {
3346 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3347 'only_matching': True,
3348 }, {
3349 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3350 'only_matching': True,
3351 }, {
3352 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3353 'only_matching': True,
3354 }, {
3355 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3356 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3357 'info_dict': {
3358 'title': '29C3: Not my department',
3359 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3360 'uploader': 'Christiaan008',
3361 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3362 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3363 },
3364 'playlist_count': 96,
3365 }, {
3366 'note': 'Large playlist',
3367 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3368 'info_dict': {
3369 'title': 'Uploads from Cauchemar',
3370 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3371 'uploader': 'Cauchemar',
3372 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3373 },
3374 'playlist_mincount': 1123,
3375 }, {
3376 'note': 'even larger playlist, 8832 videos',
3377 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3378 'only_matching': True,
3379 }, {
3380 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3381 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3382 'info_dict': {
3383 'title': 'Uploads from Interstellar Movie',
3384 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3385 'uploader': 'Interstellar Movie',
3386 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3387 },
3388 'playlist_mincount': 21,
3389 }, {
3390 'note': 'Playlist with "show unavailable videos" button',
3391 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3392 'info_dict': {
3393 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3394 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3395 'uploader': 'Phim Siêu Nhân Nhật Bản',
3396 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3397 },
3398 'playlist_mincount': 200,
3399 }, {
3400 'note': 'Playlist with unavailable videos in page 7',
3401 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3402 'info_dict': {
3403 'title': 'Uploads from BlankTV',
3404 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3405 'uploader': 'BlankTV',
3406 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3407 },
3408 'playlist_mincount': 1000,
3409 }, {
3410 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3411 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3412 'info_dict': {
3413 'title': 'Data Analysis with Dr Mike Pound',
3414 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3415 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3416 'uploader': 'Computerphile',
3417 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3418 },
3419 'playlist_mincount': 11,
3420 }, {
3421 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3422 'only_matching': True,
3423 }, {
3424 'note': 'Playlist URL that does not actually serve a playlist',
3425 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3426 'info_dict': {
3427 'id': 'FqZTN594JQw',
3428 'ext': 'webm',
3429 'title': "Smiley's People 01 detective, Adventure Series, Action",
3430 'uploader': 'STREEM',
3431 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3432 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3433 'upload_date': '20150526',
3434 'license': 'Standard YouTube License',
3435 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3436 'categories': ['People & Blogs'],
3437 'tags': list,
3438 'view_count': int,
3439 'like_count': int,
3440 'dislike_count': int,
3441 },
3442 'params': {
3443 'skip_download': True,
3444 },
3445 'skip': 'This video is not available.',
3446 'add_ie': [YoutubeIE.ie_key()],
3447 }, {
3448 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3449 'only_matching': True,
3450 }, {
3451 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3452 'only_matching': True,
3453 }, {
3454 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3455 'info_dict': {
3456 'id': 'FMtPN8yp5LU', # This will keep changing
3457 'ext': 'mp4',
3458 'title': compat_str,
3459 'uploader': 'Sky News',
3460 'uploader_id': 'skynews',
3461 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3462 'upload_date': r're:\d{8}',
3463 'description': compat_str,
3464 'categories': ['News & Politics'],
3465 'tags': list,
3466 'like_count': int,
3467 'dislike_count': int,
3468 },
3469 'params': {
3470 'skip_download': True,
3471 },
3472 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3473 }, {
3474 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3475 'info_dict': {
3476 'id': 'a48o2S1cPoo',
3477 'ext': 'mp4',
3478 'title': 'The Young Turks - Live Main Show',
3479 'uploader': 'The Young Turks',
3480 'uploader_id': 'TheYoungTurks',
3481 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3482 'upload_date': '20150715',
3483 'license': 'Standard YouTube License',
3484 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3485 'categories': ['News & Politics'],
3486 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3487 'like_count': int,
3488 'dislike_count': int,
3489 },
3490 'params': {
3491 'skip_download': True,
3492 },
3493 'only_matching': True,
3494 }, {
3495 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3496 'only_matching': True,
3497 }, {
3498 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3499 'only_matching': True,
3500 }, {
3501 'note': 'A channel that is not live. Should raise error',
3502 'url': 'https://www.youtube.com/user/numberphile/live',
3503 'only_matching': True,
3504 }, {
3505 'url': 'https://www.youtube.com/feed/trending',
3506 'only_matching': True,
3507 }, {
3508 'url': 'https://www.youtube.com/feed/library',
3509 'only_matching': True,
3510 }, {
3511 'url': 'https://www.youtube.com/feed/history',
3512 'only_matching': True,
3513 }, {
3514 'url': 'https://www.youtube.com/feed/subscriptions',
3515 'only_matching': True,
3516 }, {
3517 'url': 'https://www.youtube.com/feed/watch_later',
3518 'only_matching': True,
3519 }, {
3520 'note': 'Recommended - redirects to home page',
3521 'url': 'https://www.youtube.com/feed/recommended',
3522 'only_matching': True,
3523 }, {
3524 'note': 'inline playlist with not always working continuations',
3525 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3526 'only_matching': True,
3527 }, {
3528 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3529 'only_matching': True,
3530 }, {
3531 'url': 'https://www.youtube.com/course',
3532 'only_matching': True,
3533 }, {
3534 'url': 'https://www.youtube.com/zsecurity',
3535 'only_matching': True,
3536 }, {
3537 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3538 'only_matching': True,
3539 }, {
3540 'url': 'https://www.youtube.com/TheYoungTurks/live',
3541 'only_matching': True,
3542 }, {
3543 'url': 'https://www.youtube.com/hashtag/cctv9',
3544 'info_dict': {
3545 'id': 'cctv9',
3546 'title': '#cctv9',
3547 },
3548 'playlist_mincount': 350,
3549 }, {
3550 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3551 'only_matching': True,
3552 }, {
3553 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3554 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3555 'only_matching': True
3556 }, {
3557 'note': '/browse/ should redirect to /channel/',
3558 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3559 'only_matching': True
3560 }, {
3561 'note': 'VLPL, should redirect to playlist?list=PL...',
3562 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3563 'info_dict': {
3564 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3565 'uploader': 'NoCopyrightSounds',
3566 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3567 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3568 'title': 'NCS Releases',
3569 },
3570 'playlist_mincount': 166,
3571 }, {
3572 'note': 'Topic, should redirect to playlist?list=UU...',
3573 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3574 'info_dict': {
3575 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3576 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3577 'title': 'Uploads from Royalty Free Music - Topic',
3578 'uploader': 'Royalty Free Music - Topic',
3579 },
3580 'expected_warnings': [
3581 'A channel/user page was given',
3582 'The URL does not have a videos tab',
3583 ],
3584 'playlist_mincount': 101,
3585 }, {
3586 'note': 'Topic without a UU playlist',
3587 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3588 'info_dict': {
3589 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3590 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3591 },
3592 'expected_warnings': [
3593 'A channel/user page was given',
3594 'The URL does not have a videos tab',
3595 'Falling back to channel URL',
3596 ],
3597 'playlist_mincount': 9,
3598 }, {
3599 'note': 'Youtube music Album',
3600 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3601 'info_dict': {
3602 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3603 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3604 },
3605 'playlist_count': 50,
3606 }, {
3607 'note': 'unlisted single video playlist',
3608 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3609 'info_dict': {
3610 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3611 'uploader': 'colethedj',
3612 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3613 'title': 'yt-dlp unlisted playlist test',
3614 'availability': 'unlisted'
3615 },
3616 'playlist_count': 1,
3617 }]
3618
3619 @classmethod
3620 def suitable(cls, url):
3621 return False if YoutubeIE.suitable(url) else super(
3622 YoutubeTabIE, cls).suitable(url)
3623
3624 def _extract_channel_id(self, webpage):
3625 channel_id = self._html_search_meta(
3626 'channelId', webpage, 'channel id', default=None)
3627 if channel_id:
3628 return channel_id
3629 channel_url = self._html_search_meta(
3630 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3631 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3632 'twitter:app:url:googleplay'), webpage, 'channel url')
3633 return self._search_regex(
3634 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3635 channel_url, 'channel id')
3636
3637 @staticmethod
3638 def _extract_basic_item_renderer(item):
3639 # Modified from _extract_grid_item_renderer
3640 known_basic_renderers = (
3641 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3642 )
3643 for key, renderer in item.items():
3644 if not isinstance(renderer, dict):
3645 continue
3646 elif key in known_basic_renderers:
3647 return renderer
3648 elif key.startswith('grid') and key.endswith('Renderer'):
3649 return renderer
3650
3651 def _grid_entries(self, grid_renderer):
3652 for item in grid_renderer['items']:
3653 if not isinstance(item, dict):
3654 continue
3655 renderer = self._extract_basic_item_renderer(item)
3656 if not isinstance(renderer, dict):
3657 continue
3658 title = self._get_text(renderer, 'title')
3659
3660 # playlist
3661 playlist_id = renderer.get('playlistId')
3662 if playlist_id:
3663 yield self.url_result(
3664 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3665 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3666 video_title=title)
3667 continue
3668 # video
3669 video_id = renderer.get('videoId')
3670 if video_id:
3671 yield self._extract_video(renderer)
3672 continue
3673 # channel
3674 channel_id = renderer.get('channelId')
3675 if channel_id:
3676 yield self.url_result(
3677 'https://www.youtube.com/channel/%s' % channel_id,
3678 ie=YoutubeTabIE.ie_key(), video_title=title)
3679 continue
3680 # generic endpoint URL support
3681 ep_url = urljoin('https://www.youtube.com/', try_get(
3682 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3683 compat_str))
3684 if ep_url:
3685 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3686 if ie.suitable(ep_url):
3687 yield self.url_result(
3688 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3689 break
3690
3691 def _shelf_entries_from_content(self, shelf_renderer):
3692 content = shelf_renderer.get('content')
3693 if not isinstance(content, dict):
3694 return
3695 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3696 if renderer:
3697 # TODO: add support for nested playlists so each shelf is processed
3698 # as separate playlist
3699 # TODO: this includes only first N items
3700 for entry in self._grid_entries(renderer):
3701 yield entry
3702 renderer = content.get('horizontalListRenderer')
3703 if renderer:
3704 # TODO
3705 pass
3706
3707 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3708 ep = try_get(
3709 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3710 compat_str)
3711 shelf_url = urljoin('https://www.youtube.com', ep)
3712 if shelf_url:
3713 # Skipping links to another channels, note that checking for
3714 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3715 # will not work
3716 if skip_channels and '/channels?' in shelf_url:
3717 return
3718 title = self._get_text(shelf_renderer, 'title')
3719 yield self.url_result(shelf_url, video_title=title)
3720 # Shelf may not contain shelf URL, fallback to extraction from content
3721 for entry in self._shelf_entries_from_content(shelf_renderer):
3722 yield entry
3723
3724 def _playlist_entries(self, video_list_renderer):
3725 for content in video_list_renderer['contents']:
3726 if not isinstance(content, dict):
3727 continue
3728 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3729 if not isinstance(renderer, dict):
3730 continue
3731 video_id = renderer.get('videoId')
3732 if not video_id:
3733 continue
3734 yield self._extract_video(renderer)
3735
3736 def _rich_entries(self, rich_grid_renderer):
3737 renderer = try_get(
3738 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3739 video_id = renderer.get('videoId')
3740 if not video_id:
3741 return
3742 yield self._extract_video(renderer)
3743
3744 def _video_entry(self, video_renderer):
3745 video_id = video_renderer.get('videoId')
3746 if video_id:
3747 return self._extract_video(video_renderer)
3748
3749 def _post_thread_entries(self, post_thread_renderer):
3750 post_renderer = try_get(
3751 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3752 if not post_renderer:
3753 return
3754 # video attachment
3755 video_renderer = try_get(
3756 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3757 video_id = video_renderer.get('videoId')
3758 if video_id:
3759 entry = self._extract_video(video_renderer)
3760 if entry:
3761 yield entry
3762 # playlist attachment
3763 playlist_id = try_get(
3764 post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3765 if playlist_id:
3766 yield self.url_result(
3767 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3768 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3769 # inline video links
3770 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3771 for run in runs:
3772 if not isinstance(run, dict):
3773 continue
3774 ep_url = try_get(
3775 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3776 if not ep_url:
3777 continue
3778 if not YoutubeIE.suitable(ep_url):
3779 continue
3780 ep_video_id = YoutubeIE._match_id(ep_url)
3781 if video_id == ep_video_id:
3782 continue
3783 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3784
3785 def _post_thread_continuation_entries(self, post_thread_continuation):
3786 contents = post_thread_continuation.get('contents')
3787 if not isinstance(contents, list):
3788 return
3789 for content in contents:
3790 renderer = content.get('backstagePostThreadRenderer')
3791 if not isinstance(renderer, dict):
3792 continue
3793 for entry in self._post_thread_entries(renderer):
3794 yield entry
3795
3796 r''' # unused
3797 def _rich_grid_entries(self, contents):
3798 for content in contents:
3799 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3800 if video_renderer:
3801 entry = self._video_entry(video_renderer)
3802 if entry:
3803 yield entry
3804 '''
    def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
        """Yield all entries of the selected tab, following continuations across pages.

        @param tab              selected tab renderer
        @param item_id          id used for logging in API requests
        @param identity_token   token for authenticated requests (may be None)
        @param account_syncid   account syncid for multi-channel accounts (may be None)
        @param ytcfg            parsed ytcfg of the webpage
        """

        def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # not an item section; handle rich items (e.g. home feed)
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue

                    # dispatch table: renderer key -> generator of entries
                    known_renderers = {
                        'playlistVideoListRenderer': self._playlist_entries,
                        'gridRenderer': self._grid_entries,
                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                        'backstagePostThreadRenderer': self._post_thread_entries,
                        'videoRenderer': lambda x: [self._video_entry(x)],
                    }
                    for key, renderer in isr_content.items():
                        if key not in known_renderers:
                            continue
                        for entry in known_renderers[key](renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        break

                    if not continuation_list[0]:
                        continuation_list[0] = self._extract_continuation(is_renderer)

                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(parent_renderer)

        # Python 2 does not support nonlocal; a one-element list emulates it so
        # extract_entries() can report the next continuation back to us
        continuation_list = [None]
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]
        visitor_data = None

        # Follow continuations until YouTube stops returning one
        for page_num in itertools.count(1):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=continuation, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # Carry visitorData forward so subsequent pages stay consistent
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            # Old-style continuation payloads ('continuationContents')
            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # New-style continuation payloads ('onResponseReceived*'): wrap the
            # continuation items so the matching extractor can consume them
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'gridChannelRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            break
3920
3921 @staticmethod
3922 def _extract_selected_tab(tabs):
3923 for tab in tabs:
3924 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3925 if renderer.get('selected') is True:
3926 return renderer
3927 else:
3928 raise ExtractorError('Unable to find selected tab')
3929
3930 @classmethod
3931 def _extract_uploader(cls, data):
3932 uploader = {}
3933 renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3934 owner = try_get(
3935 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3936 if owner:
3937 uploader['uploader'] = owner.get('text')
3938 uploader['uploader_id'] = try_get(
3939 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3940 uploader['uploader_url'] = urljoin(
3941 'https://www.youtube.com/',
3942 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3943 return {k: v for k, v in uploader.items() if v is not None}
3944
    def _extract_from_tabs(self, item_id, webpage, data, tabs):
        """Build the playlist result for the selected tab of a channel/playlist page.

        @param item_id  fallback playlist id (usually from the URL)
        @param webpage  downloaded webpage (used for ytcfg/identity token)
        @param data     parsed ytInitialData
        @param tabs     list of tab renderers; exactly one must be selected
        """
        playlist_id = title = description = channel_url = channel_name = channel_id = None
        thumbnails_list = tags = []

        selected_tab = self._extract_selected_tab(tabs)
        # Channel pages expose channelMetadataRenderer; playlist pages expose
        # playlistMetadataRenderer instead
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        if renderer:
            channel_name = renderer.get('title')
            channel_url = renderer.get('channelUrl')
            channel_id = renderer.get('externalId')
        else:
            renderer = try_get(
                data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

        if renderer:
            title = renderer.get('title')
            description = renderer.get('description', '')
            playlist_id = channel_id
            tags = renderer.get('keywords', '').split()
            # Prefer the avatar; fall back to the sidebar's video thumbnail
            thumbnails_list = (
                try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
                or try_get(
                    self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                    lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                    list)
                or [])

        thumbnails = []
        for t in thumbnails_list:
            if not isinstance(t, dict):
                continue
            thumbnail_url = url_or_none(t.get('url'))
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(t.get('width')),
                'height': int_or_none(t.get('height')),
            })
        if playlist_id is None:
            playlist_id = item_id
        if title is None:
            # hashtag pages have no metadata renderer; use the hashtag header
            title = (
                try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
                or playlist_id)
        # Suffix the tab name, e.g. "channel - Videos"
        title += format_field(selected_tab, 'title', ' - %s')
        title += format_field(selected_tab, 'expandedText', ' - %s')
        metadata = {
            'playlist_id': playlist_id,
            'playlist_title': title,
            'playlist_description': description,
            'uploader': channel_name,
            'uploader_id': channel_id,
            'uploader_url': channel_url,
            'thumbnails': thumbnails,
            'tags': tags,
        }
        availability = self._extract_availability(data)
        if availability:
            metadata['availability'] = availability
        if not channel_id:
            # No channel metadata (playlist page): get uploader from sidebar
            metadata.update(self._extract_uploader(data))
        metadata.update({
            'channel': metadata['uploader'],
            'channel_id': metadata['uploader_id'],
            'channel_url': metadata['uploader_url']})
        ytcfg = self.extract_ytcfg(item_id, webpage)
        return self.playlist_result(
            self._entries(
                selected_tab, playlist_id,
                self._extract_identity_token(webpage, item_id),
                self._extract_account_syncid(ytcfg, data), ytcfg),
            **metadata)
4019
    def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
        """Yield videos of a Mix playlist, paging through the 'next' endpoint.

        Stops when a page yields nothing new or when the first video comes
        around again (Mixes appear to loop endlessly).
        """
        first_id = last_id = None
        ytcfg = self.extract_ytcfg(playlist_id, webpage)
        headers = self.generate_api_headers(
            ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
        for page_num in itertools.count(1):
            videos = list(self._playlist_entries(playlist))
            if not videos:
                return
            # Pages overlap: resume right after the last video already yielded
            start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
            if start >= len(videos):
                return
            for video in videos[start:]:
                if video['id'] == first_id:
                    self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                    return
                yield video
            first_id = first_id or videos[0]['id']
            last_id = videos[-1]['id']
            watch_endpoint = try_get(
                playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
            query = {
                'playlistId': playlist_id,
                'videoId': watch_endpoint.get('videoId') or last_id,
                'index': watch_endpoint.get('index') or len(videos),
                # NOTE(review): fallback 'params' value semantics unconfirmed;
                # presumably requests the playlist continuation
                'params': watch_endpoint.get('params') or 'OAE%3D'
            }
            response = self._extract_response(
                item_id='%s page %d' % (playlist_id, page_num),
                query=query, ep='next', headers=headers, ytcfg=ytcfg,
                check_get_keys='contents'
            )
            playlist = try_get(
                response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4055
4056 def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
4057 title = playlist.get('title') or try_get(
4058 data, lambda x: x['titleText']['simpleText'], compat_str)
4059 playlist_id = playlist.get('playlistId') or item_id
4060
4061 # Delegating everything except mix playlists to regular tab-based playlist URL
4062 playlist_url = urljoin(url, try_get(
4063 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
4064 compat_str))
4065 if playlist_url and playlist_url != url:
4066 return self.url_result(
4067 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4068 video_title=title)
4069
4070 return self.playlist_result(
4071 self._extract_mix_playlist(playlist, playlist_id, data, webpage),
4072 playlist_id=playlist_id, playlist_title=title)
4073
4074 def _extract_availability(self, data):
4075 """
4076 Gets the availability of a given playlist/tab.
4077 Note: Unless YouTube tells us explicitly, we do not assume it is public
4078 @param data: response
4079 """
4080 is_private = is_unlisted = None
4081 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
4082 badge_labels = self._extract_badges(renderer)
4083
4084 # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
4085 privacy_dropdown_entries = try_get(
4086 renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
4087 for renderer_dict in privacy_dropdown_entries:
4088 is_selected = try_get(
4089 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
4090 if not is_selected:
4091 continue
4092 label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
4093 if label:
4094 badge_labels.add(label.lower())
4095 break
4096
4097 for badge_label in badge_labels:
4098 if badge_label == 'unlisted':
4099 is_unlisted = True
4100 elif badge_label == 'private':
4101 is_private = True
4102 elif badge_label == 'public':
4103 is_unlisted = is_private = False
4104 return self._availability(is_private, False, False, False, is_unlisted)
4105
4106 @staticmethod
4107 def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
4108 sidebar_renderer = try_get(
4109 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
4110 for item in sidebar_renderer:
4111 renderer = try_get(item, lambda x: x[info_renderer], expected_type)
4112 if renderer:
4113 return renderer
4114
4115 def _reload_with_unavailable_videos(self, item_id, data, webpage):
4116 """
4117 Get playlist with unavailable videos if the 'show unavailable videos' button exists.
4118 """
4119 browse_id = params = None
4120 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
4121 if not renderer:
4122 return
4123 menu_renderer = try_get(
4124 renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
4125 for menu_item in menu_renderer:
4126 if not isinstance(menu_item, dict):
4127 continue
4128 nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
4129 text = try_get(
4130 nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
4131 if not text or text.lower() != 'show unavailable videos':
4132 continue
4133 browse_endpoint = try_get(
4134 nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
4135 browse_id = browse_endpoint.get('browseId')
4136 params = browse_endpoint.get('params')
4137 break
4138
4139 ytcfg = self.extract_ytcfg(item_id, webpage)
4140 headers = self.generate_api_headers(
4141 ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
4142 identity_token=self._extract_identity_token(webpage, item_id=item_id),
4143 visitor_data=try_get(
4144 self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
4145 query = {
4146 'params': params or 'wgYCCAA=',
4147 'browseId': browse_id or 'VL%s' % item_id
4148 }
4149 return self._extract_response(
4150 item_id=item_id, headers=headers, query=query,
4151 check_get_keys='contents', fatal=False, ytcfg=ytcfg,
4152 note='Downloading API JSON with unavailable videos')
4153
4154 def _extract_webpage(self, url, item_id):
4155 retries = self.get_param('extractor_retries', 3)
4156 count = -1
4157 last_error = 'Incomplete yt initial data recieved'
4158 while count < retries:
4159 count += 1
4160 # Sometimes youtube returns a webpage with incomplete ytInitialData
4161 # See: https://github.com/yt-dlp/yt-dlp/issues/116
4162 if count:
4163 self.report_warning('%s. Retrying ...' % last_error)
4164 webpage = self._download_webpage(
4165 url, item_id,
4166 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
4167 data = self.extract_yt_initial_data(item_id, webpage)
4168 if data.get('contents') or data.get('currentVideoEndpoint'):
4169 break
4170 # Extract alerts here only when there is error
4171 self._extract_and_report_alerts(data)
4172 if count >= retries:
4173 raise ExtractorError(last_error)
4174 return webpage, data
4175
4176 @staticmethod
4177 def _smuggle_data(entries, data):
4178 for entry in entries:
4179 if data:
4180 entry['url'] = smuggle_url(entry['url'], data)
4181 yield entry
4182
4183 def _real_extract(self, url):
4184 url, smuggled_data = unsmuggle_url(url, {})
4185 if self.is_music_url(url):
4186 smuggled_data['is_music_url'] = True
4187 info_dict = self.__real_extract(url, smuggled_data)
4188 if info_dict.get('entries'):
4189 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4190 return info_dict
4191
    # Splits a tab URL into pre/tab/post parts; the 'channel_type' and
    # 'not_channel' groups referenced here come from _VALID_URL.
    _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4193
    def __real_extract(self, url, smuggled_data):
        """
        Core tab/playlist extraction: normalizes the URL (music redirects,
        missing tab names, watch?list= mistakes), downloads the page and
        dispatches to tab, playlist or single-video handling.
        """
        item_id = self._match_id(url)
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # Regex groups with no match become '' so downstream string
            # operations never see None
            mobj = self._url_re.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj = get_mobj(url)
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':
                    # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif item_id[:2] == 'MP':
                    # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                    item_id = self._search_regex(
                        r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                        self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                        'playlist id')
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif mobj['channel_type'] == 'browse':
                    # Youtube music /browse/ should be changed to /channel/
                    pre = 'https://www.youtube.com/channel/%s' % item_id
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        # Re-parse after the rewrites above so mobj reflects the final URL
        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage, data = self._extract_webpage(url, item_id)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                        pl_id = 'UU%s' % item_id[2:]
                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                        try:
                            pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                            for alert_type, alert_message in self._extract_alerts(pl_data):
                                if alert_type == 'error':
                                    raise ExtractorError('Youtube said: %s' % alert_message)
                            item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                        except ExtractorError:
                            self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

        self.write_debug('Final URL: %s' % url)

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
        self._extract_and_report_alerts(data)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)

        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, webpage)

        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4308
4309
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Anything the tab extractor handles, and any URL carrying an
        # explicit video id, is not ours.
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        # Remember whether this came from a music domain before the URL is
        # normalized, then hand off to the tab extractor.
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        canonical = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if from_music:
            canonical = smuggle_url(canonical, {'is_music_url': True})
        return self.url_result(canonical, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4394
4395
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Rewrite short-link + list URLs into a canonical watch URL and let
        # the tab extractor decide between video and playlist.
        mobj = re.match(self._VALID_URL, url)
        video_id, playlist_id = mobj.group('id'), mobj.group('playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4434
4435
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Map the 'ytuser:<name>' shorthand onto the user's channel page.
        user_id = self._match_id(url)
        channel_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(channel_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4449
4450
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Liked videos live in the special 'LL' playlist.
        liked_playlist = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist, ie=YoutubeTabIE.ie_key())
4468
4469
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra encoded search parameters (e.g. sort order); None means default search
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to n video results for query from the search API, following
        continuations across pages."""
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        continuation = {}
        for page_num in itertools.count(1):
            data.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # First page nests results under 'contents'; continuation pages
            # under 'onResponseReceivedCommands'
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for slr_content in slr_contents:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [slr_content]})

                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    if not isinstance(content, dict):
                        continue
                    # Only plain video results are yielded; other renderer
                    # types (ads, shelves, channels) are skipped
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query, query)
4537
4538
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same as YoutubeSearchIE but sorts results newest-first via _SEARCH_PARAMS.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # Encoded search filter selecting upload-date ordering
    _SEARCH_PARAMS = 'CAI%3D'
4544
4545
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        return cls._VALID_URL

    def _real_extract(self, url):
        # Recover the search terms (and optional 'sp' filter) from the
        # results-page URL, then run a normal unbounded search.
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        query = (params.get('search_query') or params.get('q'))[0]
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
4572
4573
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derive the extractor name from the subclass's feed name.
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4590
4591
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Watch-later items live in the special 'WL' playlist.
        watch_later = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later, ie=YoutubeTabIE.ie_key())
4604
4605
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Also matches the bare youtube.com homepage (optionally with ?/# suffix)
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Recommendations work without login, unlike the other feeds
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4621
4622
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the logged-in user's subscriptions feed.
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4634
4635
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the logged-in user's watch history.
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4644
4645
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch URLs whose 'v=' parameter was lost (typically an unquoted
    # '&' in the shell) and raises a helpful error instead of failing obscurely.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Always an error: a matching URL cannot contain a video id.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
            expected=True)
4693
4694
class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose video id is shorter than the required 11
    # characters (i.e. got cut off) and raises a clear error.
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        raise ExtractorError(
            'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
            expected=True)