]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[youtube] Disable `get_video_info` age-gate workaround
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
2d6659b9 5import base64
d92f5d5a 6import calendar
109dd3b2 7import copy
fe93e2c4 8import datetime
a5c56234 9import hashlib
0ca96d48 10import itertools
c5e8d7af 11import json
c4417ddb 12import os.path
d77ab8e2 13import random
c5e8d7af 14import re
8a784c74 15import time
e0df6211 16import traceback
c5e8d7af 17
b05654f0 18from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 19from ..compat import (
edf3e38e 20 compat_chr,
29f7c58a 21 compat_HTTPError,
c5e8d7af 22 compat_parse_qs,
545cc85d 23 compat_str,
7fd002c0 24 compat_urllib_parse_unquote_plus,
15707c7e 25 compat_urllib_parse_urlencode,
7c80519c 26 compat_urllib_parse_urlparse,
7c61bd36 27 compat_urlparse,
4bb4a188 28)
545cc85d 29from ..jsinterp import JSInterpreter
4bb4a188 30from ..utils import (
2d6659b9 31 bytes_to_intlist,
c5e8d7af 32 clean_html,
d92f5d5a 33 datetime_from_str,
11f9be09 34 dict_get,
358de58c 35 error_to_compat_str,
c5e8d7af 36 ExtractorError,
2d30521a 37 float_or_none,
11f9be09 38 format_field,
dd27fd17 39 int_or_none,
2d6659b9 40 intlist_to_bytes,
94278f72 41 mimetype2ext,
9c0d7f49 42 network_exceptions,
11f9be09 43 orderedSet,
6310acf5 44 parse_codecs,
49bd8c66 45 parse_count,
7c80519c 46 parse_duration,
7ea65411 47 parse_iso8601,
dca3ff4a 48 qualities,
3995d37d 49 remove_start,
cf7e015f 50 smuggle_url,
dbdaaa23 51 str_or_none,
c93d53f5 52 str_to_int,
7c365c21 53 traverse_obj,
556dbe7f 54 try_get,
c5e8d7af
PH
55 unescapeHTML,
56 unified_strdate,
cf7e015f 57 unsmuggle_url,
8bdd16b4 58 update_url_query,
21c340b8 59 url_or_none,
6e6bc8da 60 urlencode_postdata,
fe93e2c4 61 urljoin,
7c365c21 62 variadic,
c5e8d7af
PH
63)
64
5f6a1245 65
def parse_qs(url):
    """Parse the query component of `url` with compat_urlparse.parse_qs and return the result"""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
68
69
de7f3446 70class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b
JMF
71 """Provide base functions for Youtube extractors"""
# Google account endpoints for the username/password sign-in flow
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

_LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
_CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
# The {0} placeholder is meant to be filled in with str.format()
_TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

# Regex alternation of reserved path names
# NOTE(review): presumably used to keep these from being matched as channel/user names - confirm against the users of this constant
_RESERVED_NAMES = (
    r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
    r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
    r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

_NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False

# Regex for playlist IDs: either a known prefix followed by at least 10 ID characters,
# or one of the fixed IDs RDMM, WL, LL, LM
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 89
b2e8bc1b 90 def _login(self):
83317f69 91 """
92 Attempt to log in to YouTube.
93 True is returned if successful or skipped.
94 False is returned if login failed.
95
96 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
97 """
9d5d4d64 98
99 def warn(message):
100 self.report_warning(message)
101
102 # username+password login is broken
982ee69a
MB
103 if (self._LOGIN_REQUIRED
104 and self.get_param('cookiefile') is None
105 and self.get_param('cookiesfrombrowser') is None):
9d5d4d64 106 self.raise_login_required(
107 'Login details are needed to download this content', method='cookies')
68217024 108 username, password = self._get_login_info()
9d5d4d64 109 if username:
110 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
111 return
9d5d4d64 112
2d6659b9 113 # Everything below this is broken!
114 r'''
b2e8bc1b
JMF
115 # No authentication to be performed
116 if username is None:
a06916d9 117 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
69ea8ca4 118 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
a06916d9 119 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
545cc85d 120 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
83317f69 121 return True
b2e8bc1b 122
7cc3570e
PH
123 login_page = self._download_webpage(
124 self._LOGIN_URL, None,
69ea8ca4
PH
125 note='Downloading login page',
126 errnote='unable to fetch login page', fatal=False)
7cc3570e
PH
127 if login_page is False:
128 return
b2e8bc1b 129
1212e997 130 login_form = self._hidden_inputs(login_page)
c5e8d7af 131
e00eb564
S
132 def req(url, f_req, note, errnote):
133 data = login_form.copy()
134 data.update({
135 'pstMsg': 1,
136 'checkConnection': 'youtube',
137 'checkedDomains': 'youtube',
138 'hl': 'en',
139 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
3995d37d 140 'f.req': json.dumps(f_req),
e00eb564
S
141 'flowName': 'GlifWebSignIn',
142 'flowEntry': 'ServiceLogin',
baf67a60
S
143 # TODO: reverse actual botguard identifier generation algo
144 'bgRequest': '["identifier",""]',
041bc3ad 145 })
e00eb564
S
146 return self._download_json(
147 url, None, note=note, errnote=errnote,
148 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
149 fatal=False,
150 data=urlencode_postdata(data), headers={
151 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
152 'Google-Accounts-XSRF': 1,
153 })
154
3995d37d
S
155 lookup_req = [
156 username,
157 None, [], None, 'US', None, None, 2, False, True,
158 [
159 None, None,
160 [2, 1, None, 1,
161 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
162 None, [], 4],
163 1, [None, None, []], None, None, None, True
164 ],
165 username,
166 ]
167
e00eb564 168 lookup_results = req(
3995d37d 169 self._LOOKUP_URL, lookup_req,
e00eb564
S
170 'Looking up account info', 'Unable to look up account info')
171
172 if lookup_results is False:
173 return False
041bc3ad 174
3995d37d
S
175 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
176 if not user_hash:
177 warn('Unable to extract user hash')
178 return False
179
180 challenge_req = [
181 user_hash,
182 None, 1, None, [1, None, None, None, [password, None, True]],
183 [
184 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
185 1, [None, None, []], None, None, None, True
186 ]]
83317f69 187
3995d37d
S
188 challenge_results = req(
189 self._CHALLENGE_URL, challenge_req,
190 'Logging in', 'Unable to log in')
83317f69 191
3995d37d 192 if challenge_results is False:
e00eb564 193 return
83317f69 194
3995d37d
S
195 login_res = try_get(challenge_results, lambda x: x[0][5], list)
196 if login_res:
197 login_msg = try_get(login_res, lambda x: x[5], compat_str)
198 warn(
199 'Unable to login: %s' % 'Invalid password'
200 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
201 return False
202
203 res = try_get(challenge_results, lambda x: x[0][-1], list)
204 if not res:
205 warn('Unable to extract result entry')
206 return False
207
9a6628aa
S
208 login_challenge = try_get(res, lambda x: x[0][0], list)
209 if login_challenge:
210 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
211 if challenge_str == 'TWO_STEP_VERIFICATION':
3995d37d
S
212 # SEND_SUCCESS - TFA code has been successfully sent to phone
213 # QUOTA_EXCEEDED - reached the limit of TFA codes
9a6628aa 214 status = try_get(login_challenge, lambda x: x[5], compat_str)
3995d37d
S
215 if status == 'QUOTA_EXCEEDED':
216 warn('Exceeded the limit of TFA codes, try later')
217 return False
218
219 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
220 if not tl:
221 warn('Unable to extract TL')
222 return False
223
224 tfa_code = self._get_tfa_info('2-step verification code')
225
226 if not tfa_code:
227 warn(
228 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
229 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
230 return False
231
232 tfa_code = remove_start(tfa_code, 'G-')
233
234 tfa_req = [
235 user_hash, None, 2, None,
236 [
237 9, None, None, None, None, None, None, None,
238 [None, tfa_code, True, 2]
239 ]]
240
241 tfa_results = req(
242 self._TFA_URL.format(tl), tfa_req,
243 'Submitting TFA code', 'Unable to submit TFA code')
244
245 if tfa_results is False:
246 return False
247
248 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
249 if tfa_res:
250 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
251 warn(
252 'Unable to finish TFA: %s' % 'Invalid TFA code'
253 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
254 return False
255
256 check_cookie_url = try_get(
257 tfa_results, lambda x: x[0][-1][2], compat_str)
9a6628aa
S
258 else:
259 CHALLENGES = {
260 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
261 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
262 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
263 }
264 challenge = CHALLENGES.get(
265 challenge_str,
266 '%s returned error %s.' % (self.IE_NAME, challenge_str))
267 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
268 return False
3995d37d
S
269 else:
270 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
271
272 if not check_cookie_url:
273 warn('Unable to extract CheckCookie URL')
274 return False
e00eb564
S
275
276 check_cookie_results = self._download_webpage(
3995d37d
S
277 check_cookie_url, None, 'Checking cookie', fatal=False)
278
279 if check_cookie_results is False:
280 return False
e00eb564 281
3995d37d
S
282 if 'https://myaccount.google.com/' not in check_cookie_results:
283 warn('Unable to log in')
b2e8bc1b 284 return False
e00eb564 285
b2e8bc1b 286 return True
2d6659b9 287 '''
b2e8bc1b 288
cce889b9 289 def _initialize_consent(self):
290 cookies = self._get_cookies('https://www.youtube.com/')
291 if cookies.get('__Secure-3PSID'):
292 return
293 consent_id = None
294 consent = cookies.get('CONSENT')
295 if consent:
296 if 'YES' in consent.value:
297 return
298 consent_id = self._search_regex(
299 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
300 if not consent_id:
301 consent_id = random.randint(100, 999)
302 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 303
b2e8bc1b 304 def _real_initialize(self):
cce889b9 305 self._initialize_consent()
b2e8bc1b
JMF
306 if self._downloader is None:
307 return
b2e8bc1b
JMF
308 if not self._login():
309 return
c5e8d7af 310
# Matches the `ytInitialData = {...};` assignment, including the `window["ytInitialData"]` form.
# Group 1 is the (non-greedily matched) object literal
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
# Same for the `ytInitialPlayerResponse = {...};` assignment
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
# Text expected right after the assignment; appended to the patterns above
# so that the non-greedy match does not stop at an earlier `};`
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 314
109dd3b2 315 _YT_DEFAULT_YTCFGS = {
316 'WEB': {
317 'INNERTUBE_API_VERSION': 'v1',
318 'INNERTUBE_CLIENT_NAME': 'WEB',
319 'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
320 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
321 'INNERTUBE_CONTEXT': {
322 'client': {
323 'clientName': 'WEB',
324 'clientVersion': '2.20210622.10.00',
325 'hl': 'en',
326 }
327 },
328 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
329 },
330 'WEB_REMIX': {
331 'INNERTUBE_API_VERSION': 'v1',
332 'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
333 'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
334 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
335 'INNERTUBE_CONTEXT': {
336 'client': {
337 'clientName': 'WEB_REMIX',
338 'clientVersion': '1.20210621.00.00',
339 'hl': 'en',
340 }
341 },
342 'INNERTUBE_CONTEXT_CLIENT_NAME': 67
343 },
344 'WEB_EMBEDDED_PLAYER': {
345 'INNERTUBE_API_VERSION': 'v1',
346 'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
347 'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
348 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
349 'INNERTUBE_CONTEXT': {
350 'client': {
351 'clientName': 'WEB_EMBEDDED_PLAYER',
352 'clientVersion': '1.20210620.0.1',
353 'hl': 'en',
354 }
355 },
356 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
357 },
358 'ANDROID': {
359 'INNERTUBE_API_VERSION': 'v1',
360 'INNERTUBE_CLIENT_NAME': 'ANDROID',
361 'INNERTUBE_CLIENT_VERSION': '16.20',
362 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
363 'INNERTUBE_CONTEXT': {
364 'client': {
365 'clientName': 'ANDROID',
366 'clientVersion': '16.20',
367 'hl': 'en',
368 }
369 },
fe93e2c4 370 'INNERTUBE_CONTEXT_CLIENT_NAME': 3
109dd3b2 371 },
372 'ANDROID_EMBEDDED_PLAYER': {
373 'INNERTUBE_API_VERSION': 'v1',
374 'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
375 'INNERTUBE_CLIENT_VERSION': '16.20',
376 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
377 'INNERTUBE_CONTEXT': {
378 'client': {
379 'clientName': 'ANDROID_EMBEDDED_PLAYER',
380 'clientVersion': '16.20',
381 'hl': 'en',
382 }
383 },
fe93e2c4 384 'INNERTUBE_CONTEXT_CLIENT_NAME': 55
109dd3b2 385 },
386 'ANDROID_MUSIC': {
387 'INNERTUBE_API_VERSION': 'v1',
388 'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
389 'INNERTUBE_CLIENT_VERSION': '4.32',
390 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
391 'INNERTUBE_CONTEXT': {
392 'client': {
393 'clientName': 'ANDROID_MUSIC',
394 'clientVersion': '4.32',
395 'hl': 'en',
396 }
397 },
fe93e2c4 398 'INNERTUBE_CONTEXT_CLIENT_NAME': 21
11f9be09 399 },
400 'IOS': {
401 'INNERTUBE_API_VERSION': 'v1',
402 'INNERTUBE_CLIENT_NAME': 'IOS',
403 'INNERTUBE_CLIENT_VERSION': '16.20',
404 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
405 'INNERTUBE_CONTEXT': {
406 'client': {
407 'clientName': 'IOS',
408 'clientVersion': '16.20',
409 'hl': 'en',
410 }
411 },
412 'INNERTUBE_CONTEXT_CLIENT_NAME': 5
413
414 },
415 'IOS_MUSIC': {
416 'INNERTUBE_API_VERSION': 'v1',
417 'INNERTUBE_CLIENT_NAME': 'IOS_MUSIC',
418 'INNERTUBE_CLIENT_VERSION': '4.32',
419 'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
420 'INNERTUBE_CONTEXT': {
421 'client': {
422 'clientName': 'IOS_MUSIC',
423 'clientVersion': '4.32',
424 'hl': 'en',
425 }
426 },
427 'INNERTUBE_CONTEXT_CLIENT_NAME': 26
428 },
429 'IOS_MESSAGES_EXTENSION': {
430 'INNERTUBE_API_VERSION': 'v1',
431 'INNERTUBE_CLIENT_NAME': 'IOS_MESSAGES_EXTENSION',
432 'INNERTUBE_CLIENT_VERSION': '16.20',
433 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
434 'INNERTUBE_CONTEXT': {
435 'client': {
436 'clientName': 'IOS_MESSAGES_EXTENSION',
437 'clientVersion': '16.20',
438 'hl': 'en',
439 }
440 },
441 'INNERTUBE_CONTEXT_CLIENT_NAME': 66
109dd3b2 442 }
443 }
444
# API hostname per INNERTUBE client. Clients without an entry of their own
# get the 'WEB' host (see _get_innertube_host)
_YT_DEFAULT_INNERTUBE_HOSTS = {
    'DIRECT': 'youtubei.googleapis.com',
    'WEB': 'www.youtube.com',
    'WEB_REMIX': 'music.youtube.com',
    'ANDROID_MUSIC': 'music.youtube.com'
}

# Client names mapped to INNERTUBE client names.
# clients starting with _ cannot be explicitly requested by the user
# NOTE: 'TVHTML5' has no entry in _YT_DEFAULT_YTCFGS, so _get_default_ytcfg falls back to WEB for it
_YT_CLIENTS = {
    'web': 'WEB',
    'web_music': 'WEB_REMIX',
    '_web_embedded': 'WEB_EMBEDDED_PLAYER',
    '_web_agegate': 'TVHTML5',
    'android': 'ANDROID',
    'android_music': 'ANDROID_MUSIC',
    '_android_embedded': 'ANDROID_EMBEDDED_PLAYER',
    '_android_agegate': 'ANDROID',
    'ios': 'IOS',
    'ios_music': 'IOS_MUSIC',
    '_ios_embedded': 'IOS_MESSAGES_EXTENSION',
    '_ios_agegate': 'IOS'
}
467
109dd3b2 468 def _get_default_ytcfg(self, client='WEB'):
469 if client in self._YT_DEFAULT_YTCFGS:
470 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
471 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
472 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
473
def _get_innertube_host(self, client='WEB'):
    """Look up the API hostname of `client` in _YT_DEFAULT_INNERTUBE_HOSTS, trying the 'WEB' entry next"""
    lookup_order = (client, 'WEB')
    return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, lookup_order)
476
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
    """
    try_get() on `ytcfg`, with the default ytcfg of `default_client` as fallback

    The defaults are only consulted when the value from `ytcfg` is falsy.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
481
def _extract_client_name(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_NAME from `ytcfg`, or else from the defaults of `default_client`"""
    def client_name(cfg):
        return cfg['INNERTUBE_CLIENT_NAME']

    return self._ytcfg_get_safe(ytcfg, client_name, compat_str, default_client)
484
@staticmethod
def _extract_session_index(*data):
    """Return the first SESSION_INDEX (passed through int_or_none) that is not None among the given ytcfgs"""
    candidates = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    # The generator is lazy, so later ytcfgs are only inspected when needed
    return next((index for index in candidates if index is not None), None)
314ee305 491
def _extract_client_version(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_VERSION from `ytcfg`, or else from the defaults of `default_client`"""
    def client_version(cfg):
        return cfg['INNERTUBE_CLIENT_VERSION']

    return self._ytcfg_get_safe(ytcfg, client_version, compat_str, default_client)
494
def _extract_api_key(self, ytcfg=None, default_client='WEB'):
    """Return INNERTUBE_API_KEY from `ytcfg`, or else from the defaults of `default_client`"""
    def api_key(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, api_key, compat_str, default_client)
497
def _extract_context(self, ytcfg=None, default_client='WEB'):
    """
    Return the INNERTUBE_CONTEXT to send with API requests

    The context found in `ytcfg` is returned as-is when present. Otherwise the
    default context of `default_client` is used and, if a ytcfg was given, its
    client name, client version and visitor data are filled in.
    """
    def context_of(cfg):
        return try_get(cfg, lambda x: x['INNERTUBE_CONTEXT'], dict)

    page_context = context_of(ytcfg)
    if page_context:
        return page_context

    default_context = context_of(self._get_default_ytcfg(default_client))
    if ytcfg:
        # Recreate the client context (required)
        client = default_context['client']
        client['clientVersion'] = self._extract_client_version(ytcfg, default_client)
        client['clientName'] = self._extract_client_name(ytcfg, default_client)
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            client['visitorData'] = visitor_data
    return default_context
517
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """
    Build the SAPISIDHASH value of the Authorization header for `origin`

    Returns None when neither a __Secure-3PAPISID nor a SAPISID cookie with a value exists.
    """
    # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
    # See: https://github.com/yt-dlp/yt-dlp/issues/393
    cookies = self._get_cookies('https://www.youtube.com')
    sapisid = dict_get(cookies, ('__Secure-3PAPISID', 'SAPISID'))
    if sapisid is None or not sapisid.value:
        return None

    timestamp = round(time.time())
    # SAPISID cookie is required if not already present
    if not cookies.get('SAPISID'):
        self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie', only_once=True)
        self._set_cookie(
            '.youtube.com', 'SAPISID', sapisid.value, secure=True, expire_time=timestamp + 3600)
    self.write_debug('Extracted SAPISID cookie', only_once=True)

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    hash_input = f'{timestamp} {sapisid.value} {origin}'
    digest = hashlib.sha1(hash_input.encode('utf-8')).hexdigest()
    return f'SAPISIDHASH {timestamp}_{digest}'
a5c56234
M
537
538 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 539 note='Downloading API JSON', errnote='Unable to download API page',
109dd3b2 540 context=None, api_key=None, api_hostname=None, default_client='WEB'):
f4f751af 541
109dd3b2 542 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
8bdd16b4 543 data.update(query)
11f9be09 544 real_headers = self.generate_api_headers(default_client=default_client)
f4f751af 545 real_headers.update({'content-type': 'application/json'})
546 if headers:
547 real_headers.update(headers)
545cc85d 548 return self._download_json(
109dd3b2 549 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
a5c56234 550 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 551 data=json.dumps(data).encode('utf8'), headers=real_headers,
552 query={'key': api_key or self._extract_api_key()})
553
def extract_yt_initial_data(self, video_id, webpage):
    """Find the ytInitialData object in `webpage` and return it parsed with _parse_json()"""
    # Prefer the match that is followed by a known boundary; fall back to the bare pattern
    patterns = (
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE)
    raw_data = self._search_regex(patterns, webpage, 'yt initial data')
    return self._parse_json(raw_data, video_id)
0c148415 560
def _extract_identity_token(self, webpage, item_id):
    """
    Return the ID_TOKEN of `webpage`, or None

    The ytcfg of the page is checked first, then the raw page text is searched.
    """
    if not webpage:
        return None
    ytcfg_token = try_get(
        self.extract_ytcfg(item_id, webpage), lambda x: x['ID_TOKEN'], compat_str)
    if ytcfg_token:
        return ytcfg_token
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
572
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'])
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_session_id = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated_session_id:
            return delegated_session_id
        datasync_id = try_get(source, datasync_getters, compat_str) or ''
        parts = datasync_id.split("||")
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]
a1c5d2ca 591
def extract_ytcfg(self, video_id, webpage):
    """Return the dict passed to ytcfg.set(...) in `webpage`; an empty dict if it is missing or cannot be parsed"""
    if not webpage:
        return {}
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', default='{}')
    ytcfg = self._parse_json(raw_ytcfg, video_id, fatal=False)
    return ytcfg or {}
599
def generate_api_headers(
        self, ytcfg=None, identity_token=None, account_syncid=None,
        visitor_data=None, api_hostname=None, default_client='WEB', session_index=None):
    """
    Build the HTTP headers for an INNERTUBE API request

    Client name/version come from `ytcfg` or the defaults of `default_client`.
    Visitor data and session index are taken from `ytcfg` when not passed in.
    The Authorization and X-Origin headers are only added when a SAPISIDHASH
    could be generated.
    """
    origin = 'https://' + (api_hostname or self._get_innertube_host(default_client))
    client_name_id = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name_id),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin
    }
    if ytcfg and not visitor_data:
        visitor_data = try_get(
            self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
    if identity_token:
        headers['X-Youtube-Identity-Token'] = identity_token
    if account_syncid:
        headers['X-Goog-PageId'] = account_syncid
    if ytcfg and session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # Without a known session index the first account (0) is used
        if session_index is None:
            headers['X-Goog-AuthUser'] = 0
        else:
            headers['X-Goog-AuthUser'] = session_index
    if visitor_data:
        headers['X-Goog-Visitor-Id'] = visitor_data
    authorization = self._generate_sapisidhash_header(origin)
    if authorization is not None:
        headers['Authorization'] = authorization
        headers['X-Origin'] = origin
    return headers
29f7c58a 628
2d6659b9 629 @staticmethod
630 def _build_api_continuation_query(continuation, ctp=None):
631 query = {
632 'continuation': continuation
633 }
634 # TODO: Inconsistency with clickTrackingParams.
635 # Currently we have a fixed ctp contained within context (from ytcfg)
636 # and a ctp in root query for continuation.
637 if ctp:
638 query['clickTracking'] = {'clickTrackingParams': ctp}
639 return query
640
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from the nextContinuationData/reloadContinuationData of `renderer`, if any"""
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_data.get('clickTrackingParams'))
2d6659b9 653
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict; None if it holds no token"""
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_ep.get('clickTrackingParams'))
2d6659b9 663
@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query of `renderer`, or None

    The renderer's own continuation data is preferred; otherwise the first
    usable continuationItemRenderer among its 'contents' and 'items' is taken.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    entries = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'])
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        endpoint = try_get(entry, endpoint_getters, dict)
        query = cls._extract_continuation_ep_data(endpoint)
        if query:
            return query
684
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert type, message) pairs for the alerts contained in `data`
    @param data  dict whose 'alerts' list is inspected

    Entries that are not dicts, have no 'type', or have no extractable text are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Skip malformed values instead of failing with AttributeError on .get()
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert.get('text'))
            if message:
                yield alert_type, message
697
def _report_alerts(self, alerts, expected=True):
    """
    Report (alert type, message) pairs

    Alerts whose type is 'error' (case-insensitive) are errors: the last one is
    raised as ExtractorError, all others are reported as warnings together
    with the non-error alerts.
    """
    errors, warnings = [], []
    for alert_type, alert_message in alerts:
        target = errors if alert_type.lower() == 'error' else warnings
        target.append([alert_type, alert_message])

    for alert_type, alert_message in warnings + errors[:-1]:
        self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
    if errors:
        final_message = errors[-1][1]
        raise ExtractorError('YouTube said: %s' % final_message, expected=expected)
711
712 def _extract_and_report_alerts(self, data, *args, **kwargs):
713 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
714
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadataBadgeRenderer labels found in renderer['badges']"""
    badge_list = try_get(renderer, lambda x: x['badges'], list) or []
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in badge_list)
    return {label.lower() for label in labels if label}
722
@staticmethod
def _get_text(data, getter=None, max_runs=None):
    """
    Return the text of a text object: its 'simpleText', or else its joined 'runs'

    @param getter    getter (or several) applied to `data` with try_get to reach
                     the text object; `data` itself is used when None
    @param max_runs  if set, only this many leading runs are joined
    Returns None when no getter yields any text.
    """
    for get in variadic(getter):
        node = data if get is None else try_get(data, get)
        simple_text = try_get(node, lambda x: x['simpleText'], compat_str)
        if simple_text:
            return simple_text

        runs = try_get(node, lambda x: x['runs'], list) or []
        # A bare list is treated as the list of runs itself
        if not runs and isinstance(node, list):
            runs = node

        limit = min(len(runs), max_runs or len(runs))
        joined = ''.join(
            try_get(run, lambda x: x['text'], compat_str) or ''
            for run in runs[:limit])
        if joined:
            return joined
47193e02 741
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='WEB'):
    """
    Call the API endpoint `ep` with retries and return the response

    The number of retries is taken from the 'extractor_retries' param (default 3).
    A request is retried on network errors (except HTTP 403 and 429) and when
    none of `check_get_keys` is found in the response.

    @param check_get_keys  keys of which at least one must be present (checked with
                           dict_get) for the response to count as complete
    @param fatal           if False, failures are reported as warnings and None is returned
    Alerts contained in the response are reported; an error alert aborts like any other failure.
    """
    response = None
    last_error = None
    # Starts at -1 so that the first attempt is number 0 and is not labelled as a retry
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            # Always fatal here so that errors can be inspected and retried below
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, network_exceptions):
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                    last_error = error_to_compat_str(e.cause or e)
                    if count < retries:
                        continue
            # Not retryable, or out of retries
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            # Youtube may send alerts if there was an issue with the continuation page
            try:
                self._extract_and_report_alerts(response, expected=False)
            except ExtractorError as e:
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response
800
9297939e 801 @staticmethod
802 def is_music_url(url):
803 return re.match(r'https?://music\.youtube\.com/', url) is not None
804
def _extract_video(self, renderer):
    """Turn a video renderer dict into a 'url' type result that refers to YoutubeIE"""
    video_id = renderer.get('videoId')

    # Only the leading number of the view count text is used, e.g. "1,234" of "1,234 views"
    view_count_text = self._get_text(renderer.get('viewCountText')) or ''
    leading_number = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
        'view count', default=None)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
        'title': self._get_text(renderer.get('title')),
        'description': self._get_text(renderer.get('descriptionSnippet')),
        'duration': parse_duration(self._get_text(renderer.get('lengthText'))),
        'view_count': str_to_int(leading_number),
        'uploader': self._get_text(
            renderer, (lambda x: x['ownerText'], lambda x: x['shortBylineText'])),
    }
828
0c148415 829
360e1ca5 830class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 831 IE_DESC = 'YouTube.com'
bc2ca1bb 832 _INVIDIOUS_SITES = (
833 # invidious-redirect websites
834 r'(?:www\.)?redirect\.invidious\.io',
835 r'(?:(?:www|dev)\.)?invidio\.us',
836 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
837 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 838 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 839 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 840 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
bc2ca1bb 841 # youtube-dl invidious instances list
842 r'(?:(?:www|no)\.)?invidiou\.sh',
843 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
844 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 845 r'(?:www\.)?invidious\.mastodon\.host',
846 r'(?:www\.)?invidious\.zapashcanon\.fr',
ed807c18 847 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
201c1459 848 r'(?:www\.)?invidious\.tinfoil-hat\.net',
849 r'(?:www\.)?invidious\.himiko\.cloud',
850 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 851 r'(?:www\.)?invidious\.tube',
852 r'(?:www\.)?invidiou\.site',
853 r'(?:www\.)?invidious\.site',
854 r'(?:www\.)?invidious\.xyz',
855 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 856 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 857 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 858 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 859 r'(?:www\.)?tube\.poal\.co',
860 r'(?:www\.)?tube\.connect\.cafe',
861 r'(?:www\.)?vid\.wxzm\.sx',
862 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 863 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 864 r'(?:www\.)?yewtu\.be',
865 r'(?:www\.)?yt\.elukerio\.org',
866 r'(?:www\.)?yt\.lelux\.fi',
867 r'(?:www\.)?invidious\.ggc-project\.de',
868 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 869 r'(?:www\.)?ytprivate\.com',
870 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 871 r'(?:www\.)?invidious\.toot\.koeln',
872 r'(?:www\.)?invidious\.fdn\.fr',
873 r'(?:www\.)?watch\.nettohikari\.com',
ed807c18 874 r'(?:www\.)?invidious\.namazso\.eu',
875 r'(?:www\.)?invidious\.silkky\.cloud',
876 r'(?:www\.)?invidious\.exonip\.de',
877 r'(?:www\.)?invidious\.riverside\.rocks',
878 r'(?:www\.)?invidious\.blamefran\.net',
879 r'(?:www\.)?invidious\.moomoo\.de',
880 r'(?:www\.)?ytb\.trom\.tf',
881 r'(?:www\.)?yt\.cyberhost\.uk',
bc2ca1bb 882 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
883 r'(?:www\.)?qklhadlycap4cnod\.onion',
884 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
885 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
886 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
887 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
888 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
889 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
ed807c18 890 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
891 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
892 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
893 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
bc2ca1bb 894 )
cb7dfeea 895 _VALID_URL = r"""(?x)^
c5e8d7af 896 (
edb53e2d 897 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 898 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
899 (?:www\.)?deturl\.com/www\.youtube\.com|
900 (?:www\.)?pwnyoutube\.com|
901 (?:www\.)?hooktube\.com|
902 (?:www\.)?yourepeat\.com|
903 tube\.majestyc\.net|
904 %(invidious)s|
905 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
906 (?:.*?\#/)? # handle anchor (#/) redirect urls
907 (?: # the various things that can precede the ID:
ac7553d0 908 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 909 |(?: # or the v= param in all its forms
f7000f3a 910 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 911 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 912 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
913 v=
914 )
f4b05232 915 ))
cbaed4bb
S
916 |(?:
917 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
918 vid\.plus| # or vid.plus/xxxx
919 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 920 %(invidious)s
cbaed4bb 921 )/
edb53e2d 922 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 923 )
c5e8d7af 924 )? # all until now is optional -> you can pass the naked ID
201c1459 925 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 926 (?(1).+)? # if we found the ID, everything can follow
9297939e 927 (?:\#|$)""" % {
bc2ca1bb 928 'invidious': '|'.join(_INVIDIOUS_SITES),
929 }
e40c758c 930 _PLAYER_INFO_RE = (
cc2db878 931 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
932 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 933 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 934 )
2c62dc26 935 _formats = {
c2d3cb4c 936 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
937 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
938 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
939 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
940 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
941 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
942 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
943 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 944 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 945 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
946 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
947 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
948 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
949 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
950 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 951 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 952 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
953 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 954
955
956 # 3D videos
c2d3cb4c 957 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
958 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
959 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
960 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 961 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
962 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
963 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 964
96fb5605 965 # Apple HTTP Live Streaming
11f12195 966 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 967 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
968 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
969 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
970 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
971 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 972 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
973 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
974
975 # DASH mp4 video
d23028a8
S
976 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
977 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
978 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
979 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
980 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 981 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
982 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
983 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
984 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
985 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
986 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
987 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 988
f6f1fc92 989 # Dash mp4 audio
d23028a8
S
990 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
991 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
992 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
993 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
994 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
995 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
996 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
997
998 # Dash webm
d23028a8
S
999 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1000 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1001 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1002 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1003 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1004 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1005 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1006 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1007 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1008 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1009 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1010 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1011 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1012 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1013 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1014 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1015 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1016 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1017 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1018 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1019 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1020 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1021
1022 # Dash webm audio
d23028a8
S
1023 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1024 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1025
0857baad 1026 # Dash webm audio with opus inside
d23028a8
S
1027 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1028 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1029 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1030
ce6b9a2d
PH
1031 # RTMP (unnamed)
1032 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1033
1034 # av01 video only formats sometimes served with "unknown" codecs
1035 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1036 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1037 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1038 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 1039 }
29f7c58a 1040 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1041
109dd3b2 1042 _AGE_GATE_REASONS = (
1043 'Sign in to confirm your age',
1044 'This video may be inappropriate for some users.',
1045 'Sorry, this content is age-restricted.')
1046
fd5c4aab
S
1047 _GEO_BYPASS = False
1048
78caa52a 1049 IE_NAME = 'youtube'
2eb88d95
PH
1050 _TESTS = [
1051 {
2d3d2997 1052 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1053 'info_dict': {
1054 'id': 'BaW_jenozKc',
1055 'ext': 'mp4',
3867038a 1056 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1057 'uploader': 'Philipp Hagemeister',
1058 'uploader_id': 'phihag',
ec85ded8 1059 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
1060 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1061 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1062 'upload_date': '20121002',
3867038a 1063 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 1064 'categories': ['Science & Technology'],
3867038a 1065 'tags': ['youtube-dl'],
556dbe7f 1066 'duration': 10,
dbdaaa23 1067 'view_count': int,
3e7c1224
PH
1068 'like_count': int,
1069 'dislike_count': int,
7c80519c 1070 'start_time': 1,
297a564b 1071 'end_time': 9,
2eb88d95 1072 }
0e853ca4 1073 },
fccd3771 1074 {
4bc3a23e
PH
1075 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1076 'note': 'Embed-only video (#1746)',
1077 'info_dict': {
1078 'id': 'yZIXLfi8CZQ',
1079 'ext': 'mp4',
1080 'upload_date': '20120608',
1081 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1082 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1083 'uploader': 'SET India',
94bfcd23 1084 'uploader_id': 'setindia',
ec85ded8 1085 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1086 'age_limit': 18,
545cc85d 1087 },
1088 'skip': 'Private video',
fccd3771 1089 },
11b56058 1090 {
8bdd16b4 1091 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1092 'note': 'Use the first video ID in the URL',
1093 'info_dict': {
1094 'id': 'BaW_jenozKc',
1095 'ext': 'mp4',
3867038a 1096 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1097 'uploader': 'Philipp Hagemeister',
1098 'uploader_id': 'phihag',
ec85ded8 1099 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 1100 'upload_date': '20121002',
3867038a 1101 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 1102 'categories': ['Science & Technology'],
3867038a 1103 'tags': ['youtube-dl'],
556dbe7f 1104 'duration': 10,
dbdaaa23 1105 'view_count': int,
11b56058
PM
1106 'like_count': int,
1107 'dislike_count': int,
34a7de29
S
1108 },
1109 'params': {
1110 'skip_download': True,
1111 },
11b56058 1112 },
dd27fd17 1113 {
2d3d2997 1114 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1115 'note': '256k DASH audio (format 141) via DASH manifest',
1116 'info_dict': {
1117 'id': 'a9LDPn-MO4I',
1118 'ext': 'm4a',
1119 'upload_date': '20121002',
1120 'uploader_id': '8KVIDEO',
ec85ded8 1121 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1122 'description': '',
1123 'uploader': '8KVIDEO',
1124 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1125 },
4bc3a23e
PH
1126 'params': {
1127 'youtube_include_dash_manifest': True,
1128 'format': '141',
4919603f 1129 },
de3c7fe0 1130 'skip': 'format 141 not served anymore',
dd27fd17 1131 },
8bdd16b4 1132 # DASH manifest with encrypted signature
1133 {
1134 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1135 'info_dict': {
1136 'id': 'IB3lcPjvWLA',
1137 'ext': 'm4a',
1138 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1139 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1140 'duration': 244,
1141 'uploader': 'AfrojackVEVO',
1142 'uploader_id': 'AfrojackVEVO',
1143 'upload_date': '20131011',
cc2db878 1144 'abr': 129.495,
8bdd16b4 1145 },
1146 'params': {
1147 'youtube_include_dash_manifest': True,
1148 'format': '141/bestaudio[ext=m4a]',
1149 },
1150 },
dd2d55f1 1151 # Normal age-gate video (embed allowed)
c522adb1 1152 {
2d3d2997 1153 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1154 'info_dict': {
1155 'id': 'HtVdAasjOgU',
1156 'ext': 'mp4',
1157 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1158 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1159 'duration': 142,
c522adb1
JMF
1160 'uploader': 'The Witcher',
1161 'uploader_id': 'WitcherGame',
ec85ded8 1162 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1163 'upload_date': '20140605',
34952f09 1164 'age_limit': 18,
c522adb1
JMF
1165 },
1166 },
8bdd16b4 1167 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1168 # YouTube Red ad is not captured for creator
1169 {
1170 'url': '__2ABJjxzNo',
1171 'info_dict': {
1172 'id': '__2ABJjxzNo',
1173 'ext': 'mp4',
1174 'duration': 266,
1175 'upload_date': '20100430',
1176 'uploader_id': 'deadmau5',
1177 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1178 'creator': 'deadmau5',
1179 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1180 'uploader': 'deadmau5',
1181 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1182 'alt_title': 'Some Chords',
8bdd16b4 1183 },
1184 'expected_warnings': [
1185 'DASH manifest missing',
1186 ]
1187 },
067aa17e 1188 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1189 {
1190 'url': 'lqQg6PlCWgI',
1191 'info_dict': {
1192 'id': 'lqQg6PlCWgI',
1193 'ext': 'mp4',
556dbe7f 1194 'duration': 6085,
90227264 1195 'upload_date': '20150827',
cbe2bd91 1196 'uploader_id': 'olympic',
ec85ded8 1197 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1198 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
11f9be09 1199 'uploader': 'Olympics',
cbe2bd91
PH
1200 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1201 },
1202 'params': {
1203 'skip_download': 'requires avconv',
e52a40ab 1204 }
cbe2bd91 1205 },
6271f1ca
PH
1206 # Non-square pixels
1207 {
1208 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1209 'info_dict': {
1210 'id': '_b-2C3KPAM0',
1211 'ext': 'mp4',
1212 'stretched_ratio': 16 / 9.,
556dbe7f 1213 'duration': 85,
6271f1ca
PH
1214 'upload_date': '20110310',
1215 'uploader_id': 'AllenMeow',
ec85ded8 1216 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1217 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1218 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1219 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1220 },
06b491eb
S
1221 },
1222 # url_encoded_fmt_stream_map is empty string
1223 {
1224 'url': 'qEJwOuvDf7I',
1225 'info_dict': {
1226 'id': 'qEJwOuvDf7I',
f57b7835 1227 'ext': 'webm',
06b491eb
S
1228 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1229 'description': '',
1230 'upload_date': '20150404',
1231 'uploader_id': 'spbelect',
1232 'uploader': 'Наблюдатели Петербурга',
1233 },
1234 'params': {
1235 'skip_download': 'requires avconv',
e323cf3f
S
1236 },
1237 'skip': 'This live event has ended.',
06b491eb 1238 },
067aa17e 1239 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1240 {
1241 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1242 'info_dict': {
1243 'id': 'FIl7x6_3R5Y',
eb6793ba 1244 'ext': 'webm',
da77d856
S
1245 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1246 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1247 'duration': 220,
da77d856
S
1248 'upload_date': '20150625',
1249 'uploader_id': 'dorappi2000',
ec85ded8 1250 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1251 'uploader': 'dorappi2000',
eb6793ba 1252 'formats': 'mincount:31',
da77d856 1253 },
eb6793ba 1254 'skip': 'not actual anymore',
2ee8f5d8 1255 },
8a1a26ce
YCH
1256 # DASH manifest with segment_list
1257 {
1258 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1259 'md5': '8ce563a1d667b599d21064e982ab9e31',
1260 'info_dict': {
1261 'id': 'CsmdDsKjzN8',
1262 'ext': 'mp4',
17ee98e1 1263 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1264 'uploader': 'Airtek',
1265 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1266 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1267 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1268 },
1269 'params': {
1270 'youtube_include_dash_manifest': True,
1271 'format': '135', # bestvideo
be49068d
S
1272 },
1273 'skip': 'This live event has ended.',
2ee8f5d8 1274 },
cf7e015f
S
1275 {
1276 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1277 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1278 'info_dict': {
545cc85d 1279 'id': 'jvGDaLqkpTg',
1280 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1281 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1282 },
1283 'playlist': [{
1284 'info_dict': {
545cc85d 1285 'id': 'jvGDaLqkpTg',
cf7e015f 1286 'ext': 'mp4',
545cc85d 1287 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1288 'description': 'md5:e03b909557865076822aa169218d6a5d',
1289 'duration': 10643,
1290 'upload_date': '20161111',
1291 'uploader': 'Team PGP',
1292 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1293 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1294 },
1295 }, {
1296 'info_dict': {
545cc85d 1297 'id': '3AKt1R1aDnw',
cf7e015f 1298 'ext': 'mp4',
545cc85d 1299 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1300 'description': 'md5:e03b909557865076822aa169218d6a5d',
1301 'duration': 10991,
1302 'upload_date': '20161111',
1303 'uploader': 'Team PGP',
1304 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1305 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1306 },
1307 }, {
1308 'info_dict': {
545cc85d 1309 'id': 'RtAMM00gpVc',
cf7e015f 1310 'ext': 'mp4',
545cc85d 1311 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1312 'description': 'md5:e03b909557865076822aa169218d6a5d',
1313 'duration': 10995,
1314 'upload_date': '20161111',
1315 'uploader': 'Team PGP',
1316 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1317 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1318 },
1319 }, {
1320 'info_dict': {
545cc85d 1321 'id': '6N2fdlP3C5U',
cf7e015f 1322 'ext': 'mp4',
545cc85d 1323 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1324 'description': 'md5:e03b909557865076822aa169218d6a5d',
1325 'duration': 10990,
1326 'upload_date': '20161111',
1327 'uploader': 'Team PGP',
1328 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1329 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1330 },
1331 }],
1332 'params': {
1333 'skip_download': True,
1334 },
cbaed4bb 1335 },
f9f49d87 1336 {
067aa17e 1337 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1338 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1339 'info_dict': {
1340 'id': 'gVfLd0zydlo',
1341 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1342 },
1343 'playlist_count': 2,
be49068d 1344 'skip': 'Not multifeed anymore',
f9f49d87 1345 },
cbaed4bb 1346 {
2d3d2997 1347 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1348 'only_matching': True,
0e49d9a6 1349 },
6d4fc66b 1350 {
2d3d2997 1351 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1352 'only_matching': True,
1353 },
0e49d9a6 1354 {
067aa17e 1355 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1356 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1357 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1358 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1359 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1360 'info_dict': {
1361 'id': 'lsguqyKfVQg',
1362 'ext': 'mp4',
1363 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1364 'alt_title': 'Dark Walk',
0e49d9a6 1365 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1366 'duration': 133,
0e49d9a6
LL
1367 'upload_date': '20151119',
1368 'uploader_id': 'IronSoulElf',
ec85ded8 1369 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1370 'uploader': 'IronSoulElf',
11f9be09 1371 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1372 'track': 'Dark Walk',
1373 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1374 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1375 },
1376 'params': {
1377 'skip_download': True,
1378 },
1379 },
61f92af1 1380 {
067aa17e 1381 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1382 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1383 'only_matching': True,
1384 },
313dfc45
LL
1385 {
1386 # Video with yt:stretch=17:0
1387 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1388 'info_dict': {
1389 'id': 'Q39EVAstoRM',
1390 'ext': 'mp4',
1391 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1392 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1393 'upload_date': '20151107',
1394 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1395 'uploader': 'CH GAMER DROID',
1396 },
1397 'params': {
1398 'skip_download': True,
1399 },
be49068d 1400 'skip': 'This video does not exist.',
313dfc45 1401 },
201c1459 1402 {
1403 # Video with incomplete 'yt:stretch=16:'
1404 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1405 'only_matching': True,
1406 },
7caf9830
S
1407 {
1408 # Video licensed under Creative Commons
1409 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1410 'info_dict': {
1411 'id': 'M4gD1WSo5mA',
1412 'ext': 'mp4',
1413 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1414 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1415 'duration': 721,
7caf9830
S
1416 'upload_date': '20150127',
1417 'uploader_id': 'BerkmanCenter',
ec85ded8 1418 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1419 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1420 'license': 'Creative Commons Attribution license (reuse allowed)',
1421 },
1422 'params': {
1423 'skip_download': True,
1424 },
1425 },
fd050249
S
1426 {
1427 # Channel-like uploader_url
1428 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1429 'info_dict': {
1430 'id': 'eQcmzGIKrzg',
1431 'ext': 'mp4',
1432 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1433 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1434 'duration': 4060,
fd050249 1435 'upload_date': '20151119',
eb6793ba 1436 'uploader': 'Bernie Sanders',
fd050249 1437 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1438 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1439 'license': 'Creative Commons Attribution license (reuse allowed)',
1440 },
1441 'params': {
1442 'skip_download': True,
1443 },
1444 },
040ac686
S
1445 {
1446 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1447 'only_matching': True,
7f29cf54
S
1448 },
1449 {
067aa17e 1450 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1451 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1452 'only_matching': True,
6496ccb4
S
1453 },
1454 {
1455 # Rental video preview
1456 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1457 'info_dict': {
1458 'id': 'uGpuVWrhIzE',
1459 'ext': 'mp4',
1460 'title': 'Piku - Trailer',
1461 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1462 'upload_date': '20150811',
1463 'uploader': 'FlixMatrix',
1464 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1465 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1466 'license': 'Standard YouTube License',
1467 },
1468 'params': {
1469 'skip_download': True,
1470 },
eb6793ba 1471 'skip': 'This video is not available.',
022a5d66 1472 },
12afdc2a
S
1473 {
1474 # YouTube Red video with episode data
1475 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1476 'info_dict': {
1477 'id': 'iqKdEhx-dD4',
1478 'ext': 'mp4',
1479 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1480 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1481 'duration': 2085,
12afdc2a
S
1482 'upload_date': '20170118',
1483 'uploader': 'Vsauce',
1484 'uploader_id': 'Vsauce',
1485 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1486 'series': 'Mind Field',
1487 'season_number': 1,
1488 'episode_number': 1,
1489 },
1490 'params': {
1491 'skip_download': True,
1492 },
1493 'expected_warnings': [
1494 'Skipping DASH manifest',
1495 ],
1496 },
c7121fa7
S
1497 {
1498 # The following content has been identified by the YouTube community
1499 # as inappropriate or offensive to some audiences.
1500 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1501 'info_dict': {
1502 'id': '6SJNVb0GnPI',
1503 'ext': 'mp4',
1504 'title': 'Race Differences in Intelligence',
1505 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1506 'duration': 965,
1507 'upload_date': '20140124',
1508 'uploader': 'New Century Foundation',
1509 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1510 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1511 },
1512 'params': {
1513 'skip_download': True,
1514 },
545cc85d 1515 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1516 },
022a5d66
S
1517 {
1518 # itag 212
1519 'url': '1t24XAntNCY',
1520 'only_matching': True,
fd5c4aab
S
1521 },
1522 {
1523 # geo restricted to JP
1524 'url': 'sJL6WA-aGkQ',
1525 'only_matching': True,
1526 },
cd5a74a2
S
1527 {
1528 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1529 'only_matching': True,
1530 },
bc2ca1bb 1531 {
1532 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1533 'only_matching': True,
1534 },
1535 {
1536 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1537 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1538 'only_matching': True,
1539 },
825cd268
RA
1540 {
1541 # DRM protected
1542 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1543 'only_matching': True,
4fe54c12
S
1544 },
1545 {
1546 # Video with unsupported adaptive stream type formats
1547 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1548 'info_dict': {
1549 'id': 'Z4Vy8R84T1U',
1550 'ext': 'mp4',
1551 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1552 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1553 'duration': 433,
1554 'upload_date': '20130923',
1555 'uploader': 'Amelia Putri Harwita',
1556 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1557 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1558 'formats': 'maxcount:10',
1559 },
1560 'params': {
1561 'skip_download': True,
1562 'youtube_include_dash_manifest': False,
1563 },
5429d6a9 1564 'skip': 'not actual anymore',
5caabd3c 1565 },
1566 {
822b9d9c 1567 # Youtube Music Auto-generated description
5caabd3c 1568 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1569 'info_dict': {
1570 'id': 'MgNrAu2pzNs',
1571 'ext': 'mp4',
1572 'title': 'Voyeur Girl',
1573 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1574 'upload_date': '20190312',
5429d6a9
S
1575 'uploader': 'Stephen - Topic',
1576 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1577 'artist': 'Stephen',
1578 'track': 'Voyeur Girl',
1579 'album': 'it\'s too much love to know my dear',
1580 'release_date': '20190313',
1581 'release_year': 2019,
1582 },
1583 'params': {
1584 'skip_download': True,
1585 },
1586 },
66b48727
RA
1587 {
1588 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1589 'only_matching': True,
1590 },
011e75e6
S
1591 {
1592 # invalid -> valid video id redirection
1593 'url': 'DJztXj2GPfl',
1594 'info_dict': {
1595 'id': 'DJztXj2GPfk',
1596 'ext': 'mp4',
1597 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1598 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1599 'upload_date': '20090125',
1600 'uploader': 'Prochorowka',
1601 'uploader_id': 'Prochorowka',
1602 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1603 'artist': 'Panjabi MC',
1604 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1605 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1606 },
1607 'params': {
1608 'skip_download': True,
1609 },
545cc85d 1610 'skip': 'Video unavailable',
ea74e00b
DP
1611 },
1612 {
1613 # empty description results in an empty string
1614 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1615 'info_dict': {
1616 'id': 'x41yOUIvK2k',
1617 'ext': 'mp4',
1618 'title': 'IMG 3456',
1619 'description': '',
1620 'upload_date': '20170613',
1621 'uploader_id': 'ElevageOrVert',
1622 'uploader': 'ElevageOrVert',
1623 },
1624 'params': {
1625 'skip_download': True,
1626 },
1627 },
a0566bbf 1628 {
29f7c58a 1629 # with '};' inside yt initial data (see [1])
1630 # see [2] for an example with '};' inside ytInitialPlayerResponse
1631 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1632 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1633 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1634 'info_dict': {
1635 'id': 'CHqg6qOn4no',
1636 'ext': 'mp4',
1637 'title': 'Part 77 Sort a list of simple types in c#',
1638 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1639 'upload_date': '20130831',
1640 'uploader_id': 'kudvenkat',
1641 'uploader': 'kudvenkat',
1642 },
1643 'params': {
1644 'skip_download': True,
1645 },
1646 },
29f7c58a 1647 {
1648 # another example of '};' in ytInitialData
1649 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1650 'only_matching': True,
1651 },
1652 {
1653 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1654 'only_matching': True,
1655 },
545cc85d 1656 {
cc2db878 1657 # https://github.com/ytdl-org/youtube-dl/pull/28094
1658 'url': 'OtqTfy26tG0',
1659 'info_dict': {
1660 'id': 'OtqTfy26tG0',
1661 'ext': 'mp4',
1662 'title': 'Burn Out',
1663 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1664 'upload_date': '20141120',
1665 'uploader': 'The Cinematic Orchestra - Topic',
1666 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1667 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1668 'artist': 'The Cinematic Orchestra',
1669 'track': 'Burn Out',
1670 'album': 'Every Day',
1671 'release_data': None,
1672 'release_year': None,
1673 },
1674 'params': {
1675 'skip_download': True,
1676 },
545cc85d 1677 },
bc2ca1bb 1678 {
1679 # controversial video, only works with bpctr when authenticated with cookies
1680 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1681 'only_matching': True,
1682 },
a1a7907b 1683 {
1684 # controversial video, requires bpctr/contentCheckOk
1685 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1686 'info_dict': {
1687 'id': 'SZJvDhaSDnc',
1688 'ext': 'mp4',
1689 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1690 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1691 'uploader': 'CBS This Morning',
11f9be09 1692 'uploader_id': 'CBSThisMorning',
a1a7907b 1693 'upload_date': '20140716',
1694 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1695 }
1696 },
f7ad7160 1697 {
1698 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1699 'url': 'cBvYw8_A0vQ',
1700 'info_dict': {
1701 'id': 'cBvYw8_A0vQ',
1702 'ext': 'mp4',
1703 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1704 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1705 'upload_date': '20201120',
1706 'uploader': 'Walk around Japan',
1707 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1708 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1709 },
1710 'params': {
1711 'skip_download': True,
1712 },
0fb983f6 1713 }, {
1714 # Has multiple audio streams
1715 'url': 'WaOKSUlf4TM',
1716 'only_matching': True
9297939e 1717 }, {
1718 # Requires Premium: has format 141 when requested using YTM url
1719 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1720 'only_matching': True
1721 }, {
120916da 1722 # multiple subtitles with same lang_code
1723 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1724 'only_matching': True,
109dd3b2 1725 }, {
1726 # Force use android client fallback
1727 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1728 'info_dict': {
1729 'id': 'YOelRv7fMxY',
11f9be09 1730 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 1731 'ext': '3gp',
1732 'upload_date': '20210624',
1733 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1734 'uploader': 'colinfurze',
11f9be09 1735 'uploader_id': 'colinfurze',
109dd3b2 1736 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
11f9be09 1737 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
109dd3b2 1738 },
1739 'params': {
1740 'format': '17', # 3gp format available on android
1741 'extractor_args': {'youtube': {'player_client': ['android']}},
1742 },
120916da 1743 },
109dd3b2 1744 {
1745 # Skip download of additional client configs (remix client config in this case)
1746 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1747 'only_matching': True,
1748 'params': {
1749 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1750 },
1751 }
2eb88d95
PH
1752 ]
1753
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected; every other URL is decided by the parent class.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
1763
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Set up the extractor with empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player code
    # _player_cache: (player URL, signature cache id) -> signature function
    self._code_cache, self._player_cache = {}, {}
e0df6211 1768
def _extract_player_url(self, ytcfg=None, webpage=None):
    """Return the player JS URL as an absolute URL, or None if it cannot be found.

    The URL is read from ``ytcfg['PLAYER_JS_URL']`` first and, failing that,
    searched for in *webpage*. Protocol-relative and site-relative values are
    completed with ``https:`` / ``https://www.youtube.com``.
    """
    url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
    if webpage and not url:
        url = self._search_regex(
            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
            webpage, 'player URL', fatal=False)
    if not url:
        return None
    if url.startswith('//'):
        return 'https:' + url
    if re.match(r'https?://', url):
        return url
    return compat_urlparse.urljoin('https://www.youtube.com', url)
1783
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return the dot-joined lengths of the dot-separated parts of a signature."""
    part_lengths = [compat_str(len(part)) for part in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1787
e40c758c
S
1788 @classmethod
1789 def _extract_player_info(cls, player_url):
1790 for player_re in cls._PLAYER_INFO_RE:
1791 id_m = re.search(player_re, player_url)
1792 if id_m:
1793 break
1794 else:
c081b35c 1795 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 1796 return id_m.group('id')
e40c758c 1797
109dd3b2 1798 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1799 player_id = self._extract_player_info(player_url)
1800 if player_id not in self._code_cache:
1801 self._code_cache[player_id] = self._download_webpage(
1802 player_url, video_id, fatal=fatal,
1803 note='Downloading player ' + player_id,
1804 errnote='Download of %s failed' % player_url)
1805 return player_id in self._code_cache
1806
e40c758c 1807 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 1808 player_id = self._extract_player_info(player_url)
e0df6211 1809
c4417ddb 1810 # Read from filesystem cache
545cc85d 1811 func_id = 'js_%s_%s' % (
1812 player_id, self._signature_cache_id(example_sig))
c4417ddb 1813 assert os.path.basename(func_id) == func_id
a0e07d31 1814
69ea8ca4 1815 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1816 if cache_spec is not None:
78caa52a 1817 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1818
109dd3b2 1819 if self._load_player(video_id, player_url):
1820 code = self._code_cache[player_id]
1821 res = self._parse_sig_js(code)
e0df6211 1822
109dd3b2 1823 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1824 cache_res = res(test_string)
1825 cache_spec = [ord(c) for c in cache_res]
83799698 1826
109dd3b2 1827 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1828 return res
83799698 1829
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to *func* for signatures shaped like *example_sig*.

    *func* is probed with a string whose characters are their own indices. The
    resulting index sequence is written as indexing and slicing expressions on
    ``s`` and sent to ``self.to_screen``.
    """
    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            stop = end + step
            start_part = str(start) if start != 0 else ''
            stop_part = ':%d' % stop if stop >= 0 else ':'
            step_part = ':%d' % step if step != 1 else ''
            return 's[%s%s%s]' % (start_part, stop_part, step_part)

        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for cur, prev in zip(idxs[1:], idxs[:-1]):
            delta = cur - prev
            if step is None:
                if delta in (-1, 1):
                    # A run of consecutive indices begins at prev
                    step, start = delta, prev
                else:
                    yield 's[%d]' % prev
            elif delta != step:
                # The run ended at prev
                yield _genslice(start, prev, step)
                step = None
        if step is None:
            yield 's[%d]' % cur
        else:
            yield _genslice(start, cur, step)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    spec = [ord(c) for c in func(probe)]
    expr_code = ' + '.join(gen_sig_code(spec))
    part_lengths = ', '.join(compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1868
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Find the signature function in the player JS and return a Python wrapper for it.

    The function name is taken from the first pattern below that matches; the
    function is then extracted with JSInterpreter. The returned callable takes
    a signature string and passes it as the single argument.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature_function(s):
        return initial_function([s])

    return run_signature_function
1892
545cc85d 1893 def _decrypt_signature(self, s, video_id, player_url):
257a2501 1894 """Turn the encrypted s field into a working signature"""
6b37f0be 1895
c8bf86d5 1896 if player_url is None:
69ea8ca4 1897 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1898
c8bf86d5 1899 try:
62af3a0e 1900 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1901 if player_id not in self._player_cache:
1902 func = self._extract_signature_function(
60064c53 1903 video_id, player_url, s
c8bf86d5
PH
1904 )
1905 self._player_cache[player_id] = func
1906 func = self._player_cache[player_id]
a06916d9 1907 if self.get_param('youtube_print_sig_code'):
60064c53 1908 self._print_sig_code(func, s)
c8bf86d5
PH
1909 return func(s)
1910 except Exception as e:
1911 tb = traceback.format_exc()
1912 raise ExtractorError(
78caa52a 1913 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1914
109dd3b2 1915 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1916 """
1917 Extract signatureTimestamp (sts)
1918 Required to tell API what sig/player version is in use.
1919 """
1920 sts = None
1921 if isinstance(ytcfg, dict):
1922 sts = int_or_none(ytcfg.get('STS'))
1923
1924 if not sts:
1925 # Attempt to extract from player
1926 if player_url is None:
1927 error_msg = 'Cannot extract signature timestamp without player_url.'
1928 if fatal:
1929 raise ExtractorError(error_msg)
1930 self.report_warning(error_msg)
1931 return
1932 if self._load_player(video_id, player_url, fatal=fatal):
1933 player_id = self._extract_player_info(player_url)
1934 code = self._code_cache[player_id]
1935 sts = int_or_none(self._search_regex(
1936 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
1937 'JS player signature timestamp', group='sts', fatal=fatal))
1938 return sts
1939
def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URL found in *player_responses*.

    The URL's query gets ``ver`` and a random ``cpn`` added before it is
    fetched. A warning is issued, and nothing requested, when no tracking URL
    is available.
    """
    playback_url = traverse_obj(
        player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none, get_all=False)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return

    parsed_url = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(parsed_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]
    new_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(parsed_url._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1965
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return the YouTube embed URLs / video ids found in *webpage*, in discovery order.

    Three sources are scanned: embedded players, ``lazyYT`` elements and the
    markup matched by the "YouTube Video Importer" pattern below.
    """
    # Embedded YouTube player
    embed_matches = re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)
    entries = [unescapeHTML(match.group('url')) for match in embed_matches]

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    entries.extend(list(map(unescapeHTML, lazy_ids)))

    # Wordpress "YouTube Video Importer" plugin
    importer_matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    for groups in importer_matches:
        # The last group holds the video id; the others are the quote characters
        entries.append(groups[-1])

    return entries
1997
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by ``_extract_urls``, or None when there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
2002
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the second capture group of ``_VALID_URL`` (matched in verbose mode) for *url*.

    Raises ExtractorError when *url* does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
2010
def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar found in *data*.

    Start times come from ``timeRangeStartMillis`` scaled by 1000, titles from
    ``title.simpleText``. The list is assembled by ``_extract_chapters``.
    """
    def start_time_of(chapter):
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters',
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    return self._extract_chapters(
        chapter_list, chapter_time=start_time_of, chapter_title=title_of, duration=duration)
2025
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build chapters from the macro-marker lists of the engagement panels in *data*.

    Returns the chapters of the first list that produces any, or an empty list
    if none does.
    """
    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter.get('timeDescription')))

    def chapter_title(chapter):
        return self._get_text(chapter.get('title'))

    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
2041
2042 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
84213ea8 2043 chapters = []
7c365c21 2044 last_chapter = {'start_time': 0}
2045 for idx, chapter in enumerate(chapter_list or []):
2046 title = chapter_title(chapter)
84213ea8
S
2047 start_time = chapter_time(chapter)
2048 if start_time is None:
2049 continue
7c365c21 2050 last_chapter['end_time'] = start_time
2051 if start_time < last_chapter['start_time']:
2052 if idx == 1:
2053 chapters.pop()
2054 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2055 else:
2056 self.report_warning(f'Invalid start time for chapter "{title}"')
2057 continue
2058 last_chapter = {'start_time': start_time, 'title': title}
2059 chapters.append(last_chapter)
2060 last_chapter['end_time'] = duration
84213ea8
S
2061 return chapters
2062
545cc85d 2063 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2064 return self._parse_json(self._search_regex(
2065 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2066 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 2067
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns the result of ``datetime_from_str('now-<X><units>')``, or None when
    the text has fewer than three space-separated words or cannot be parsed.
    """
    words = time_text.split(' ')
    if len(words) < 3:
        return None
    amount, unit = words[0], words[1]
    try:
        return datetime_from_str('now-%s%s' % (amount, unit), precision='auto')
    except ValueError:
        return None
d92f5d5a 2080
a1c5d2ca
M
2081 def _extract_comment(self, comment_renderer, parent=None):
2082 comment_id = comment_renderer.get('commentId')
2083 if not comment_id:
2084 return
fe93e2c4 2085
2086 text = self._get_text(comment_renderer.get('contentText'))
2087
49bd8c66 2088 # note: timestamp is an estimate calculated from the current time and time_text
fe93e2c4 2089 time_text = self._get_text(comment_renderer.get('publishedTimeText')) or ''
2090 time_text_dt = self.parse_time_text(time_text)
2091 if isinstance(time_text_dt, datetime.datetime):
2092 timestamp = calendar.timegm(time_text_dt.timetuple())
2093 author = self._get_text(comment_renderer.get('authorText'))
a1c5d2ca
M
2094 author_id = try_get(comment_renderer,
2095 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
fe93e2c4 2096
49bd8c66 2097 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2098 lambda x: x['likeCount']), compat_str)) or 0
a1c5d2ca
M
2099 author_thumbnail = try_get(comment_renderer,
2100 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2101
2102 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
97524332 2103 is_favorited = 'creatorHeart' in (try_get(
2104 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
a1c5d2ca
M
2105 return {
2106 'id': comment_id,
2107 'text': text,
d92f5d5a 2108 'timestamp': timestamp,
a1c5d2ca
M
2109 'time_text': time_text,
2110 'like_count': votes,
97524332 2111 'is_favorited': is_favorited,
a1c5d2ca
M
2112 'author': author,
2113 'author_id': author_id,
2114 'author_thumbnail': author_thumbnail,
2115 'author_is_uploader': author_is_uploader,
2116 'parent': parent or 'root'
2117 }
2118
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, video_id, parent=None, comment_counts=None):
    """Generate the comments reachable from *root_continuation_data*.

    Yields comment dicts (as built by ``_extract_comment``) and, when the
    comment section header is read, one int holding the estimated total number
    of comments. Reply threads are fetched by recursing with *parent* set to
    the id of the comment being replied to.

    *comment_counts* is a shared, mutated list:
    [comments so far, estimated total comments, current comment thread #].
    """

    def extract_header(contents):
        # Reads the estimated comment count and the continuation for the
        # selected sort order from the comments header
        _total_comments = 0
        _continuation = None
        for content in contents:
            comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
            expected_comment_count = parse_count(self._get_text(
                comments_header_renderer, (lambda x: x['countText'], lambda x: x['commentsCount']), max_runs=1))

            if expected_comment_count:
                comment_counts[1] = expected_comment_count
                self.to_screen('Downloading ~%d comments' % expected_comment_count)
                _total_comments = comment_counts[1]
            sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
            comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = sort_menu_item.get('title')
            if isinstance(sort_text, compat_str):
                sort_text = sort_text.lower()
            else:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text)
            break
        return _total_comments, _continuation

    def extract_thread(contents):
        # Yields every comment in contents, each followed by its replies
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # `dict` is the expected type of the result, not a second getter:
            # as a getter it would hand back a copy of the whole source dict.
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                yield from self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    video_id, parent=comment.get('id'), comment_counts=comment_counts)

    # YouTube comments have a max depth of 2
    max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
    if max_depth == 1 and parent:
        return
    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    continuation = self._extract_continuation(root_continuation_data)
    if continuation and len(continuation['continuation']) < 27:
        self.write_debug('Detected old API continuation token. Generating new API compatible token.')
        continuation_token = self._generate_comment_continuation(video_id)
        continuation = self._build_api_continuation_query(continuation_token, None)

    visitor_data = None
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    comment_counts[2], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
        if not response:
            break
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

        continuation = None
        if isinstance(continuation_contents, list):
            for continuation_section in continuation_contents:
                if not isinstance(continuation_section, dict):
                    continue
                continuation_items = try_get(
                    continuation_section,
                    (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                     lambda x: x['appendContinuationItemsAction']['continuationItems']),
                    list) or []
                if is_first_continuation:
                    total_comments, continuation = extract_header(continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break
                    continue
                # Number of entries yielded from this section. Counting from 1
                # keeps a page with exactly one comment from looking empty.
                count = 0
                for count, entry in enumerate(extract_thread(continuation_items), 1):
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break

        # Deprecated response structure
        elif isinstance(continuation_contents, dict):
            known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
            for key, continuation_renderer in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                if not isinstance(continuation_renderer, dict):
                    continue
                if is_first_continuation:
                    header_continuation_items = [continuation_renderer.get('header') or {}]
                    total_comments, continuation = extract_header(header_continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break

                # Sometimes YouTube provides a continuation without any comments
                # In most cases we end up just downloading these with very little comments to come.
                count = 0
                for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {}), 1):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                if count == 0:
                    if not parent:
                        self.report_warning('No comments received - assuming end of comments')
                    continuation = None
                break
a1c5d2ca 2289
2d6659b9 2290 @staticmethod
2291 def _generate_comment_continuation(video_id):
2292 """
2293 Generates initial comment section continuation token from given video id
2294 """
2295 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2296 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2297 new_continuation_intlist = list(itertools.chain.from_iterable(
2298 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2299 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2300
def _extract_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Collects comments from the first known renderer of each entry in
    *contents*, stopping once the ``max_comments`` configuration argument is
    reached or the user interrupts. Returns a dict with the comments and how
    many there are.
    """
    known_entry_comment_renderers = ('itemSectionRenderer',)

    def _real_comment_extract(contents):
        if not isinstance(contents, list):
            return
        for entry in contents:
            for key, renderer in entry.items():
                if key in known_entry_comment_renderers:
                    yield from self._comment_entries(
                        renderer, video_id=video_id, ytcfg=ytcfg,
                        identity_token=self._extract_identity_token(webpage, item_id=video_id),
                        account_syncid=self._extract_account_syncid(ytcfg))
                    break

    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
    comments = []
    estimated_total = 0

    try:
        for item in _real_comment_extract(contents):
            if len(comments) >= max_comments:
                break
            if isinstance(item, int):
                # An int is the estimated total, not a comment
                estimated_total = item
            else:
                comments.append(item)
    except KeyboardInterrupt:
        self.to_screen('Interrupted by user')
    self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
    return {
        'comments': comments,
        'comment_count': len(comments),
    }
2334
109dd3b2 2335 @staticmethod
2336 def _generate_player_context(sts=None):
2337 context = {
2338 'html5Preference': 'HTML5_PREF_WANTS',
2339 }
2340 if sts is not None:
2341 context['signatureTimestamp'] = sts
2342 return {
2343 'playbackContext': {
2344 'contentPlaybackContext': context
a1a7907b 2345 },
2fd226f6 2346 'contentCheckOk': True,
2347 'racyCheckOk': True
109dd3b2 2348 }
2349
4e6767b5 2350 @staticmethod
c888ffb9 2351 def _get_video_info_params(video_id, client='TVHTML5'):
2352 GVI_CLIENTS = {
2353 'ANDROID': {
2354 'c': 'ANDROID',
2355 'cver': '16.20',
2356 },
2357 'TVHTML5': {
2358 'c': 'TVHTML5',
2359 'cver': '6.20180913',
11f9be09 2360 },
2361 'IOS': {
2362 'c': 'IOS',
2363 'cver': '16.20'
c888ffb9 2364 }
2365 }
2366 query = {
4e6767b5 2367 'video_id': video_id,
2368 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c888ffb9 2369 'html5': '1'
4e6767b5 2370 }
c888ffb9 2371 query.update(GVI_CLIENTS.get(client))
2372 return query
4e6767b5 2373
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
    """
    Request the 'player' API endpoint for the given client.

    The request headers are built from the session index, account sync id
    and the client entry in self._YT_CLIENTS; the query carries the video id
    plus the player context (including the signature timestamp, if found).

    @returns  The response of self._extract_response, or None if it is falsy
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    account_syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    # Non-fatal: the request is still made when no timestamp can be found
    signature_timestamp = self._extract_signature_timestamp(
        video_id, player_url, master_ytcfg, fatal=False)

    api_headers = self.generate_api_headers(
        player_ytcfg, identity_token, account_syncid,
        default_client=self._YT_CLIENTS[client], session_index=session_index)

    player_query = {'videoId': video_id, **self._generate_player_context(signature_timestamp)}
    response = self._extract_response(
        item_id=video_id, ep='player', query=player_query,
        ytcfg=player_ytcfg, headers=api_headers, fatal=False,
        default_client=self._YT_CLIENTS[client],
        note='Downloading %s player API JSON' % client.replace('_', ' ').strip())
    return response or None
2391
def _extract_age_gated_player_response(self, client, video_id, ytcfg, identity_token, player_url, initial_pr):
    """
    Try to obtain a player response for an age-gated video.

    Uses the `_<client>_embedded` client if one exists in self._YT_CLIENTS.
    For the web client the embed page is downloaded first (unless 'configs'
    is in the `player_skip` extractor argument); if its embedded player
    response already reports an age-gate reason, nothing is requested.

    @returns  A player response, or None when no workaround applies
    """
    # get_video_info endpoint seems to be completely dead
    gvi_client = None  # self._YT_CLIENTS.get(f'_{client}_agegate')
    if gvi_client:
        gvi_webpage = self._download_webpage(
            self.http_scheme() + '//www.youtube.com/get_video_info', video_id,
            'Refetching age-gated %s info webpage' % gvi_client.lower(),
            'unable to download video info webpage', fatal=False,
            query=self._get_video_info_params(video_id, client=gvi_client))
        gvi_pr_json = traverse_obj(
            compat_parse_qs(gvi_webpage), ('player_response', 0), expected_type=str)
        pr = self._parse_json(gvi_pr_json or '{}', video_id)
        if pr:
            return pr
        self.report_warning('Falling back to embedded-only age-gate workaround')

    embedded_client = f'_{client}_embedded'
    if not self._YT_CLIENTS.get(embedded_client):
        return

    is_web = client == 'web'
    embed_webpage = None
    if is_web and 'configs' not in self._configuration_arg('player_skip'):
        embed_webpage = self._download_webpage(
            'https://www.youtube.com/embed/%s?html5=1' % video_id,
            video_id=video_id, note=f'Downloading age-gated {client} embed config')

    ytcfg_age = self.extract_ytcfg(video_id, embed_webpage) or {}
    # If we extracted the embed webpage, it'll tell us if we can view the video
    embedded_pr_json = traverse_obj(
        ytcfg_age, ('PLAYER_VARS', 'embedded_player_response'), expected_type=str)
    embedded_pr = self._parse_json(embedded_pr_json or '{}', video_id=video_id)
    embedded_ps_reason = traverse_obj(
        embedded_pr, ('playabilityStatus', 'reason'), expected_type=str) or ''
    if embedded_ps_reason in self._AGE_GATE_REASONS:
        return

    return self._extract_player_response(
        embedded_client, video_id,
        ytcfg_age or ytcfg, ytcfg_age if is_web else {},
        identity_token, player_url, initial_pr)
545cc85d 2427
def _get_requested_clients(self, url, smuggled_data):
    """
    Determine which player clients to query, in order and without duplicates.

    Clients come from the `player_client` extractor argument and must be
    keys of self._YT_CLIENTS; when none remain, ['android', 'web'] is used.
    For music URLs the `<client>_music` variant of each client is appended.

    @returns  The result of orderedSet() over the selected client names
    """
    requested_clients = [
        client for client in self._configuration_arg('player_client')
        # Underscore-prefixed keys (e.g. the `_<client>_embedded` entries used
        # by the age-gate workaround) must not be selectable by the user.
        # The previous check, `client[:0] != '_'`, compared an empty slice
        # and therefore never rejected anything.
        if not client.startswith('_') and client in self._YT_CLIENTS]
    if not requested_clients:
        requested_clients = ['android', 'web']

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the variants from a snapshot so that the list is not
        # extended while it is still being iterated
        music_clients = [
            f'{client}_music' for client in requested_clients if not client.endswith('_music')]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
cf7e015f 2439
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
    """
    Yield the player responses for each of the requested clients.

    The initial player response embedded in the webpage is reused for the
    'web' client. Once any response reports an age-gate reason, that client
    and every following one is only tried through the age-gate workaround.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    age_gated = False
    for client in clients:
        player_ytcfg = master_ytcfg if client == 'web' else {}

        response = None
        if not age_gated:
            if client == 'web' and initial_pr:
                response = initial_pr
            else:
                if client == 'web_music' and 'configs' not in self._configuration_arg('player_skip'):
                    ytm_webpage = self._download_webpage(
                        'https://music.youtube.com',
                        video_id, fatal=False, note='Downloading remix client config')
                    player_ytcfg = self.extract_ytcfg(video_id, ytm_webpage) or {}
                response = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    identity_token, player_url, initial_pr)
        if response:
            yield response

        if not age_gated and traverse_obj(
                response, ('playabilityStatus', 'reason')) not in self._AGE_GATE_REASONS:
            continue
        age_gated = True
        fallback_response = self._extract_age_gated_player_response(
            client, video_id, player_ytcfg or master_ytcfg, identity_token, player_url, initial_pr)
        if fallback_response:
            yield fallback_response

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr and 'web' not in clients:
        initial_pr['streamingData'] = None
        yield initial_pr
2477
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """
    Yield format dicts for the given list of streamingData objects.

    Formats listed directly under 'formats'/'adaptiveFormats' are yielded
    first, followed by those from the HLS and DASH manifests of each
    streamingData object. Manifest formats whose itag was already seen are
    skipped.

    @param streaming_data  List of streamingData dicts
    @param player_url      Player JS URL; needed to decrypt ciphered URLs
    @param is_live         Whether the video is live (DASH is then skipped)
    """
    itags, stream_ids = [], []
    itag_qualities = {}
    q = qualities([
        # "tiny" is the smallest video-only format. But some audio-only formats
        # was also labeled "tiny". It is not clear if such formats still exist
        'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        # `or ''` also covers an explicit null id
        audio_track_id = audio_track.get('id') or ''
        stream_id = '%s.%s' % (itag or '', audio_track_id)
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        if quality == 'tiny' or not quality:
            quality = (fmt.get('audioQuality') or '').lower() or quality
        if itag and quality:
            itag_qualities[itag] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher') or '')
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(encrypted_sig, video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        if itag:
            itags.append(itag)
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': ', '.join(filter(None, (
                audio_track.get('displayName'), fmt.get('qualityLabel') or quality))),
            'fps': int_or_none(fmt.get('fps')),
            'height': int_or_none(fmt.get('height')),
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            # Normalised the same way as 'height'
            'width': int_or_none(fmt.get('width')),
            # None rather than '' when the format has no audio track id
            'language': audio_track_id.split('.')[0] or None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
            # The 3gp format in android client has a quality of "small",
            # but is actually worse than all other formats
            if dct['ext'] == '3gp':
                dct['quality'] = q('tiny')
                dct['preference'] = -10
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    skip_manifests = self._configuration_arg('skip')
    get_dash = not is_live and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
    get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(
                    hls_manifest_url, video_id, 'mp4', fatal=False):
                itag = self._search_regex(
                    r'/itag/(\d+)', f['url'], 'itag', default=None)
                if itag in itags:
                    continue
                if itag:
                    f['format_id'] = itag
                    itags.append(itag)
                yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(
                    dash_manifest_url, video_id, fatal=False):
                itag = f['format_id']
                if itag in itags:
                    continue
                if itag:
                    itags.append(itag)
                if itag in itag_qualities:
                    f['quality'] = q(itag_qualities[itag])
                filesize = int_or_none(self._search_regex(
                    r'/clen/(\d+)', f.get('fragment_base_url')
                    or f['url'], 'file size', default=None))
                if filesize:
                    f['filesize'] = filesize
                yield f
2603
2604 def _real_extract(self, url):
2605 url, smuggled_data = unsmuggle_url(url, {})
2606 video_id = self._match_id(url)
2607
2608 base_url = self.http_scheme() + '//www.youtube.com/'
2609 webpage_url = base_url + 'watch?v=' + video_id
2610 webpage = self._download_webpage(
2611 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2612
2613 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2614 player_url = self._extract_player_url(master_ytcfg, webpage)
2615 identity_token = self._extract_identity_token(webpage, video_id)
2616
2617 player_responses = list(self._extract_player_responses(
2618 self._get_requested_clients(url, smuggled_data),
2619 video_id, webpage, master_ytcfg, player_url, identity_token))
2620
352d63fd 2621 get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
11f9be09 2622
2623 playability_statuses = traverse_obj(
2624 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2625
2626 trailer_video_id = get_first(
2627 playability_statuses,
2628 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2629 expected_type=str)
2630 if trailer_video_id:
2631 return self.url_result(
2632 trailer_video_id, self.ie_key(), trailer_video_id)
2633
2634 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2635 if webpage else (lambda x: None))
2636
2637 video_details = traverse_obj(
2638 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2639 microformats = traverse_obj(
2640 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2641 expected_type=dict, default=[])
2642 video_title = (
2643 get_first(video_details, 'title')
2644 or self._get_text(microformats, (..., 'title'))
2645 or search_meta(['og:title', 'twitter:title', 'title']))
2646 video_description = get_first(video_details, 'shortDescription')
2647
2648 if not smuggled_data.get('force_singlefeed', False):
2649 if not self.get_param('noplaylist'):
2650 multifeed_metadata_list = get_first(
2651 player_responses,
2652 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2653 expected_type=str)
2654 if multifeed_metadata_list:
2655 entries = []
2656 feed_ids = []
2657 for feed in multifeed_metadata_list.split(','):
2658 # Unquote should take place before split on comma (,) since textual
2659 # fields may contain comma as well (see
2660 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2661 feed_data = compat_parse_qs(
2662 compat_urllib_parse_unquote_plus(feed))
2663
2664 def feed_entry(name):
2665 return try_get(
2666 feed_data, lambda x: x[name][0], compat_str)
2667
2668 feed_id = feed_entry('id')
2669 if not feed_id:
2670 continue
2671 feed_title = feed_entry('title')
2672 title = video_title
2673 if feed_title:
2674 title += ' (%s)' % feed_title
2675 entries.append({
2676 '_type': 'url_transparent',
2677 'ie_key': 'Youtube',
2678 'url': smuggle_url(
2679 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2680 {'force_singlefeed': True}),
2681 'title': title,
2682 })
2683 feed_ids.append(feed_id)
2684 self.to_screen(
2685 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2686 % (', '.join(feed_ids), video_id))
2687 return self.playlist_result(
2688 entries, video_id, video_title, video_description)
2689 else:
2690 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2691
7ea65411 2692 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
11f9be09 2693 is_live = get_first(video_details, 'isLive')
7ea65411 2694 if is_live is None:
2695 is_live = get_first(live_broadcast_details, 'isLiveNow')
11f9be09 2696
2697 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2698 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
bf1317d2 2699
545cc85d 2700 if not formats:
11f9be09 2701 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
b7da73eb 2702 self.raise_no_formats(
545cc85d 2703 'This video is DRM protected.', expected=True)
11f9be09 2704 pemr = get_first(
2705 playability_statuses,
2706 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2707 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2708 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 2709 if subreason:
545cc85d 2710 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 2711 countries = get_first(microformats, 'availableCountries')
545cc85d 2712 if not countries:
2713 regions_allowed = search_meta('regionsAllowed')
2714 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2715 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 2716 reason += f'. {subreason}'
545cc85d 2717 if reason:
b7da73eb 2718 self.raise_no_formats(reason, expected=True)
bf1317d2 2719
11f9be09 2720 for f in formats:
2721 # TODO: detect if throttled
2722 if '&n=' in f['url']: # possibly throttled
2723 f['source_preference'] = -10
2724 # note = f.get('format_note')
2725 # f['format_note'] = f'{note} (throttled)' if note else '(throttled)'
2726
545cc85d 2727 self._sort_formats(formats)
bf1317d2 2728
11f9be09 2729 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 2730 if not keywords and webpage:
2731 keywords = [
2732 unescapeHTML(m.group('content'))
2733 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2734 for keyword in keywords:
2735 if keyword.startswith('yt:stretch='):
201c1459 2736 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2737 if mobj:
2738 # NB: float is intentional for forcing float division
2739 w, h = (float(v) for v in mobj.groups())
2740 if w > 0 and h > 0:
2741 ratio = w / h
2742 for f in formats:
2743 if f.get('vcodec') != 'none':
2744 f['stretched_ratio'] = ratio
2745 break
6449cd80 2746
545cc85d 2747 thumbnails = []
11f9be09 2748 thumbnail_dicts = traverse_obj(
2749 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2750 expected_type=dict, default=[])
2751 for thumbnail in thumbnail_dicts:
2752 thumbnail_url = thumbnail.get('url')
2753 if not thumbnail_url:
2754 continue
2755 # Sometimes youtube gives a wrong thumbnail URL. See:
2756 # https://github.com/yt-dlp/yt-dlp/issues/233
2757 # https://github.com/ytdl-org/youtube-dl/issues/28023
2758 if 'maxresdefault' in thumbnail_url:
2759 thumbnail_url = thumbnail_url.split('?')[0]
2760 thumbnails.append({
2761 'url': thumbnail_url,
2762 'height': int_or_none(thumbnail.get('height')),
2763 'width': int_or_none(thumbnail.get('width')),
2764 })
ff2751ac 2765 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2766 if thumbnail_url:
2767 thumbnails.append({
2768 'url': thumbnail_url,
ff2751ac 2769 })
0ba692ac 2770 # The best resolution thumbnails sometimes does not appear in the webpage
2771 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 2772 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2773 hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
245524e6 2774 # TODO: Test them also? - For some videos, even these don't exist
cca80fe6 2775 guaranteed_thumbnail_names = [
2776 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2777 'mqdefault', 'mq1', 'mq2', 'mq3',
2778 'default', '1', '2', '3'
2779 ]
2780 thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2781 n_thumbnail_names = len(thumbnail_names)
2782
0ba692ac 2783 thumbnails.extend({
2784 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2785 video_id=video_id, name=name, ext=ext,
2786 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
cca80fe6 2787 '_test_url': name in hq_thumbnail_names,
2788 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 2789 for thumb in thumbnails:
cca80fe6 2790 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 2791 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 2792 self._remove_duplicate_formats(thumbnails)
545cc85d 2793
7ea65411 2794 category = get_first(microformats, 'category') or search_meta('genre')
2795 channel_id = str_or_none(
2796 get_first(video_details, 'channelId')
2797 or get_first(microformats, 'externalChannelId')
2798 or search_meta('channelId'))
2799 duration = int_or_none(
2800 get_first(video_details, 'lengthSeconds')
2801 or get_first(microformats, 'lengthSeconds')
2802 or parse_duration(search_meta('duration'))) or None
2803 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2804
2805 live_content = get_first(video_details, 'isLiveContent')
2806 is_upcoming = get_first(video_details, 'isUpcoming')
2807 if is_live is None:
2808 if is_upcoming or live_content is False:
2809 is_live = False
2810 if is_upcoming is None and (live_content or is_live):
2811 is_upcoming = False
2812 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2813 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2814 if not duration and live_endtime and live_starttime:
2815 duration = live_endtime - live_starttime
2816
545cc85d 2817 info = {
2818 'id': video_id,
2819 'title': self._live_title(video_title) if is_live else video_title,
2820 'formats': formats,
2821 'thumbnails': thumbnails,
2822 'description': video_description,
2823 'upload_date': unified_strdate(
11f9be09 2824 get_first(microformats, 'uploadDate')
545cc85d 2825 or search_meta('uploadDate')),
11f9be09 2826 'uploader': get_first(video_details, 'author'),
545cc85d 2827 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2828 'uploader_url': owner_profile_url,
2829 'channel_id': channel_id,
11f9be09 2830 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
545cc85d 2831 'duration': duration,
2832 'view_count': int_or_none(
11f9be09 2833 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 2834 or search_meta('interactionCount')),
11f9be09 2835 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 2836 'age_limit': 18 if (
11f9be09 2837 get_first(microformats, 'isFamilySafe') is False
545cc85d 2838 or search_meta('isFamilyFriendly') == 'false'
2839 or search_meta('og:restrictions:age') == '18+') else 0,
2840 'webpage_url': webpage_url,
2841 'categories': [category] if category else None,
2842 'tags': keywords,
11f9be09 2843 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
7ea65411 2844 'is_live': is_live,
2845 'was_live': (False if is_live or is_upcoming or live_content is False
2846 else None if is_live is None or is_upcoming is None
2847 else live_content),
2848 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2849 'release_timestamp': live_starttime,
545cc85d 2850 }
b477fc13 2851
11f9be09 2852 pctr = get_first(player_responses, ('captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
545cc85d 2853 subtitles = {}
2854 if pctr:
774d79cc 2855 def process_language(container, base_url, lang_code, sub_name, query):
120916da 2856 lang_subs = container.setdefault(lang_code, [])
545cc85d 2857 for fmt in self._SUBTITLE_FORMATS:
2858 query.update({
2859 'fmt': fmt,
2860 })
2861 lang_subs.append({
2862 'ext': fmt,
2863 'url': update_url_query(base_url, query),
774d79cc 2864 'name': sub_name,
545cc85d 2865 })
7e72694b 2866
545cc85d 2867 for caption_track in (pctr.get('captionTracks') or []):
2868 base_url = caption_track.get('baseUrl')
2869 if not base_url:
2870 continue
2871 if caption_track.get('kind') != 'asr':
120916da 2872 lang_code = (
2873 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2874 or caption_track.get('languageCode'))
545cc85d 2875 if not lang_code:
2876 continue
2877 process_language(
774d79cc 2878 subtitles, base_url, lang_code,
2d6659b9 2879 try_get(caption_track, lambda x: x['name']['simpleText']),
774d79cc 2880 {})
545cc85d 2881 continue
2882 automatic_captions = {}
2883 for translation_language in (pctr.get('translationLanguages') or []):
2884 translation_language_code = translation_language.get('languageCode')
2885 if not translation_language_code:
2886 continue
2887 process_language(
2888 automatic_captions, base_url, translation_language_code,
fe93e2c4 2889 self._get_text(translation_language.get('languageName'), max_runs=1),
545cc85d 2890 {'tlang': translation_language_code})
2891 info['automatic_captions'] = automatic_captions
2892 info['subtitles'] = subtitles
7e72694b 2893
545cc85d 2894 parsed_url = compat_urllib_parse_urlparse(url)
2895 for component in [parsed_url.fragment, parsed_url.query]:
2896 query = compat_parse_qs(component)
2897 for k, v in query.items():
2898 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2899 d_k += '_time'
2900 if d_k not in info and k in s_ks:
2901 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2902
2903 # Youtube Music Auto-generated description
822b9d9c 2904 if video_description:
38d70284 2905 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2906 if mobj:
822b9d9c
RA
2907 release_year = mobj.group('release_year')
2908 release_date = mobj.group('release_date')
2909 if release_date:
2910 release_date = release_date.replace('-', '')
2911 if not release_year:
545cc85d 2912 release_year = release_date[:4]
2913 info.update({
2914 'album': mobj.group('album'.strip()),
2915 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2916 'track': mobj.group('track').strip(),
2917 'release_date': release_date,
cc2db878 2918 'release_year': int_or_none(release_year),
545cc85d 2919 })
7e72694b 2920
545cc85d 2921 initial_data = None
2922 if webpage:
2923 initial_data = self._extract_yt_initial_variable(
2924 webpage, self._YT_INITIAL_DATA_RE, video_id,
2925 'yt initial data')
2926 if not initial_data:
11f9be09 2927 headers = self.generate_api_headers(
2928 master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
2929 session_index=self._extract_session_index(master_ytcfg))
2930
109dd3b2 2931 initial_data = self._extract_response(
2932 item_id=video_id, ep='next', fatal=False,
11f9be09 2933 ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
109dd3b2 2934 note='Downloading initial data API JSON')
545cc85d 2935
c60ee3a2 2936 try:
2937 # This will error if there is no livechat
2938 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2939 info['subtitles']['live_chat'] = [{
2940 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2941 'video_id': video_id,
2942 'ext': 'json',
f6745c49 2943 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 2944 }]
2945 except (KeyError, IndexError, TypeError):
2946 pass
545cc85d 2947
2948 if initial_data:
7c365c21 2949 info['chapters'] = (
2950 self._extract_chapters_from_json(initial_data, duration)
2951 or self._extract_chapters_from_engagement_panel(initial_data, duration)
2952 or None)
545cc85d 2953
2954 contents = try_get(
2955 initial_data,
2956 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2957 list) or []
2958 for content in contents:
2959 vpir = content.get('videoPrimaryInfoRenderer')
2960 if vpir:
2961 stl = vpir.get('superTitleLink')
2962 if stl:
fe93e2c4 2963 stl = self._get_text(stl)
545cc85d 2964 if try_get(
2965 vpir,
2966 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2967 info['location'] = stl
2968 else:
2969 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2970 if mobj:
2971 info.update({
2972 'series': mobj.group(1),
2973 'season_number': int(mobj.group(2)),
2974 'episode_number': int(mobj.group(3)),
2975 })
2976 for tlb in (try_get(
2977 vpir,
2978 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2979 list) or []):
2980 tbr = tlb.get('toggleButtonRenderer') or {}
2981 for getter, regex in [(
2982 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2983 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2984 lambda x: x['accessibility'],
2985 lambda x: x['accessibilityData']['accessibilityData'],
2986 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2987 label = (try_get(tbr, getter, dict) or {}).get('label')
2988 if label:
2989 mobj = re.match(regex, label)
2990 if mobj:
2991 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2992 break
2993 sbr_tooltip = try_get(
2994 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2995 if sbr_tooltip:
2996 like_count, dislike_count = sbr_tooltip.split(' / ')
2997 info.update({
2998 'like_count': str_to_int(like_count),
2999 'dislike_count': str_to_int(dislike_count),
3000 })
3001 vsir = content.get('videoSecondaryInfoRenderer')
3002 if vsir:
fe93e2c4 3003 info['channel'] = self._get_text(try_get(
545cc85d 3004 vsir,
3005 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 3006 dict))
545cc85d 3007 rows = try_get(
3008 vsir,
3009 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3010 list) or []
3011 multiple_songs = False
3012 for row in rows:
3013 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3014 multiple_songs = True
3015 break
3016 for row in rows:
3017 mrr = row.get('metadataRowRenderer') or {}
3018 mrr_title = mrr.get('title')
3019 if not mrr_title:
3020 continue
fe93e2c4 3021 mrr_title = self._get_text(mrr['title'])
3022 mrr_contents_text = self._get_text(mrr['contents'][0])
545cc85d 3023 if mrr_title == 'License':
3024 info['license'] = mrr_contents_text
3025 elif not multiple_songs:
3026 if mrr_title == 'Album':
3027 info['album'] = mrr_contents_text
3028 elif mrr_title == 'Artist':
3029 info['artist'] = mrr_contents_text
3030 elif mrr_title == 'Song':
3031 info['track'] = mrr_contents_text
3032
3033 fallbacks = {
3034 'channel': 'uploader',
3035 'channel_id': 'uploader_id',
3036 'channel_url': 'uploader_url',
3037 }
3038 for to, frm in fallbacks.items():
3039 if not info.get(to):
3040 info[to] = info.get(frm)
3041
3042 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3043 v = info.get(s_k)
3044 if v:
3045 info[d_k] = v
b84071c0 3046
11f9be09 3047 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3048 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
c224251a 3049 is_membersonly = None
b28f8d24 3050 is_premium = None
c224251a
M
3051 if initial_data and is_private is not None:
3052 is_membersonly = False
b28f8d24 3053 is_premium = False
47193e02 3054 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3055 badge_labels = set()
3056 for content in contents:
3057 if not isinstance(content, dict):
3058 continue
3059 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3060 for badge_label in badge_labels:
3061 if badge_label.lower() == 'members only':
3062 is_membersonly = True
3063 elif badge_label.lower() == 'premium':
3064 is_premium = True
3065 elif badge_label.lower() == 'unlisted':
3066 is_unlisted = True
c224251a 3067
c224251a
M
3068 info['availability'] = self._availability(
3069 is_private=is_private,
b28f8d24 3070 needs_premium=is_premium,
c224251a
M
3071 needs_subscription=is_membersonly,
3072 needs_auth=info['age_limit'] >= 18,
3073 is_unlisted=None if is_private is None else is_unlisted)
3074
06167fbb 3075 # get xsrf for annotations or comments
a06916d9 3076 get_annotations = self.get_param('writeannotations', False)
3077 get_comments = self.get_param('getcomments', False)
06167fbb 3078 if get_annotations or get_comments:
29f7c58a 3079 xsrf_token = None
11f9be09 3080 if master_ytcfg:
3081 xsrf_token = try_get(master_ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
29f7c58a 3082 if not xsrf_token:
3083 xsrf_token = self._search_regex(
3084 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 3085 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 3086
3087 # annotations
06167fbb 3088 if get_annotations:
11f9be09 3089 invideo_url = get_first(
3090 player_responses,
3091 ('annotations', 0, 'playerAnnotationsUrlsRenderer', 'invideoUrl'),
3092 expected_type=str)
64b6a4e9 3093 if xsrf_token and invideo_url:
29f7c58a 3094 xsrf_field_name = None
11f9be09 3095 if master_ytcfg:
3096 xsrf_field_name = try_get(master_ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
29f7c58a 3097 if not xsrf_field_name:
3098 xsrf_field_name = self._search_regex(
3099 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 3100 webpage, 'xsrf field name',
29f7c58a 3101 group='xsrf_field_name', default='session_token')
8a784c74 3102 info['annotations'] = self._download_webpage(
64b6a4e9
RA
3103 self._proto_relative_url(invideo_url),
3104 video_id, note='Downloading annotations',
3105 errnote='Unable to download video annotations', fatal=False,
3106 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 3107
277d6ff5 3108 if get_comments:
11f9be09 3109 info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 3110
11f9be09 3111 self.mark_watched(video_id, player_responses)
d77ab8e2 3112
545cc85d 3113 return info
c5e8d7af 3114
5f6a1245 3115
8bdd16b4 3116class YoutubeTabIE(YoutubeBaseInfoExtractor):
3117 IE_DESC = 'YouTube.com tab'
70d5c17b 3118 _VALID_URL = r'''(?x)
3119 https?://
3120 (?:\w+\.)?
3121 (?:
3122 youtube(?:kids)?\.com|
3123 invidio\.us
3124 )/
3125 (?:
fe03a6cd 3126 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 3127 (?P<not_channel>
9ba5705a 3128 feed/|hashtag/|
70d5c17b 3129 (?:playlist|watch)\?.*?\blist=
3130 )|
29f7c58a 3131 (?!(?:%s)\b) # Direct URLs
70d5c17b 3132 )
3133 (?P<id>[^/?\#&]+)
3134 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 3135 IE_NAME = 'youtube:tab'
3136
81127aa5 3137 _TESTS = [{
da692b79 3138 'note': 'playlists, multipage',
8bdd16b4 3139 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3140 'playlist_mincount': 94,
3141 'info_dict': {
3142 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3143 'title': 'Игорь Клейнер - Playlists',
3144 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3145 'uploader': 'Игорь Клейнер',
3146 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 3147 },
3148 }, {
da692b79 3149 'note': 'playlists, multipage, different order',
8bdd16b4 3150 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3151 'playlist_mincount': 94,
3152 'info_dict': {
3153 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3154 'title': 'Игорь Клейнер - Playlists',
3155 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3156 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3157 'uploader': 'Игорь Клейнер',
8bdd16b4 3158 },
201c1459 3159 }, {
da692b79 3160 'note': 'playlists, series',
201c1459 3161 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3162 'playlist_mincount': 5,
3163 'info_dict': {
3164 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3165 'title': '3Blue1Brown - Playlists',
3166 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 3167 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3168 'uploader': '3Blue1Brown',
201c1459 3169 },
8bdd16b4 3170 }, {
da692b79 3171 'note': 'playlists, singlepage',
8bdd16b4 3172 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3173 'playlist_mincount': 4,
3174 'info_dict': {
3175 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3176 'title': 'ThirstForScience - Playlists',
3177 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 3178 'uploader': 'ThirstForScience',
3179 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 3180 }
3181 }, {
3182 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3183 'only_matching': True,
3184 }, {
da692b79 3185 'note': 'basic, single video playlist',
0e30a7b9 3186 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3187 'info_dict': {
0e30a7b9 3188 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3189 'uploader': 'Sergey M.',
3190 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3191 'title': 'youtube-dl public playlist',
81127aa5 3192 },
0e30a7b9 3193 'playlist_count': 1,
9291475f 3194 }, {
da692b79 3195 'note': 'empty playlist',
0e30a7b9 3196 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3197 'info_dict': {
0e30a7b9 3198 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3199 'uploader': 'Sergey M.',
3200 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3201 'title': 'youtube-dl empty playlist',
9291475f
PH
3202 },
3203 'playlist_count': 0,
3204 }, {
da692b79 3205 'note': 'Home tab',
8bdd16b4 3206 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3207 'info_dict': {
8bdd16b4 3208 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3209 'title': 'lex will - Home',
3210 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3211 'uploader': 'lex will',
3212 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3213 },
8bdd16b4 3214 'playlist_mincount': 2,
9291475f 3215 }, {
da692b79 3216 'note': 'Videos tab',
8bdd16b4 3217 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3218 'info_dict': {
8bdd16b4 3219 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3220 'title': 'lex will - Videos',
3221 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3222 'uploader': 'lex will',
3223 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3224 },
8bdd16b4 3225 'playlist_mincount': 975,
9291475f 3226 }, {
da692b79 3227 'note': 'Videos tab, sorted by popular',
8bdd16b4 3228 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3229 'info_dict': {
8bdd16b4 3230 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3231 'title': 'lex will - Videos',
3232 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3233 'uploader': 'lex will',
3234 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3235 },
8bdd16b4 3236 'playlist_mincount': 199,
9291475f 3237 }, {
da692b79 3238 'note': 'Playlists tab',
8bdd16b4 3239 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3240 'info_dict': {
8bdd16b4 3241 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3242 'title': 'lex will - Playlists',
3243 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3244 'uploader': 'lex will',
3245 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3246 },
8bdd16b4 3247 'playlist_mincount': 17,
ac7553d0 3248 }, {
da692b79 3249 'note': 'Community tab',
8bdd16b4 3250 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3251 'info_dict': {
8bdd16b4 3252 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3253 'title': 'lex will - Community',
3254 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3255 'uploader': 'lex will',
3256 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3257 },
3258 'playlist_mincount': 18,
87dadd45 3259 }, {
da692b79 3260 'note': 'Channels tab',
8bdd16b4 3261 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3262 'info_dict': {
8bdd16b4 3263 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3264 'title': 'lex will - Channels',
3265 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3266 'uploader': 'lex will',
3267 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3268 },
deaec5af 3269 'playlist_mincount': 12,
cd684175 3270 }, {
3271 'note': 'Search tab',
3272 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3273 'playlist_mincount': 40,
3274 'info_dict': {
3275 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3276 'title': '3Blue1Brown - Search - linear algebra',
3277 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3278 'uploader': '3Blue1Brown',
3279 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3280 },
6b08cdf6 3281 }, {
a0566bbf 3282 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3283 'only_matching': True,
3284 }, {
a0566bbf 3285 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3286 'only_matching': True,
3287 }, {
a0566bbf 3288 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3289 'only_matching': True,
3290 }, {
3291 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3292 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3293 'info_dict': {
3294 'title': '29C3: Not my department',
3295 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3296 'uploader': 'Christiaan008',
3297 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3298 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3299 },
3300 'playlist_count': 96,
3301 }, {
3302 'note': 'Large playlist',
3303 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3304 'info_dict': {
8bdd16b4 3305 'title': 'Uploads from Cauchemar',
3306 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3307 'uploader': 'Cauchemar',
3308 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3309 },
8bdd16b4 3310 'playlist_mincount': 1123,
3311 }, {
da692b79 3312 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3313 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3314 'only_matching': True,
4b7df0d3
JMF
3315 }, {
3316 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3317 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3318 'info_dict': {
acf757f4
PH
3319 'title': 'Uploads from Interstellar Movie',
3320 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3321 'uploader': 'Interstellar Movie',
8bdd16b4 3322 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3323 },
481cc733 3324 'playlist_mincount': 21,
358de58c 3325 }, {
3326 'note': 'Playlist with "show unavailable videos" button',
3327 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3328 'info_dict': {
3329 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3330 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3331 'uploader': 'Phim Siêu Nhân Nhật Bản',
3332 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3333 },
da692b79 3334 'playlist_mincount': 200,
5d342002 3335 }, {
da692b79 3336 'note': 'Playlist with unavailable videos in page 7',
5d342002 3337 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3338 'info_dict': {
3339 'title': 'Uploads from BlankTV',
3340 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3341 'uploader': 'BlankTV',
3342 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3343 },
da692b79 3344 'playlist_mincount': 1000,
8bdd16b4 3345 }, {
da692b79 3346 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3347 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3348 'info_dict': {
3349 'title': 'Data Analysis with Dr Mike Pound',
3350 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3351 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3352 'uploader': 'Computerphile',
deaec5af 3353 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3354 },
3355 'playlist_mincount': 11,
3356 }, {
a0566bbf 3357 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3358 'only_matching': True,
dacb3a86 3359 }, {
da692b79 3360 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3361 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3362 'info_dict': {
3363 'id': 'FqZTN594JQw',
3364 'ext': 'webm',
3365 'title': "Smiley's People 01 detective, Adventure Series, Action",
3366 'uploader': 'STREEM',
3367 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3368 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3369 'upload_date': '20150526',
3370 'license': 'Standard YouTube License',
3371 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3372 'categories': ['People & Blogs'],
3373 'tags': list,
dbdaaa23 3374 'view_count': int,
dacb3a86
S
3375 'like_count': int,
3376 'dislike_count': int,
3377 },
3378 'params': {
3379 'skip_download': True,
3380 },
13a75688 3381 'skip': 'This video is not available.',
dacb3a86 3382 'add_ie': [YoutubeIE.ie_key()],
481cc733 3383 }, {
8bdd16b4 3384 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3385 'only_matching': True,
66b48727 3386 }, {
8bdd16b4 3387 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3388 'only_matching': True,
a0566bbf 3389 }, {
3390 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3391 'info_dict': {
11f9be09 3392 'id': 'FMtPN8yp5LU', # This will keep changing
a0566bbf 3393 'ext': 'mp4',
deaec5af 3394 'title': compat_str,
a0566bbf 3395 'uploader': 'Sky News',
3396 'uploader_id': 'skynews',
3397 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3398 'upload_date': r're:\d{8}',
3399 'description': compat_str,
a0566bbf 3400 'categories': ['News & Politics'],
3401 'tags': list,
3402 'like_count': int,
3403 'dislike_count': int,
3404 },
3405 'params': {
3406 'skip_download': True,
3407 },
da692b79 3408 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3409 }, {
3410 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3411 'info_dict': {
3412 'id': 'a48o2S1cPoo',
3413 'ext': 'mp4',
3414 'title': 'The Young Turks - Live Main Show',
3415 'uploader': 'The Young Turks',
3416 'uploader_id': 'TheYoungTurks',
3417 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3418 'upload_date': '20150715',
3419 'license': 'Standard YouTube License',
3420 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3421 'categories': ['News & Politics'],
3422 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3423 'like_count': int,
3424 'dislike_count': int,
3425 },
3426 'params': {
3427 'skip_download': True,
3428 },
3429 'only_matching': True,
3430 }, {
3431 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3432 'only_matching': True,
3433 }, {
3434 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3435 'only_matching': True,
09f1580e 3436 }, {
3437 'note': 'A channel that is not live. Should raise error',
3438 'url': 'https://www.youtube.com/user/numberphile/live',
3439 'only_matching': True,
3d3dddc9 3440 }, {
3441 'url': 'https://www.youtube.com/feed/trending',
3442 'only_matching': True,
3443 }, {
3d3dddc9 3444 'url': 'https://www.youtube.com/feed/library',
3445 'only_matching': True,
3446 }, {
3d3dddc9 3447 'url': 'https://www.youtube.com/feed/history',
3448 'only_matching': True,
3449 }, {
3d3dddc9 3450 'url': 'https://www.youtube.com/feed/subscriptions',
3451 'only_matching': True,
3452 }, {
3d3dddc9 3453 'url': 'https://www.youtube.com/feed/watch_later',
3454 'only_matching': True,
3455 }, {
da692b79 3456 'note': 'Recommended - redirects to home page',
3d3dddc9 3457 'url': 'https://www.youtube.com/feed/recommended',
3458 'only_matching': True,
29f7c58a 3459 }, {
da692b79 3460 'note': 'inline playlist with not always working continuations',
29f7c58a 3461 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3462 'only_matching': True,
3463 }, {
3464 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3465 'only_matching': True,
3466 }, {
3467 'url': 'https://www.youtube.com/course',
3468 'only_matching': True,
3469 }, {
3470 'url': 'https://www.youtube.com/zsecurity',
3471 'only_matching': True,
3472 }, {
3473 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3474 'only_matching': True,
3475 }, {
3476 'url': 'https://www.youtube.com/TheYoungTurks/live',
3477 'only_matching': True,
39ed931e 3478 }, {
3479 'url': 'https://www.youtube.com/hashtag/cctv9',
3480 'info_dict': {
3481 'id': 'cctv9',
3482 'title': '#cctv9',
3483 },
3484 'playlist_mincount': 350,
201c1459 3485 }, {
3486 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3487 'only_matching': True,
9297939e 3488 }, {
da692b79 3489 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3490 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3491 'only_matching': True
fe03a6cd 3492 }, {
3493 'note': '/browse/ should redirect to /channel/',
3494 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3495 'only_matching': True
3496 }, {
3497 'note': 'VLPL, should redirect to playlist?list=PL...',
3498 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3499 'info_dict': {
3500 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3501 'uploader': 'NoCopyrightSounds',
3502 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3503 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3504 'title': 'NCS Releases',
3505 },
3506 'playlist_mincount': 166,
18db7548 3507 }, {
3508 'note': 'Topic, should redirect to playlist?list=UU...',
3509 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3510 'info_dict': {
3511 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3512 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3513 'title': 'Uploads from Royalty Free Music - Topic',
3514 'uploader': 'Royalty Free Music - Topic',
3515 },
3516 'expected_warnings': [
3517 'A channel/user page was given',
3518 'The URL does not have a videos tab',
3519 ],
3520 'playlist_mincount': 101,
3521 }, {
3522 'note': 'Topic without a UU playlist',
3523 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3524 'info_dict': {
3525 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3526 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3527 },
3528 'expected_warnings': [
3529 'A channel/user page was given',
3530 'The URL does not have a videos tab',
3531 'Falling back to channel URL',
3532 ],
3533 'playlist_mincount': 9,
abcdd12b 3534 }, {
3535 'note': 'Youtube music Album',
3536 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3537 'info_dict': {
3538 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3539 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3540 },
3541 'playlist_count': 50,
47193e02 3542 }, {
3543 'note': 'unlisted single video playlist',
3544 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3545 'info_dict': {
3546 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3547 'uploader': 'colethedj',
3548 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3549 'title': 'yt-dlp unlisted playlist test',
3550 'availability': 'unlisted'
3551 },
3552 'playlist_count': 1,
29f7c58a 3553 }]
3554
3555 @classmethod
3556 def suitable(cls, url):
3557 return False if YoutubeIE.suitable(url) else super(
3558 YoutubeTabIE, cls).suitable(url)
8bdd16b4 3559
3560 def _extract_channel_id(self, webpage):
3561 channel_id = self._html_search_meta(
3562 'channelId', webpage, 'channel id', default=None)
3563 if channel_id:
3564 return channel_id
3565 channel_url = self._html_search_meta(
3566 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3567 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3568 'twitter:app:url:googleplay'), webpage, 'channel url')
3569 return self._search_regex(
3570 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3571 channel_url, 'channel id')
15f6397c 3572
8bdd16b4 3573 @staticmethod
cd7c66cf 3574 def _extract_basic_item_renderer(item):
3575 # Modified from _extract_grid_item_renderer
201c1459 3576 known_basic_renderers = (
3577 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3578 )
3579 for key, renderer in item.items():
201c1459 3580 if not isinstance(renderer, dict):
cd7c66cf 3581 continue
201c1459 3582 elif key in known_basic_renderers:
3583 return renderer
3584 elif key.startswith('grid') and key.endswith('Renderer'):
3585 return renderer
8bdd16b4 3586
8bdd16b4 3587 def _grid_entries(self, grid_renderer):
3588 for item in grid_renderer['items']:
3589 if not isinstance(item, dict):
39b62db1 3590 continue
cd7c66cf 3591 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 3592 if not isinstance(renderer, dict):
3593 continue
fe93e2c4 3594 title = self._get_text(renderer.get('title'))
3595
8bdd16b4 3596 # playlist
3597 playlist_id = renderer.get('playlistId')
3598 if playlist_id:
3599 yield self.url_result(
3600 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3601 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3602 video_title=title)
201c1459 3603 continue
8bdd16b4 3604 # video
3605 video_id = renderer.get('videoId')
3606 if video_id:
3607 yield self._extract_video(renderer)
201c1459 3608 continue
8bdd16b4 3609 # channel
3610 channel_id = renderer.get('channelId')
3611 if channel_id:
8bdd16b4 3612 yield self.url_result(
3613 'https://www.youtube.com/channel/%s' % channel_id,
3614 ie=YoutubeTabIE.ie_key(), video_title=title)
201c1459 3615 continue
3616 # generic endpoint URL support
3617 ep_url = urljoin('https://www.youtube.com/', try_get(
3618 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3619 compat_str))
3620 if ep_url:
3621 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3622 if ie.suitable(ep_url):
3623 yield self.url_result(
3624 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3625 break
8bdd16b4 3626
3d3dddc9 3627 def _shelf_entries_from_content(self, shelf_renderer):
3628 content = shelf_renderer.get('content')
3629 if not isinstance(content, dict):
8bdd16b4 3630 return
cd7c66cf 3631 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3632 if renderer:
3633 # TODO: add support for nested playlists so each shelf is processed
3634 # as separate playlist
3635 # TODO: this includes only first N items
3636 for entry in self._grid_entries(renderer):
3637 yield entry
3638 renderer = content.get('horizontalListRenderer')
3639 if renderer:
3640 # TODO
3641 pass
8bdd16b4 3642
29f7c58a 3643 def _shelf_entries(self, shelf_renderer, skip_channels=False):
8bdd16b4 3644 ep = try_get(
3645 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3646 compat_str)
3647 shelf_url = urljoin('https://www.youtube.com', ep)
3d3dddc9 3648 if shelf_url:
29f7c58a 3649 # Skipping links to another channels, note that checking for
3650 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3651 # will not work
3652 if skip_channels and '/channels?' in shelf_url:
3653 return
fe93e2c4 3654 title = self._get_text(shelf_renderer, lambda x: x['title'])
3d3dddc9 3655 yield self.url_result(shelf_url, video_title=title)
3656 # Shelf may not contain shelf URL, fallback to extraction from content
3657 for entry in self._shelf_entries_from_content(shelf_renderer):
3658 yield entry
c5e8d7af 3659
8bdd16b4 3660 def _playlist_entries(self, video_list_renderer):
3661 for content in video_list_renderer['contents']:
3662 if not isinstance(content, dict):
3663 continue
3664 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3665 if not isinstance(renderer, dict):
3666 continue
3667 video_id = renderer.get('videoId')
3668 if not video_id:
3669 continue
3670 yield self._extract_video(renderer)
07aeced6 3671
3462ffa8 3672 def _rich_entries(self, rich_grid_renderer):
3673 renderer = try_get(
70d5c17b 3674 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3462ffa8 3675 video_id = renderer.get('videoId')
3676 if not video_id:
3677 return
3678 yield self._extract_video(renderer)
3679
8bdd16b4 3680 def _video_entry(self, video_renderer):
3681 video_id = video_renderer.get('videoId')
3682 if video_id:
3683 return self._extract_video(video_renderer)
dacb3a86 3684
8bdd16b4 3685 def _post_thread_entries(self, post_thread_renderer):
3686 post_renderer = try_get(
3687 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3688 if not post_renderer:
3689 return
3690 # video attachment
3691 video_renderer = try_get(
895b0931 3692 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3693 video_id = video_renderer.get('videoId')
3694 if video_id:
3695 entry = self._extract_video(video_renderer)
8bdd16b4 3696 if entry:
3697 yield entry
895b0931 3698 # playlist attachment
3699 playlist_id = try_get(
3700 post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3701 if playlist_id:
3702 yield self.url_result(
e28f1c0a 3703 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3704 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3705 # inline video links
3706 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3707 for run in runs:
3708 if not isinstance(run, dict):
3709 continue
3710 ep_url = try_get(
3711 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3712 if not ep_url:
3713 continue
3714 if not YoutubeIE.suitable(ep_url):
3715 continue
3716 ep_video_id = YoutubeIE._match_id(ep_url)
3717 if video_id == ep_video_id:
3718 continue
895b0931 3719 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 3720
8bdd16b4 3721 def _post_thread_continuation_entries(self, post_thread_continuation):
3722 contents = post_thread_continuation.get('contents')
3723 if not isinstance(contents, list):
3724 return
3725 for content in contents:
3726 renderer = content.get('backstagePostThreadRenderer')
3727 if not isinstance(renderer, dict):
3728 continue
3729 for entry in self._post_thread_entries(renderer):
3730 yield entry
07aeced6 3731
39ed931e 3732 r''' # unused
3733 def _rich_grid_entries(self, contents):
3734 for content in contents:
3735 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3736 if video_renderer:
3737 entry = self._video_entry(video_renderer)
3738 if entry:
3739 yield entry
3740 '''
    def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
        """Yield all entries of *tab*, following continuations page by page.

        The entries present in tab['content'] are yielded first. While a
        continuation is known, further pages are requested through
        _extract_response and their entries yielded, until no continuation
        is found, a request returns nothing, or a response has no
        recognised renderer.

        *item_id* is only used to label the page requests; *ytcfg*,
        *identity_token* and *account_syncid* are passed on to
        generate_api_headers / _extract_response.
        """

        def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
            # Yields the entries below parent_renderer['contents'] and stores
            # the continuation it finds in continuation_list[0]
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # Not an item section: only rich items are handled here
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue

                    # Renderer key -> callable returning an iterable of entries
                    known_renderers = {
                        'playlistVideoListRenderer': self._playlist_entries,
                        'gridRenderer': self._grid_entries,
                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                        'backstagePostThreadRenderer': self._post_thread_entries,
                        'videoRenderer': lambda x: [self._video_entry(x)],
                    }
                    # Only the first known renderer of each item is processed
                    for key, renderer in isr_content.items():
                        if key not in known_renderers:
                            continue
                        for entry in known_renderers[key](renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        break

                # Fall back to a continuation attached to the item section
                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            # Last resort: a continuation attached to the parent renderer
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        # One-element list used as a mutable cell that extract_entries can
        # write to, since Python 2 does not support nonlocal
        continuation_list = [None]
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]
        visitor_data = None

        for page_num in itertools.count(1):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=continuation, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # Keep the previous visitor data when the response carries none
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            # First response shape: entries under 'continuationContents'
            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                # Fresh cell so that extract_entries, when it is the handler,
                # reports the continuation of this page
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # Second response shape: items appended by the first received
            # action/endpoint. Values are (handler, key under which the
            # handler expects the item list).
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'gridChannelRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            # The handler is chosen from the first continuation item only
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                # Wrap the whole item list the way the handler expects it
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            # Neither shape was recognised: stop paging
            break
9558dcec 3856
@staticmethod
def _extract_selected_tab(tabs):
    """
    Return the renderer of the tab that is flagged as selected.
    The renderer of every tab is taken from its 'tabRenderer' or
    'expandableTabRenderer' entry; an ExtractorError is raised when none of
    them has 'selected' set to True.
    @param tabs: iterable of tab dicts
    """
    for candidate in tabs:
        tab_renderer = dict_get(candidate, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    raise ExtractorError('Unable to find selected tab')
b82f815f 3865
@classmethod
def _extract_uploader(cls, data):
    """
    Collect uploader fields from the 'playlistSidebarSecondaryInfoRenderer'.
    Returns a dict that may hold 'uploader', 'uploader_id' and
    'uploader_url'; fields whose value is None are omitted, and the dict is
    empty when no video owner entry is found.
    @param data: response
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner_run = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner_run:
        return {}
    fields = {
        'uploader': owner_run.get('text'),
        'uploader_id': try_get(
            owner_run, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner_run, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)),
    }
    return {name: value for name, value in fields.items() if value is not None}
8bdd16b4 3880
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """
    Build the playlist result for a page that is organised in tabs.
    Metadata is read from the 'channelMetadataRenderer' of the response or,
    when that is absent, from its 'playlistMetadataRenderer'. The entries
    are produced from the currently selected tab.
    @param item_id: id that is used as playlist id when no channel id is found
    @param webpage: downloaded webpage (source of ytcfg and identity token)
    @param data:    response
    @param tabs:    list of tab dicts of the page
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    meta_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if meta_renderer:
        channel_name = meta_renderer.get('title')
        channel_url = meta_renderer.get('channelUrl')
        channel_id = meta_renderer.get('externalId')
    else:
        meta_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags, thumbnails_list = [], []
    if meta_renderer:
        title = meta_renderer.get('title')
        description = meta_renderer.get('description', '')
        playlist_id = channel_id
        tags = meta_renderer.get('keywords', '').split()
        # Channel avatar first, playlist thumbnail from the sidebar second
        thumbnails_list = (
            try_get(meta_renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for thumb in thumbnails_list:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag_title = try_get(
            data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag_title or playlist_id
    # Append the name of the selected tab to the playlist title
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        # No channel metadata: fall back to the uploader shown in the sidebar
        metadata.update(self._extract_uploader(data))
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    ytcfg = self.extract_ytcfg(item_id, webpage)
    identity_token = self._extract_identity_token(webpage, item_id)
    account_syncid = self._extract_account_syncid(ytcfg, data)
    entries = self._entries(
        selected_tab, playlist_id, identity_token, account_syncid, ytcfg)
    return self.playlist_result(entries, **metadata)
73c4ac2c 3955
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """
    Yield the videos of a mix playlist, requesting further pages from the
    'next' endpoint until no new video is returned or the first video shows
    up again.
    @param playlist:    playlist dict of the first page
    @param playlist_id: id of the mix
    @param data:        response (used for the account sync id)
    @param webpage:     downloaded webpage (source of ytcfg and identity token)
    """
    first_id = last_id = None
    ytcfg = self.extract_ytcfg(playlist_id, webpage)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page;
        # when that video is not on this page, start from the beginning
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last panel item is not guaranteed to carry a watch endpoint.
        # Fall back to an empty dict so that the defaults below are used
        # instead of failing with AttributeError on None
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 3991
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """
    Handle a playlist found in the watch page results.
    When the playlist links to a URL different from the current one, the
    extraction is handed over to that URL; otherwise the playlist is
    extracted here as a mix.
    @param item_id:  id used when the playlist dict has no 'playlistId'
    @param url:      URL currently being extracted
    @param data:     response
    @param playlist: playlist dict of the watch page
    @param webpage:  downloaded webpage
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_url)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, webpage),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 4009
def _extract_availability(self, data):
    """
    Determine the availability of a playlist/tab from its sidebar.
    The badges of the 'playlistSidebarPrimaryInfoRenderer' are combined with
    the selected entry of the privacy dropdown (if any).
    Note: nothing is assumed to be public unless a 'public' label is found
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for entry in dropdown_entries:
        selected = try_get(
            entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool)
        if not selected:
            continue
        selected_label = self._get_text(
            try_get(entry, lambda x: x['privacyDropdownItemRenderer']['label'], dict) or [])
        if selected_label:
            labels.add(selected_label.lower())
            break

    is_private = is_unlisted = None
    for current in labels:
        if current == 'unlisted':
            is_unlisted = True
        elif current == 'private':
            is_private = True
        elif current == 'public':
            is_unlisted = is_private = False
    return self._availability(is_private, False, False, False, is_unlisted)
4042
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first non-empty sidebar renderer stored under the given key.
    @param data:          response
    @param info_renderer: key to look up in every playlist sidebar item
    @param expected_type: type the renderer must have to be accepted
    @returns              the renderer, or None when no item provides one
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        found = try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        if found:
            return found
    return None
4051
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Request the playlist again with unavailable videos included.
    The browse id and params are taken from the 'show unavailable videos'
    menu button when it exists; otherwise default values are sent.
    Returns None without any request if the page has no
    'playlistSidebarPrimaryInfoRenderer'.
    @param item_id: playlist id
    @param data:    response
    @param webpage: downloaded webpage (source of ytcfg and identity token)
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return None

    browse_id = params = None
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for entry in menu_items:
        if not isinstance(entry, dict):
            continue
        nav_item = entry.get('menuNavigationItemRenderer')
        label = try_get(
            nav_item, lambda x: x['text']['simpleText'], compat_str)
        if label and label.lower() == 'show unavailable videos':
            endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id = endpoint.get('browseId')
            params = endpoint.get('params')
            break

    ytcfg = self.extract_ytcfg(item_id, webpage)
    api_headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=try_get(
            self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    api_query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=api_headers, query=api_query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4090
def _extract_webpage(self, url, item_id):
    """
    Download the webpage and its initial data, retrying on incomplete data.
    The download is repeated up to 'extractor_retries' times (3 by default)
    while the initial data has neither 'contents' nor
    'currentVideoEndpoint'. ExtractorError is raised once the retries are
    used up.
    @param url:     URL to download
    @param item_id: id used for the download messages
    @returns        (webpage, data)
    """
    retries = self.get_param('extractor_retries', 3)
    count = -1
    last_error = 'Incomplete yt initial data received'
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if count:
            self.report_warning('%s. Retrying ...' % last_error)
        webpage = self._download_webpage(
            url, item_id,
            'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
        data = self.extract_yt_initial_data(item_id, webpage)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        if count >= retries:
            raise ExtractorError(last_error)
    return webpage, data
4112
@staticmethod
def _smuggle_data(entries, data):
    """
    Lazily pass the entries through, smuggling `data` into the 'url' of
    each one whenever `data` is non-empty.
    """
    for item in entries:
        if data:
            item['url'] = smuggle_url(item['url'], data)
        yield item
4119
def _real_extract(self, url):
    """
    Entry point: unsmuggle the URL, flag music URLs, run the actual
    extraction and smuggle the collected data into the resulting entries.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True
    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
4128
# Splits a URL into 'pre' (the part matched by _VALID_URL), an optional 'tab'
# component ('/' followed by word characters, only attempted when the
# 'channel_type' group matched) and 'post' (the remainder).
# NOTE(review): 'channel_type' and 'not_channel' are presumably groups of
# _VALID_URL, which is defined outside this chunk - confirm
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4130
def __real_extract(self, url, smuggled_data):
    """
    Normalize the URL, download the page and dispatch to the matching
    extraction routine.
    @param url:           unsmuggled URL
    @param smuggled_data: dict of smuggled data; 'is_music_url' is read here
    @returns              result of _extract_from_tabs, _extract_from_playlist
                          or a url_result pointing to a single video
    Raises ExtractorError when the page cannot be recognized, or when a
    '/live' tab was requested but the tabbed page was returned
    """
    item_id = self._match_id(url)
    # Always talk to www.youtube.com, whatever host was given
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Match groups of _url_re with unmatched groups replaced by ''
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                item_id = self._search_regex(
                    r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                    self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                    'playlist id')
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from the (possibly rewritten) parts and re-match it
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            if alert_type == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        # Only switch over once the playlist page is known to be usable
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)
    # data may have been replaced above, so the tabs are looked up again
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 4245
c5e8d7af 4246
class YoutubePlaylistIE(InfoExtractor):
    """Matches bare playlist ids and playlist URLs and hands them over to YoutubeTabIE."""
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE accepts or that carry a video id ('v')."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        if has_video_id:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite the input into a /playlist URL and delegate it to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query if there is one, else use the matched id
        query = parse_qs(url) or {'list': playlist_id}
        url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4331
4332
class YoutubeYtBeIE(InfoExtractor):
    """Turns youtu.be short links that carry a playlist id into watch URLs for YoutubeTabIE."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent watch URL (video + playlist) and delegate it."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4371
4372
class YoutubeYtUserIE(InfoExtractor):
    """Resolves the "ytuser:<name>" shortcut to the user's page."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the user's /user/ URL to YoutubeTabIE."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4386
b05654f0 4387
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolves the ":ytfav" family of shortcuts to the 'LL' playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the 'LL' playlist URL to YoutubeTabIE."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4405
4406
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor that pages through the 'search' endpoint."""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """
        Yield up to n videos found for the query.
        Pages are requested until a page is empty, carries no continuation
        or n videos have been yielded.
        """
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS
        yielded = 0
        continuation = {}
        for page_num in itertools.count(1):
            request.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # First page and continuation pages keep their sections in different places
            sections = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for section in sections:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [section]})

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for item in items or []:
                    video = item.get('videoRenderer') if isinstance(item, dict) else None
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    yielded += 1
                    if yielded == n:
                        return

            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query, query)
75dff0ee 4474
c9ae7b95 4475
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Same as YoutubeSearchIE, with the search params set to 'CAI%3D'."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4481
c9ae7b95 4482
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Extracts the results of a /results search URL."""
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Use the URL pattern of this class as it is."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the query ('search_query' or 'q') and 'sp' from the URL and run the search."""
        url_query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        search_terms = url_query.get('search_query') or url_query.get('q')
        query = search_terms[0]
        # 'sp' is forwarded as the search params of the API request
        self._SEARCH_PARAMS = url_query.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4509
4510
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors.
    A subclass only has to provide _FEED_NAME; the extractor name and the
    feed URL are derived from it.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        feed_name = self._FEED_NAME
        return 'youtube:%s' % feed_name

    def _real_extract(self, url):
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4527
4528
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolves the ":ytwatchlater" shortcut to the 'WL' playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the 'WL' playlist URL to YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4541
4542
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'recommended'; also matches the bare youtube.com home URL."""
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4558
1ed5b5c9 4559
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'subscriptions'."""
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4571
1ed5b5c9 4572
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'history'."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4581
4582
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """
    Catches watch/attribution_link URLs that end right after the first query
    parameter (or have none), which typically happens when an unquoted "&"
    was interpreted by the shell, and reports a helpful error.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raises ExtractorError (expected) explaining how to quote the URL."""
        # The examples name the executable of this project (yt-dlp)
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4630
4631
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catches watch URLs whose video id has only 1 to 10 characters and reports them as truncated."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raises ExtractorError (expected) naming the incomplete id."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)