]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
Fix bug where `original_url` was not propagated when `_type`=`url`
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
2d6659b9 5import base64
d92f5d5a 6import calendar
109dd3b2 7import copy
fe93e2c4 8import datetime
a5c56234 9import hashlib
0ca96d48 10import itertools
c5e8d7af 11import json
c4417ddb 12import os.path
d77ab8e2 13import random
c5e8d7af 14import re
8a784c74 15import time
e0df6211 16import traceback
c5e8d7af 17
b05654f0 18from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 19from ..compat import (
edf3e38e 20 compat_chr,
29f7c58a 21 compat_HTTPError,
c5e8d7af 22 compat_parse_qs,
545cc85d 23 compat_str,
7fd002c0 24 compat_urllib_parse_unquote_plus,
15707c7e 25 compat_urllib_parse_urlencode,
7c80519c 26 compat_urllib_parse_urlparse,
7c61bd36 27 compat_urlparse,
4bb4a188 28)
545cc85d 29from ..jsinterp import JSInterpreter
4bb4a188 30from ..utils import (
2d6659b9 31 bytes_to_intlist,
c5e8d7af 32 clean_html,
d92f5d5a 33 datetime_from_str,
11f9be09 34 dict_get,
358de58c 35 error_to_compat_str,
c5e8d7af 36 ExtractorError,
2d30521a 37 float_or_none,
11f9be09 38 format_field,
dd27fd17 39 int_or_none,
2d6659b9 40 intlist_to_bytes,
94278f72 41 mimetype2ext,
11f9be09 42 orderedSet,
6310acf5 43 parse_codecs,
49bd8c66 44 parse_count,
7c80519c 45 parse_duration,
7ea65411 46 parse_iso8601,
dca3ff4a 47 qualities,
3995d37d 48 remove_start,
cf7e015f 49 smuggle_url,
dbdaaa23 50 str_or_none,
c93d53f5 51 str_to_int,
7c365c21 52 traverse_obj,
556dbe7f 53 try_get,
c5e8d7af
PH
54 unescapeHTML,
55 unified_strdate,
cf7e015f 56 unsmuggle_url,
8bdd16b4 57 update_url_query,
21c340b8 58 url_or_none,
6e6bc8da 59 urlencode_postdata,
fe93e2c4 60 urljoin,
7c365c21 61 variadic,
c5e8d7af
PH
62)
63
5f6a1245 64
def parse_qs(url):
    """Return the parsed query string of `url` (as produced by compat_urlparse.parse_qs)"""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
67
68
de7f3446 69class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b
JMF
70 """Provide base functions for Youtube extractors"""
# Google account endpoints (referenced by the username/password login flow)
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

_LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
_CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
# '{0}' is filled in with str.format() by the user of this template
_TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

# '|'-separated alternation of names, ready to be embedded in a regex group
# NOTE(review): presumably URL path names that must not be treated as channel names - confirm at the use site
_RESERVED_NAMES = '|'.join((
    'channel', 'c', 'user', 'browse', 'playlist', 'watch', 'w', 'v', 'embed', 'e',
    'watch_popup', 'shorts', 'movies', 'results', 'shared', 'hashtag', 'trending',
    'feed', 'feeds', 'oembed', 'get_video_info', 'storefront', 'oops', 'index',
    'account', 'reporthistory', 't/terms', 'about', 'upload', 'signin', 'logout',
))

_NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False

# Regex fragment for playlist ids: a known prefix followed by 10+ id characters,
# or one of the fixed ids RDMM, WL, LL, LM
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 88
def _login(self):
    """
    Check the authentication options for YouTube; no login is performed.

    Logging in with username and password is broken, so authentication
    has to be supplied through cookies.

    If _LOGIN_REQUIRED is set and neither the 'cookiefile' nor the
    'cookiesfrombrowser' param is given, self.raise_login_required() is called.
    If a username is configured, a warning pointing to the cookies hint is reported.

    Returns None.
    """
    cookies_missing = (
        self.get_param('cookiefile') is None
        and self.get_param('cookiesfrombrowser') is None)
    if self._LOGIN_REQUIRED and cookies_missing:
        self.raise_login_required(
            'Login details are needed to download this content', method='cookies')

    # The password is irrelevant since no login request is ever sent
    username, _ = self._get_login_info()
    if username:
        self.report_warning(
            'Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
b2e8bc1b 287
def _initialize_consent(self):
    """
    Set a 'CONSENT=YES+...' cookie for .youtube.com

    Nothing is done when a '__Secure-3PSID' cookie exists or when the
    current CONSENT cookie already contains 'YES'.
    """
    yt_cookies = self._get_cookies('https://www.youtube.com/')
    if yt_cookies.get('__Secure-3PSID'):
        return
    consent_cookie = yt_cookies.get('CONSENT')
    consent_id = None
    if consent_cookie:
        if 'YES' in consent_cookie.value:
            return
        # Reuse the numeric id of a pending consent when there is one
        consent_id = self._search_regex(
            r'PENDING\+(\d+)', consent_cookie.value, 'consent', default=None)
    consent_id = consent_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 302
def _real_initialize(self):
    """Prepare the consent cookie and, when a downloader is attached, run the login checks"""
    self._initialize_consent()
    if self._downloader is not None:
        self._login()
c5e8d7af 309
# Patterns capturing the JSON object assigned to ytInitialData / ytInitialPlayerResponse
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
# Text expected right after such an assignment; appended to the patterns above
# to anchor the end of the non-greedy JSON match
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

# Built-in ytcfg per INNERTUBE client, generated from one row per client:
# (client name, client version, API key, context client name id)
_YT_DEFAULT_YTCFGS = {
    client_name: {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': client_name,
        'INNERTUBE_CLIENT_VERSION': client_version,
        'INNERTUBE_API_KEY': api_key,
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': client_name,
                'clientVersion': client_version,
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': context_client_name,
    }
    for client_name, client_version, api_key, context_client_name in (
        ('WEB', '2.20210622.10.00', 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', 1),
        ('WEB_REMIX', '1.20210621.00.00', 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30', 67),
        ('WEB_EMBEDDED_PLAYER', '1.20210620.0.1', 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', 56),
        ('ANDROID', '16.20', 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', 3),
        ('ANDROID_EMBEDDED_PLAYER', '16.20', 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', 55),
        ('ANDROID_MUSIC', '4.32', 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30', 21),
        ('IOS', '16.20', 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', 5),
        ('IOS_MUSIC', '4.32', 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og', 26),
        ('IOS_MESSAGES_EXTENSION', '16.20', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8', 66),
    )
}

# API hostname per client; _get_innertube_host() falls back to the 'WEB' entry
_YT_DEFAULT_INNERTUBE_HOSTS = {
    'DIRECT': 'youtubei.googleapis.com',
    'WEB': 'www.youtube.com',
    'WEB_REMIX': 'music.youtube.com',
    'ANDROID_MUSIC': 'music.youtube.com'
}

# clients starting with _ cannot be explicitly requested by the user
_YT_CLIENTS = {
    'web': 'WEB',
    'web_music': 'WEB_REMIX',
    '_web_embedded': 'WEB_EMBEDDED_PLAYER',
    '_web_agegate': 'TVHTML5',
    'android': 'ANDROID',
    'android_music': 'ANDROID_MUSIC',
    '_android_embedded': 'ANDROID_EMBEDDED_PLAYER',
    '_android_agegate': 'ANDROID',
    'ios': 'IOS',
    'ios_music': 'IOS_MUSIC',
    '_ios_embedded': 'IOS_MESSAGES_EXTENSION',
    '_ios_agegate': 'IOS'
}
466
109dd3b2 467 def _get_default_ytcfg(self, client='WEB'):
468 if client in self._YT_DEFAULT_YTCFGS:
469 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
470 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
471 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
472
def _get_innertube_host(self, client='WEB'):
    """Return the API hostname configured for `client`, else the one of WEB"""
    lookup_order = (client, 'WEB')
    return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, lookup_order)
475
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
    """try_get() on `ytcfg`, retried on the default ytcfg of `default_client` when the result is falsy"""
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
480
def _extract_client_name(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_NAME from `ytcfg`, else from the defaults of `default_client`"""
    def client_name(cfg):
        return cfg['INNERTUBE_CLIENT_NAME']

    return self._ytcfg_get_safe(ytcfg, client_name, compat_str, default_client)
483
@staticmethod
def _extract_session_index(*data):
    """Return the first SESSION_INDEX (converted with int_or_none) found in the given ytcfgs, else None"""
    candidates = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    # 0 is a valid index, so only None counts as "not found"
    return next((index for index in candidates if index is not None), None)
314ee305 490
def _extract_client_version(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_VERSION from `ytcfg`, else from the defaults of `default_client`"""
    def client_version(cfg):
        return cfg['INNERTUBE_CLIENT_VERSION']

    return self._ytcfg_get_safe(ytcfg, client_version, compat_str, default_client)
493
def _extract_api_key(self, ytcfg=None, default_client='WEB'):
    """Return INNERTUBE_API_KEY from `ytcfg`, else from the defaults of `default_client`"""
    def api_key(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, api_key, compat_str, default_client)
496
def _extract_context(self, ytcfg=None, default_client='WEB'):
    """
    Return the INNERTUBE_CONTEXT dict to send with API requests

    The context of `ytcfg` is returned as is when it has one. Otherwise the
    default context of `default_client` is used, with client name, client
    version and visitor data taken from `ytcfg` where a ytcfg is given.
    """
    def innertube_context(cfg):
        return try_get(cfg, lambda x: x['INNERTUBE_CONTEXT'], dict)

    supplied_context = innertube_context(ytcfg)
    if supplied_context:
        return supplied_context

    context = innertube_context(self._get_default_ytcfg(default_client))
    if ytcfg:
        # Recreate the client context (required)
        client = context['client']
        client.update({
            'clientVersion': self._extract_client_version(ytcfg, default_client),
            'clientName': self._extract_client_name(ytcfg, default_client),
        })
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            client['visitorData'] = visitor_data
    return context
516
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """
    Build the 'SAPISIDHASH <time>_<sha1>' authorization value for `origin`

    Returns None when neither a __Secure-3PAPISID nor a SAPISID cookie with
    a value is available. A missing SAPISID cookie is created from
    __Secure-3PAPISID as a side effect.
    """
    # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
    # See: https://github.com/yt-dlp/yt-dlp/issues/393
    yt_cookies = self._get_cookies('https://www.youtube.com')
    sapisid_cookie = dict_get(yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
    if sapisid_cookie is None or not sapisid_cookie.value:
        return None

    time_now = round(time.time())
    # SAPISID cookie is required if not already present
    if not yt_cookies.get('SAPISID'):
        self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie', only_once=True)
        self._set_cookie(
            '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
    self.write_debug('Extracted SAPISID cookie', only_once=True)

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    hash_input = f'{time_now} {sapisid_cookie.value} {origin}'
    sapisidhash = hashlib.sha1(hash_input.encode('utf-8')).hexdigest()
    return f'SAPISIDHASH {time_now}_{sapisidhash}'
a5c56234
M
536
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='WEB'):
    """
    POST `query` as JSON to the Innertube endpoint `ep` and return the parsed response

    The request body is {'context': ...} updated with `query`.
    `context`, `api_key` and `api_hostname` fall back to the built-in values
    of `default_client` when they are not given. Entries of `headers`
    override the generated API headers. `video_id`, `fatal`, `note` and
    `errnote` are passed on to self._download_json().
    """
    data = {'context': context or self._extract_context(default_client=default_client)}
    data.update(query)
    real_headers = self.generate_api_headers(default_client=default_client)
    real_headers.update({'content-type': 'application/json'})
    if headers:
        real_headers.update(headers)
    if not api_key:
        # Use the key that belongs to default_client. Calling _extract_api_key()
        # without arguments always yields the WEB key, which does not match
        # the context and host selected above for other clients
        api_key = self._extract_api_key(default_client=default_client)
    return self._download_json(
        'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(data).encode('utf8'), headers=real_headers,
        query={'key': api_key})
552
def extract_yt_initial_data(self, video_id, webpage):
    """Find the ytInitialData JSON in `webpage` and return it parsed"""
    patterns = (
        # Preferred: the assignment followed by a known boundary
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE)
    raw_json = self._search_regex(patterns, webpage, 'yt initial data')
    return self._parse_json(raw_json, video_id)
0c148415 559
def _extract_identity_token(self, webpage, item_id):
    """Return the ID_TOKEN from the ytcfg of `webpage`, else from a regex search on the page, else None"""
    if not webpage:
        return None
    ytcfg_token = try_get(
        self.extract_ytcfg(item_id, webpage), lambda x: x['ID_TOKEN'], compat_str)
    if ytcfg_token:
        return ytcfg_token
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
571
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'])
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated_sid:
            return delegated_sid
        datasync_id = try_get(source, datasync_getters, compat_str) or ''
        id_parts = datasync_id.split("||")
        if len(id_parts) < 2 or not id_parts[1]:
            continue
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        return id_parts[0]
a1c5d2ca 590
def extract_ytcfg(self, video_id, webpage):
    """Return the object passed to ytcfg.set(...) in `webpage` as a dict; {} when absent or unparsable"""
    if not webpage:
        return {}
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', default='{}')
    ytcfg = self._parse_json(raw_ytcfg, video_id, fatal=False)
    return ytcfg or {}
598
def generate_api_headers(
        self, ytcfg=None, identity_token=None, account_syncid=None,
        visitor_data=None, api_hostname=None, default_client='WEB', session_index=None):
    """
    Build the HTTP headers for an Innertube API request

    Client name/version come from `ytcfg`, falling back to the defaults of
    `default_client`. `visitor_data` and `session_index` are looked up in
    `ytcfg` when not passed. 'Authorization' and 'X-Origin' are only added
    if a SAPISIDHASH could be generated.
    """
    origin = 'https://' + (api_hostname or self._get_innertube_host(default_client))
    context_client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': compat_str(context_client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin
    }

    if ytcfg and not visitor_data:
        visitor_data = try_get(
            self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
    if identity_token:
        headers['X-Youtube-Identity-Token'] = identity_token
    if account_syncid:
        headers['X-Goog-PageId'] = account_syncid
    if ytcfg and session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # Index 0 is used when a syncid is given without any known session index
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index
    if visitor_data:
        headers['X-Goog-Visitor-Id'] = visitor_data

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers.update({'Authorization': auth, 'X-Origin': origin})
    return headers
29f7c58a 627
2d6659b9 628 @staticmethod
629 def _build_api_continuation_query(continuation, ctp=None):
630 query = {
631 'continuation': continuation
632 }
633 # TODO: Inconsistency with clickTrackingParams.
634 # Currently we have a fixed ctp contained within context (from ytcfg)
635 # and a ctp in root query for continuation.
636 if ctp:
637 query['clickTracking'] = {'clickTrackingParams': ctp}
638 return query
639
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Return the continuation query from nextContinuationData/reloadContinuationData of `renderer`, if any"""
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    continuation = continuation_data.get('continuation') if continuation_data else None
    if not continuation:
        return None
    return cls._build_api_continuation_query(
        continuation, continuation_data.get('clickTrackingParams'))
2d6659b9 652
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Return the continuation query for a continuation endpoint dict; None if it has no token"""
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_ep.get('clickTrackingParams'))
2d6659b9 662
@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query of `renderer`, or None

    The next/reload continuation data is tried first, then each
    continuationItemRenderer found under 'contents' and 'items'.
    """
    continuation = cls._extract_next_continuation_data(renderer)
    if continuation:
        return continuation

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'])
    for key in ('contents', 'items'):
        for content in try_get(renderer, lambda x: x[key], list) or []:
            if not isinstance(content, dict):
                continue
            continuation = cls._extract_continuation_ep_data(
                try_get(content, endpoint_getters, dict))
            if continuation:
                return continuation
    return None
683
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) for every alert in data['alerts']

    Entries that are not dicts, and alerts lacking a type or a message
    text, are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # A non-dict value has no .get(); skip it instead of raising AttributeError
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert.get('text'))
            if message:
                yield alert_type, message
696
697 def _report_alerts(self, alerts, expected=True):
698 errors = []
699 warnings = []
700 for alert_type, alert_message in alerts:
701 if alert_type.lower() == 'error':
702 errors.append([alert_type, alert_message])
703 else:
704 warnings.append([alert_type, alert_message])
705
706 for alert_type, alert_message in (warnings + errors[:-1]):
707 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
708 if errors:
709 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
710
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts of `data` and report them; extra arguments go to _report_alerts()"""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
713
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadataBadgeRenderer labels in renderer['badges']"""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}
721
@staticmethod
def _get_text(data, getter=None, max_runs=None):
    """
    Return the text of a text object: its 'simpleText', else the joined 'text' of its runs

    `getter` is one or more try_get getters selecting the text object in
    `data` (None means `data` itself); the first non-empty text wins.
    At most `max_runs` runs are joined when `max_runs` is truthy.
    Returns None if no text is found.
    """
    for get in variadic(getter):
        text_obj = data if get is None else try_get(data, get)
        simple_text = try_get(text_obj, lambda x: x['simpleText'], compat_str)
        if simple_text:
            return simple_text

        runs = try_get(text_obj, lambda x: x['runs'], list) or []
        # A bare list is treated as the list of runs
        if not runs and isinstance(text_obj, list):
            runs = text_obj
        run_limit = max_runs or len(runs)
        joined = ''.join(
            try_get(run, lambda x: x['text'], compat_str) or ''
            for run in runs[:run_limit])
        if joined:
            return joined
    return None
47193e02 740
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='WEB'):
    """
    Call the API endpoint `ep`, retrying up to the 'extractor_retries' param (default 3) times

    A request is retried on HTTP 500/503/404 and when the response has a
    falsy value for all of `check_get_keys`. Alerts in the response are
    reported through _extract_and_report_alerts().
    With `fatal` unset, failures are reported as warnings and None is
    returned instead of raising ExtractorError.
    """
    max_retries = self.get_param('extractor_retries', 3)
    required_keys = [] if check_get_keys is None else check_get_keys
    response = None
    failure = None
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        if failure:
            self.report_warning('%s. Retrying ...' % failure)
        try:
            # Always fatal here so that failures surface as ExtractorError below
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % attempt if attempt else ''))
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                failure = 'HTTP Error %s' % e.cause.code
                if attempt < max_retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return

        # Youtube may send alerts if there was an issue with the continuation page
        try:
            self._extract_and_report_alerts(response, expected=False)
        except ExtractorError as e:
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return
        if not required_keys or dict_get(response, required_keys):
            return response

        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        failure = 'Incomplete data received'
        if attempt >= max_retries:
            if fatal:
                raise ExtractorError(failure)
            self.report_warning(failure)
            return
    return response
796
9297939e 797 @staticmethod
798 def is_music_url(url):
799 return re.match(r'https?://music\.youtube\.com/', url) is not None
800
def _extract_video(self, renderer):
    """Build a 'url' type result for YoutubeIE from a video renderer dict"""
    video_id = renderer.get('videoId')

    # Leading digits (with thousands separators) of the view count text
    view_count_text = self._get_text(renderer.get('viewCountText')) or ''
    view_count_digits = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
        'view count', default=None)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
        'title': self._get_text(renderer.get('title')),
        'description': self._get_text(renderer.get('descriptionSnippet')),
        'duration': parse_duration(self._get_text(renderer.get('lengthText'))),
        'view_count': str_to_int(view_count_digits),
        'uploader': self._get_text(
            renderer, (lambda x: x['ownerText'], lambda x: x['shortBylineText'])),
    }
824
0c148415 825
360e1ca5 826class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 827 IE_DESC = 'YouTube.com'
bc2ca1bb 828 _INVIDIOUS_SITES = (
829 # invidious-redirect websites
830 r'(?:www\.)?redirect\.invidious\.io',
831 r'(?:(?:www|dev)\.)?invidio\.us',
832 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
833 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 834 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 835 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 836 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
bc2ca1bb 837 # youtube-dl invidious instances list
838 r'(?:(?:www|no)\.)?invidiou\.sh',
839 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
840 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 841 r'(?:www\.)?invidious\.mastodon\.host',
842 r'(?:www\.)?invidious\.zapashcanon\.fr',
ed807c18 843 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
201c1459 844 r'(?:www\.)?invidious\.tinfoil-hat\.net',
845 r'(?:www\.)?invidious\.himiko\.cloud',
846 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 847 r'(?:www\.)?invidious\.tube',
848 r'(?:www\.)?invidiou\.site',
849 r'(?:www\.)?invidious\.site',
850 r'(?:www\.)?invidious\.xyz',
851 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 852 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 853 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 854 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 855 r'(?:www\.)?tube\.poal\.co',
856 r'(?:www\.)?tube\.connect\.cafe',
857 r'(?:www\.)?vid\.wxzm\.sx',
858 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 859 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 860 r'(?:www\.)?yewtu\.be',
861 r'(?:www\.)?yt\.elukerio\.org',
862 r'(?:www\.)?yt\.lelux\.fi',
863 r'(?:www\.)?invidious\.ggc-project\.de',
864 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 865 r'(?:www\.)?ytprivate\.com',
866 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 867 r'(?:www\.)?invidious\.toot\.koeln',
868 r'(?:www\.)?invidious\.fdn\.fr',
869 r'(?:www\.)?watch\.nettohikari\.com',
ed807c18 870 r'(?:www\.)?invidious\.namazso\.eu',
871 r'(?:www\.)?invidious\.silkky\.cloud',
872 r'(?:www\.)?invidious\.exonip\.de',
873 r'(?:www\.)?invidious\.riverside\.rocks',
874 r'(?:www\.)?invidious\.blamefran\.net',
875 r'(?:www\.)?invidious\.moomoo\.de',
876 r'(?:www\.)?ytb\.trom\.tf',
877 r'(?:www\.)?yt\.cyberhost\.uk',
bc2ca1bb 878 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
879 r'(?:www\.)?qklhadlycap4cnod\.onion',
880 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
881 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
882 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
883 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
884 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
885 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
ed807c18 886 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
887 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
888 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
889 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
bc2ca1bb 890 )
cb7dfeea 891 _VALID_URL = r"""(?x)^
c5e8d7af 892 (
edb53e2d 893 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 894 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
895 (?:www\.)?deturl\.com/www\.youtube\.com|
896 (?:www\.)?pwnyoutube\.com|
897 (?:www\.)?hooktube\.com|
898 (?:www\.)?yourepeat\.com|
899 tube\.majestyc\.net|
900 %(invidious)s|
901 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
902 (?:.*?\#/)? # handle anchor (#/) redirect urls
903 (?: # the various things that can precede the ID:
ac7553d0 904 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 905 |(?: # or the v= param in all its forms
f7000f3a 906 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 907 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 908 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
909 v=
910 )
f4b05232 911 ))
cbaed4bb
S
912 |(?:
913 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
914 vid\.plus| # or vid.plus/xxxx
915 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 916 %(invidious)s
cbaed4bb 917 )/
edb53e2d 918 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 919 )
c5e8d7af 920 )? # all until now is optional -> you can pass the naked ID
201c1459 921 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 922 (?(1).+)? # if we found the ID, everything can follow
9297939e 923 (?:\#|$)""" % {
bc2ca1bb 924 'invidious': '|'.join(_INVIDIOUS_SITES),
925 }
e40c758c 926 _PLAYER_INFO_RE = (
cc2db878 927 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
928 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 929 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 930 )
2c62dc26 931 _formats = {
c2d3cb4c 932 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
933 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
934 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
935 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
936 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
937 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
938 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
939 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 940 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 941 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
942 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
943 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
944 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
945 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
946 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 947 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 948 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
949 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 950
951
952 # 3D videos
c2d3cb4c 953 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
954 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
955 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
956 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 957 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
958 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
959 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 960
96fb5605 961 # Apple HTTP Live Streaming
11f12195 962 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 963 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
964 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
965 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
966 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
967 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 968 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
969 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
970
971 # DASH mp4 video
d23028a8
S
972 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
973 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
974 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
975 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
976 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 977 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
978 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
979 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
980 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
981 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
982 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
983 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 984
f6f1fc92 985 # Dash mp4 audio
d23028a8
S
986 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
987 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
988 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
989 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
990 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
991 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
992 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
993
994 # Dash webm
d23028a8
S
995 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
996 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
997 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
998 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
999 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1000 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1001 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1002 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1003 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1004 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1005 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1006 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1007 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1008 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1009 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1010 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1011 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1012 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1013 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1014 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1015 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1016 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1017
1018 # Dash webm audio
d23028a8
S
1019 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1020 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1021
0857baad 1022 # Dash webm audio with opus inside
d23028a8
S
1023 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1024 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1025 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1026
ce6b9a2d
PH
1027 # RTMP (unnamed)
1028 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1029
1030 # av01 video only formats sometimes served with "unknown" codecs
1031 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1032 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1033 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1034 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 1035 }
29f7c58a 1036 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1037
109dd3b2 1038 _AGE_GATE_REASONS = (
1039 'Sign in to confirm your age',
1040 'This video may be inappropriate for some users.',
1041 'Sorry, this content is age-restricted.')
1042
fd5c4aab
S
1043 _GEO_BYPASS = False
1044
78caa52a 1045 IE_NAME = 'youtube'
2eb88d95
PH
1046 _TESTS = [
1047 {
2d3d2997 1048 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1049 'info_dict': {
1050 'id': 'BaW_jenozKc',
1051 'ext': 'mp4',
3867038a 1052 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1053 'uploader': 'Philipp Hagemeister',
1054 'uploader_id': 'phihag',
ec85ded8 1055 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
1056 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1057 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1058 'upload_date': '20121002',
3867038a 1059 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 1060 'categories': ['Science & Technology'],
3867038a 1061 'tags': ['youtube-dl'],
556dbe7f 1062 'duration': 10,
dbdaaa23 1063 'view_count': int,
3e7c1224
PH
1064 'like_count': int,
1065 'dislike_count': int,
7c80519c 1066 'start_time': 1,
297a564b 1067 'end_time': 9,
2eb88d95 1068 }
0e853ca4 1069 },
fccd3771 1070 {
4bc3a23e
PH
1071 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1072 'note': 'Embed-only video (#1746)',
1073 'info_dict': {
1074 'id': 'yZIXLfi8CZQ',
1075 'ext': 'mp4',
1076 'upload_date': '20120608',
1077 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1078 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1079 'uploader': 'SET India',
94bfcd23 1080 'uploader_id': 'setindia',
ec85ded8 1081 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1082 'age_limit': 18,
545cc85d 1083 },
1084 'skip': 'Private video',
fccd3771 1085 },
11b56058 1086 {
8bdd16b4 1087 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1088 'note': 'Use the first video ID in the URL',
1089 'info_dict': {
1090 'id': 'BaW_jenozKc',
1091 'ext': 'mp4',
3867038a 1092 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1093 'uploader': 'Philipp Hagemeister',
1094 'uploader_id': 'phihag',
ec85ded8 1095 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 1096 'upload_date': '20121002',
3867038a 1097 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 1098 'categories': ['Science & Technology'],
3867038a 1099 'tags': ['youtube-dl'],
556dbe7f 1100 'duration': 10,
dbdaaa23 1101 'view_count': int,
11b56058
PM
1102 'like_count': int,
1103 'dislike_count': int,
34a7de29
S
1104 },
1105 'params': {
1106 'skip_download': True,
1107 },
11b56058 1108 },
dd27fd17 1109 {
2d3d2997 1110 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1111 'note': '256k DASH audio (format 141) via DASH manifest',
1112 'info_dict': {
1113 'id': 'a9LDPn-MO4I',
1114 'ext': 'm4a',
1115 'upload_date': '20121002',
1116 'uploader_id': '8KVIDEO',
ec85ded8 1117 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1118 'description': '',
1119 'uploader': '8KVIDEO',
1120 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1121 },
4bc3a23e
PH
1122 'params': {
1123 'youtube_include_dash_manifest': True,
1124 'format': '141',
4919603f 1125 },
de3c7fe0 1126 'skip': 'format 141 not served anymore',
dd27fd17 1127 },
8bdd16b4 1128 # DASH manifest with encrypted signature
1129 {
1130 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1131 'info_dict': {
1132 'id': 'IB3lcPjvWLA',
1133 'ext': 'm4a',
1134 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1135 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1136 'duration': 244,
1137 'uploader': 'AfrojackVEVO',
1138 'uploader_id': 'AfrojackVEVO',
1139 'upload_date': '20131011',
cc2db878 1140 'abr': 129.495,
8bdd16b4 1141 },
1142 'params': {
1143 'youtube_include_dash_manifest': True,
1144 'format': '141/bestaudio[ext=m4a]',
1145 },
1146 },
dd2d55f1 1147 # Normal age-gate video (embed allowed)
c522adb1 1148 {
2d3d2997 1149 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1150 'info_dict': {
1151 'id': 'HtVdAasjOgU',
1152 'ext': 'mp4',
1153 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1154 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1155 'duration': 142,
c522adb1
JMF
1156 'uploader': 'The Witcher',
1157 'uploader_id': 'WitcherGame',
ec85ded8 1158 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1159 'upload_date': '20140605',
34952f09 1160 'age_limit': 18,
c522adb1
JMF
1161 },
1162 },
8bdd16b4 1163 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1164 # YouTube Red ad is not captured for creator
1165 {
1166 'url': '__2ABJjxzNo',
1167 'info_dict': {
1168 'id': '__2ABJjxzNo',
1169 'ext': 'mp4',
1170 'duration': 266,
1171 'upload_date': '20100430',
1172 'uploader_id': 'deadmau5',
1173 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1174 'creator': 'deadmau5',
1175 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1176 'uploader': 'deadmau5',
1177 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1178 'alt_title': 'Some Chords',
8bdd16b4 1179 },
1180 'expected_warnings': [
1181 'DASH manifest missing',
1182 ]
1183 },
067aa17e 1184 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1185 {
1186 'url': 'lqQg6PlCWgI',
1187 'info_dict': {
1188 'id': 'lqQg6PlCWgI',
1189 'ext': 'mp4',
556dbe7f 1190 'duration': 6085,
90227264 1191 'upload_date': '20150827',
cbe2bd91 1192 'uploader_id': 'olympic',
ec85ded8 1193 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1194 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
11f9be09 1195 'uploader': 'Olympics',
cbe2bd91
PH
1196 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1197 },
1198 'params': {
1199 'skip_download': 'requires avconv',
e52a40ab 1200 }
cbe2bd91 1201 },
6271f1ca
PH
1202 # Non-square pixels
1203 {
1204 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1205 'info_dict': {
1206 'id': '_b-2C3KPAM0',
1207 'ext': 'mp4',
1208 'stretched_ratio': 16 / 9.,
556dbe7f 1209 'duration': 85,
6271f1ca
PH
1210 'upload_date': '20110310',
1211 'uploader_id': 'AllenMeow',
ec85ded8 1212 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1213 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1214 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1215 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1216 },
06b491eb
S
1217 },
1218 # url_encoded_fmt_stream_map is empty string
1219 {
1220 'url': 'qEJwOuvDf7I',
1221 'info_dict': {
1222 'id': 'qEJwOuvDf7I',
f57b7835 1223 'ext': 'webm',
06b491eb
S
1224 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1225 'description': '',
1226 'upload_date': '20150404',
1227 'uploader_id': 'spbelect',
1228 'uploader': 'Наблюдатели Петербурга',
1229 },
1230 'params': {
1231 'skip_download': 'requires avconv',
e323cf3f
S
1232 },
1233 'skip': 'This live event has ended.',
06b491eb 1234 },
067aa17e 1235 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1236 {
1237 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1238 'info_dict': {
1239 'id': 'FIl7x6_3R5Y',
eb6793ba 1240 'ext': 'webm',
da77d856
S
1241 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1242 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1243 'duration': 220,
da77d856
S
1244 'upload_date': '20150625',
1245 'uploader_id': 'dorappi2000',
ec85ded8 1246 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1247 'uploader': 'dorappi2000',
eb6793ba 1248 'formats': 'mincount:31',
da77d856 1249 },
eb6793ba 1250 'skip': 'not actual anymore',
2ee8f5d8 1251 },
8a1a26ce
YCH
1252 # DASH manifest with segment_list
1253 {
1254 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1255 'md5': '8ce563a1d667b599d21064e982ab9e31',
1256 'info_dict': {
1257 'id': 'CsmdDsKjzN8',
1258 'ext': 'mp4',
17ee98e1 1259 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1260 'uploader': 'Airtek',
1261 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1262 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1263 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1264 },
1265 'params': {
1266 'youtube_include_dash_manifest': True,
1267 'format': '135', # bestvideo
be49068d
S
1268 },
1269 'skip': 'This live event has ended.',
2ee8f5d8 1270 },
cf7e015f
S
1271 {
1272 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1273 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1274 'info_dict': {
545cc85d 1275 'id': 'jvGDaLqkpTg',
1276 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1277 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1278 },
1279 'playlist': [{
1280 'info_dict': {
545cc85d 1281 'id': 'jvGDaLqkpTg',
cf7e015f 1282 'ext': 'mp4',
545cc85d 1283 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1284 'description': 'md5:e03b909557865076822aa169218d6a5d',
1285 'duration': 10643,
1286 'upload_date': '20161111',
1287 'uploader': 'Team PGP',
1288 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1289 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1290 },
1291 }, {
1292 'info_dict': {
545cc85d 1293 'id': '3AKt1R1aDnw',
cf7e015f 1294 'ext': 'mp4',
545cc85d 1295 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1296 'description': 'md5:e03b909557865076822aa169218d6a5d',
1297 'duration': 10991,
1298 'upload_date': '20161111',
1299 'uploader': 'Team PGP',
1300 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1301 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1302 },
1303 }, {
1304 'info_dict': {
545cc85d 1305 'id': 'RtAMM00gpVc',
cf7e015f 1306 'ext': 'mp4',
545cc85d 1307 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1308 'description': 'md5:e03b909557865076822aa169218d6a5d',
1309 'duration': 10995,
1310 'upload_date': '20161111',
1311 'uploader': 'Team PGP',
1312 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1313 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1314 },
1315 }, {
1316 'info_dict': {
545cc85d 1317 'id': '6N2fdlP3C5U',
cf7e015f 1318 'ext': 'mp4',
545cc85d 1319 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1320 'description': 'md5:e03b909557865076822aa169218d6a5d',
1321 'duration': 10990,
1322 'upload_date': '20161111',
1323 'uploader': 'Team PGP',
1324 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1325 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1326 },
1327 }],
1328 'params': {
1329 'skip_download': True,
1330 },
cbaed4bb 1331 },
f9f49d87 1332 {
067aa17e 1333 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1334 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1335 'info_dict': {
1336 'id': 'gVfLd0zydlo',
1337 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1338 },
1339 'playlist_count': 2,
be49068d 1340 'skip': 'Not multifeed anymore',
f9f49d87 1341 },
cbaed4bb 1342 {
2d3d2997 1343 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1344 'only_matching': True,
0e49d9a6 1345 },
6d4fc66b 1346 {
2d3d2997 1347 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1348 'only_matching': True,
1349 },
0e49d9a6 1350 {
067aa17e 1351 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1352 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1353 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1354 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1355 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1356 'info_dict': {
1357 'id': 'lsguqyKfVQg',
1358 'ext': 'mp4',
1359 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1360 'alt_title': 'Dark Walk',
0e49d9a6 1361 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1362 'duration': 133,
0e49d9a6
LL
1363 'upload_date': '20151119',
1364 'uploader_id': 'IronSoulElf',
ec85ded8 1365 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1366 'uploader': 'IronSoulElf',
11f9be09 1367 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1368 'track': 'Dark Walk',
1369 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1370 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1371 },
1372 'params': {
1373 'skip_download': True,
1374 },
1375 },
61f92af1 1376 {
067aa17e 1377 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1378 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1379 'only_matching': True,
1380 },
313dfc45
LL
1381 {
1382 # Video with yt:stretch=17:0
1383 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1384 'info_dict': {
1385 'id': 'Q39EVAstoRM',
1386 'ext': 'mp4',
1387 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1388 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1389 'upload_date': '20151107',
1390 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1391 'uploader': 'CH GAMER DROID',
1392 },
1393 'params': {
1394 'skip_download': True,
1395 },
be49068d 1396 'skip': 'This video does not exist.',
313dfc45 1397 },
201c1459 1398 {
1399 # Video with incomplete 'yt:stretch=16:'
1400 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1401 'only_matching': True,
1402 },
7caf9830
S
1403 {
1404 # Video licensed under Creative Commons
1405 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1406 'info_dict': {
1407 'id': 'M4gD1WSo5mA',
1408 'ext': 'mp4',
1409 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1410 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1411 'duration': 721,
7caf9830
S
1412 'upload_date': '20150127',
1413 'uploader_id': 'BerkmanCenter',
ec85ded8 1414 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1415 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1416 'license': 'Creative Commons Attribution license (reuse allowed)',
1417 },
1418 'params': {
1419 'skip_download': True,
1420 },
1421 },
fd050249
S
1422 {
1423 # Channel-like uploader_url
1424 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1425 'info_dict': {
1426 'id': 'eQcmzGIKrzg',
1427 'ext': 'mp4',
1428 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1429 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1430 'duration': 4060,
fd050249 1431 'upload_date': '20151119',
eb6793ba 1432 'uploader': 'Bernie Sanders',
fd050249 1433 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1434 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1435 'license': 'Creative Commons Attribution license (reuse allowed)',
1436 },
1437 'params': {
1438 'skip_download': True,
1439 },
1440 },
040ac686
S
1441 {
1442 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1443 'only_matching': True,
7f29cf54
S
1444 },
1445 {
067aa17e 1446 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1447 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1448 'only_matching': True,
6496ccb4
S
1449 },
1450 {
1451 # Rental video preview
1452 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1453 'info_dict': {
1454 'id': 'uGpuVWrhIzE',
1455 'ext': 'mp4',
1456 'title': 'Piku - Trailer',
1457 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1458 'upload_date': '20150811',
1459 'uploader': 'FlixMatrix',
1460 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1461 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1462 'license': 'Standard YouTube License',
1463 },
1464 'params': {
1465 'skip_download': True,
1466 },
eb6793ba 1467 'skip': 'This video is not available.',
022a5d66 1468 },
12afdc2a
S
1469 {
1470 # YouTube Red video with episode data
1471 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1472 'info_dict': {
1473 'id': 'iqKdEhx-dD4',
1474 'ext': 'mp4',
1475 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1476 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1477 'duration': 2085,
12afdc2a
S
1478 'upload_date': '20170118',
1479 'uploader': 'Vsauce',
1480 'uploader_id': 'Vsauce',
1481 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1482 'series': 'Mind Field',
1483 'season_number': 1,
1484 'episode_number': 1,
1485 },
1486 'params': {
1487 'skip_download': True,
1488 },
1489 'expected_warnings': [
1490 'Skipping DASH manifest',
1491 ],
1492 },
c7121fa7
S
1493 {
1494 # The following content has been identified by the YouTube community
1495 # as inappropriate or offensive to some audiences.
1496 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1497 'info_dict': {
1498 'id': '6SJNVb0GnPI',
1499 'ext': 'mp4',
1500 'title': 'Race Differences in Intelligence',
1501 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1502 'duration': 965,
1503 'upload_date': '20140124',
1504 'uploader': 'New Century Foundation',
1505 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1506 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1507 },
1508 'params': {
1509 'skip_download': True,
1510 },
545cc85d 1511 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1512 },
022a5d66
S
1513 {
1514 # itag 212
1515 'url': '1t24XAntNCY',
1516 'only_matching': True,
fd5c4aab
S
1517 },
1518 {
1519 # geo restricted to JP
1520 'url': 'sJL6WA-aGkQ',
1521 'only_matching': True,
1522 },
cd5a74a2
S
1523 {
1524 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1525 'only_matching': True,
1526 },
bc2ca1bb 1527 {
1528 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1529 'only_matching': True,
1530 },
1531 {
1532 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1533 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1534 'only_matching': True,
1535 },
825cd268
RA
1536 {
1537 # DRM protected
1538 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1539 'only_matching': True,
4fe54c12
S
1540 },
1541 {
1542 # Video with unsupported adaptive stream type formats
1543 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1544 'info_dict': {
1545 'id': 'Z4Vy8R84T1U',
1546 'ext': 'mp4',
1547 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1548 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1549 'duration': 433,
1550 'upload_date': '20130923',
1551 'uploader': 'Amelia Putri Harwita',
1552 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1553 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1554 'formats': 'maxcount:10',
1555 },
1556 'params': {
1557 'skip_download': True,
1558 'youtube_include_dash_manifest': False,
1559 },
5429d6a9 1560 'skip': 'not actual anymore',
5caabd3c 1561 },
1562 {
822b9d9c 1563 # Youtube Music Auto-generated description
5caabd3c 1564 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1565 'info_dict': {
1566 'id': 'MgNrAu2pzNs',
1567 'ext': 'mp4',
1568 'title': 'Voyeur Girl',
1569 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1570 'upload_date': '20190312',
5429d6a9
S
1571 'uploader': 'Stephen - Topic',
1572 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1573 'artist': 'Stephen',
1574 'track': 'Voyeur Girl',
1575 'album': 'it\'s too much love to know my dear',
1576 'release_date': '20190313',
1577 'release_year': 2019,
1578 },
1579 'params': {
1580 'skip_download': True,
1581 },
1582 },
66b48727
RA
1583 {
1584 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1585 'only_matching': True,
1586 },
011e75e6
S
1587 {
1588 # invalid -> valid video id redirection
1589 'url': 'DJztXj2GPfl',
1590 'info_dict': {
1591 'id': 'DJztXj2GPfk',
1592 'ext': 'mp4',
1593 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1594 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1595 'upload_date': '20090125',
1596 'uploader': 'Prochorowka',
1597 'uploader_id': 'Prochorowka',
1598 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1599 'artist': 'Panjabi MC',
1600 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1601 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1602 },
1603 'params': {
1604 'skip_download': True,
1605 },
545cc85d 1606 'skip': 'Video unavailable',
ea74e00b
DP
1607 },
1608 {
1609 # empty description results in an empty string
1610 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1611 'info_dict': {
1612 'id': 'x41yOUIvK2k',
1613 'ext': 'mp4',
1614 'title': 'IMG 3456',
1615 'description': '',
1616 'upload_date': '20170613',
1617 'uploader_id': 'ElevageOrVert',
1618 'uploader': 'ElevageOrVert',
1619 },
1620 'params': {
1621 'skip_download': True,
1622 },
1623 },
a0566bbf 1624 {
29f7c58a 1625 # with '};' inside yt initial data (see [1])
1626 # see [2] for an example with '};' inside ytInitialPlayerResponse
1627 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1628 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1629 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1630 'info_dict': {
1631 'id': 'CHqg6qOn4no',
1632 'ext': 'mp4',
1633 'title': 'Part 77 Sort a list of simple types in c#',
1634 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1635 'upload_date': '20130831',
1636 'uploader_id': 'kudvenkat',
1637 'uploader': 'kudvenkat',
1638 },
1639 'params': {
1640 'skip_download': True,
1641 },
1642 },
29f7c58a 1643 {
1644 # another example of '};' in ytInitialData
1645 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1646 'only_matching': True,
1647 },
1648 {
1649 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1650 'only_matching': True,
1651 },
545cc85d 1652 {
cc2db878 1653 # https://github.com/ytdl-org/youtube-dl/pull/28094
1654 'url': 'OtqTfy26tG0',
1655 'info_dict': {
1656 'id': 'OtqTfy26tG0',
1657 'ext': 'mp4',
1658 'title': 'Burn Out',
1659 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1660 'upload_date': '20141120',
1661 'uploader': 'The Cinematic Orchestra - Topic',
1662 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1663 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1664 'artist': 'The Cinematic Orchestra',
1665 'track': 'Burn Out',
1666 'album': 'Every Day',
1667 'release_data': None,
1668 'release_year': None,
1669 },
1670 'params': {
1671 'skip_download': True,
1672 },
545cc85d 1673 },
bc2ca1bb 1674 {
1675 # controversial video, only works with bpctr when authenticated with cookies
1676 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1677 'only_matching': True,
1678 },
a1a7907b 1679 {
1680 # controversial video, requires bpctr/contentCheckOk
1681 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1682 'info_dict': {
1683 'id': 'SZJvDhaSDnc',
1684 'ext': 'mp4',
1685 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1686 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1687 'uploader': 'CBS This Morning',
11f9be09 1688 'uploader_id': 'CBSThisMorning',
a1a7907b 1689 'upload_date': '20140716',
1690 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1691 }
1692 },
f7ad7160 1693 {
1694 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1695 'url': 'cBvYw8_A0vQ',
1696 'info_dict': {
1697 'id': 'cBvYw8_A0vQ',
1698 'ext': 'mp4',
1699 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1700 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1701 'upload_date': '20201120',
1702 'uploader': 'Walk around Japan',
1703 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1704 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1705 },
1706 'params': {
1707 'skip_download': True,
1708 },
0fb983f6 1709 }, {
1710 # Has multiple audio streams
1711 'url': 'WaOKSUlf4TM',
1712 'only_matching': True
9297939e 1713 }, {
1714 # Requires Premium: has format 141 when requested using YTM url
1715 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1716 'only_matching': True
1717 }, {
120916da 1718 # multiple subtitles with same lang_code
1719 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1720 'only_matching': True,
109dd3b2 1721 }, {
1722 # Force use android client fallback
1723 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1724 'info_dict': {
1725 'id': 'YOelRv7fMxY',
11f9be09 1726 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 1727 'ext': '3gp',
1728 'upload_date': '20210624',
1729 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1730 'uploader': 'colinfurze',
11f9be09 1731 'uploader_id': 'colinfurze',
109dd3b2 1732 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
11f9be09 1733 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
109dd3b2 1734 },
1735 'params': {
1736 'format': '17', # 3gp format available on android
1737 'extractor_args': {'youtube': {'player_client': ['android']}},
1738 },
120916da 1739 },
109dd3b2 1740 {
1741 # Skip download of additional client configs (remix client config in this case)
1742 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1743 'only_matching': True,
1744 'params': {
1745 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1746 },
1747 }
2eb88d95
PH
1748 ]
1749
@classmethod
def suitable(cls, url):
    """Tell whether this extractor accepts *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected; everything else is decided by the parent implementation.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_values = parse_qs(url).get('list', [None])
    has_list = bool(list_values[0])
    return False if has_list else super(YoutubeIE, cls).suitable(url)
1759
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Set up the extractor together with its two in-memory caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _code_cache:   player id -> downloaded player code (see _load_player)
    # _player_cache: (player_url, signature shape) -> signature function
    #                (see _decrypt_signature)
    self._code_cache, self._player_cache = {}, {}
e0df6211 1764
def _extract_player_url(self, ytcfg=None, webpage=None):
    """Return an absolute URL of the JS player, or None when none is found.

    The ``PLAYER_JS_URL`` entry of *ytcfg* is preferred; the raw *webpage*
    is only searched when the config gives nothing.
    """
    found = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
    if webpage and not found:
        found = self._search_regex(
            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
            webpage, 'player URL', fatal=False)
    if not found:
        return None

    # Protocol-relative URL: only the scheme is missing
    if found.startswith('//'):
        return 'https:' + found
    # Already absolute
    if re.match(r'https?://', found):
        return found
    # Anything else is resolved against the YouTube origin
    return compat_urlparse.urljoin('https://www.youtube.com', found)
1779
60064c53
PH
1780 def _signature_cache_id(self, example_sig):
1781 """ Return a string representation of a signature """
78caa52a 1782 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
60064c53 1783
e40c758c
S
1784 @classmethod
1785 def _extract_player_info(cls, player_url):
1786 for player_re in cls._PLAYER_INFO_RE:
1787 id_m = re.search(player_re, player_url)
1788 if id_m:
1789 break
1790 else:
c081b35c 1791 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 1792 return id_m.group('id')
e40c758c 1793
109dd3b2 1794 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1795 player_id = self._extract_player_info(player_url)
1796 if player_id not in self._code_cache:
1797 self._code_cache[player_id] = self._download_webpage(
1798 player_url, video_id, fatal=fatal,
1799 note='Downloading player ' + player_id,
1800 errnote='Download of %s failed' % player_url)
1801 return player_id in self._code_cache
1802
e40c758c 1803 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 1804 player_id = self._extract_player_info(player_url)
e0df6211 1805
c4417ddb 1806 # Read from filesystem cache
545cc85d 1807 func_id = 'js_%s_%s' % (
1808 player_id, self._signature_cache_id(example_sig))
c4417ddb 1809 assert os.path.basename(func_id) == func_id
a0e07d31 1810
69ea8ca4 1811 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1812 if cache_spec is not None:
78caa52a 1813 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1814
109dd3b2 1815 if self._load_player(video_id, player_url):
1816 code = self._code_cache[player_id]
1817 res = self._parse_sig_js(code)
e0df6211 1818
109dd3b2 1819 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1820 cache_res = res(test_string)
1821 cache_spec = [ord(c) for c in cache_res]
83799698 1822
109dd3b2 1823 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1824 return res
83799698 1825
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the signature function *func*.

    *func* is probed with a string of distinct characters; the resulting
    permutation is written as a sum of ``s[...]`` index and slice
    expressions and sent to the screen.
    """
    def _slice_expr(first, last, stride):
        lower = str(first) if first != 0 else ''
        stop = last + stride
        upper = (':%d' % stop) if stop >= 0 else ':'
        stride_part = (':%d' % stride) if stride != 1 else ''
        return 's[%s%s%s]' % (lower, upper, stride_part)

    def _sig_code_parts(indices):
        stride = None
        # Quelch pyflakes warnings - run_start will be set when stride is set
        run_start = '(Never used)'
        for cur, before in zip(indices[1:], indices[:-1]):
            delta = cur - before
            if stride is not None:
                # Inside a run: emit it as soon as the stride changes
                if delta != stride:
                    yield _slice_expr(run_start, before, stride)
                    stride = None
                continue
            if delta in (-1, 1):
                # Two neighbouring indices start a new run
                stride, run_start = delta, before
            else:
                yield 's[%d]' % before
        if stride is None:
            yield 's[%d]' % cur
        else:
            yield _slice_expr(run_start, cur, stride)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    permutation = [ord(c) for c in func(probe)]
    expr_code = ' + '.join(_sig_code_parts(permutation))
    part_lengths = ', '.join(compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    # NOTE(review): the indentation inside the second literal is shown as a
    # single space in this view - confirm against the repository
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1864
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player code and wrap it.

    The function name is found by trying the patterns below in order;
    the function itself is then extracted with JSInterpreter.

    Returns a callable taking the scrambled signature string.
    """
    # The original tuple ended with the same pattern twice; the second copy
    # could never match where the first had failed, so it is dropped.
    funcname_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        funcname_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes a list of arguments
    return lambda s: initial_function([s])
1888
545cc85d 1889 def _decrypt_signature(self, s, video_id, player_url):
257a2501 1890 """Turn the encrypted s field into a working signature"""
6b37f0be 1891
c8bf86d5 1892 if player_url is None:
69ea8ca4 1893 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1894
c8bf86d5 1895 try:
62af3a0e 1896 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1897 if player_id not in self._player_cache:
1898 func = self._extract_signature_function(
60064c53 1899 video_id, player_url, s
c8bf86d5
PH
1900 )
1901 self._player_cache[player_id] = func
1902 func = self._player_cache[player_id]
a06916d9 1903 if self.get_param('youtube_print_sig_code'):
60064c53 1904 self._print_sig_code(func, s)
c8bf86d5
PH
1905 return func(s)
1906 except Exception as e:
1907 tb = traceback.format_exc()
1908 raise ExtractorError(
78caa52a 1909 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1910
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The ``STS`` entry of *ytcfg* is used when it is truthy; otherwise the
    value is searched for in the player code. Without a player URL this
    raises ExtractorError when *fatal* is set and warns otherwise.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return
    if not self._load_player(video_id, player_url, fatal=fatal):
        return sts

    code = self._code_cache[self._extract_player_info(player_url)]
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
1935
def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URL found in *player_responses*.

    Only a warning is issued when no such URL is present; the request
    itself is non-fatal.
    """
    tracking_url = traverse_obj(
        player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none, get_all=False)
    if not tracking_url:
        self.report_warning('Unable to mark watched')
        return

    url_parts = compat_urlparse.urlparse(tracking_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))

    query['ver'] = ['2']
    query['cpn'] = [cpn]
    new_query = compat_urllib_parse_urlencode(query, True)
    tracking_url = compat_urlparse.urlunparse(url_parts._replace(query=new_query))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1961
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embeds found in *webpage*.

    Returns a list holding, in this order: URLs of embedded players,
    ids from lazyYT embeds, and ids from the Wordpress "YouTube Video
    Importer" plugin.
    """
    # Embedded YouTube player
    # The embedSWF alternative used to read `embedSWF\(?:\s*`, i.e. an
    # optional "(" followed by a literal ":", which never matches an
    # `embedSWF("...")` call; it now matches the opening parenthesis.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(map(
        unescapeHTML,
        re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall() returns one tuple per match; the id is the last group
    entries.extend(m[-1] for m in matches)

    return entries
1993
@staticmethod
def _extract_url(webpage):
    """Return the first embed reported by ``_extract_urls``, or None when there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1998
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the second group of ``_VALID_URL`` matched against *url*.

    Raises ExtractorError when *url* does not match.
    """
    match = re.match(cls._VALID_URL, url, flags=re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
2006
def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar inside *data*."""
    def _start_seconds(chapter):
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def _title(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    return self._extract_chapters(
        chapter_list,
        chapter_time=_start_seconds,
        chapter_title=_title,
        duration=duration)
2021
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build chapters from the macro-marker lists in the engagement panels.

    The first list that produces any chapters wins; an empty list is
    returned when none does.
    """
    def _start_seconds(chapter):
        return parse_duration(self._get_text(chapter.get('timeDescription')))

    def _title(chapter):
        return self._get_text(chapter.get('title'))

    marker_lists = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    for contents in marker_lists:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            _start_seconds, _title, duration)
        if chapters:
            return chapters
    return []
2037
2038 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
84213ea8 2039 chapters = []
7c365c21 2040 last_chapter = {'start_time': 0}
2041 for idx, chapter in enumerate(chapter_list or []):
2042 title = chapter_title(chapter)
84213ea8
S
2043 start_time = chapter_time(chapter)
2044 if start_time is None:
2045 continue
7c365c21 2046 last_chapter['end_time'] = start_time
2047 if start_time < last_chapter['start_time']:
2048 if idx == 1:
2049 chapters.pop()
2050 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2051 else:
2052 self.report_warning(f'Invalid start time for chapter "{title}"')
2053 continue
2054 last_chapter = {'start_time': start_time, 'title': title}
2055 chapters.append(last_chapter)
2056 last_chapter['end_time'] = duration
84213ea8
S
2057 return chapters
2058
545cc85d 2059 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2060 return self._parse_json(self._search_regex(
2061 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2062 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 2063
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns None when the text has fewer than three space-separated
    words or when the conversion raises ValueError.
    """
    words = time_text.split(' ')
    if len(words) < 3:
        return None
    amount, unit = words[0], words[1]
    try:
        return datetime_from_str('now-%s%s' % (amount, unit), precision='auto')
    except ValueError:
        return None
d92f5d5a 2076
a1c5d2ca
M
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no 'commentId'. 'parent' in the
    result is *parent*, or 'root' when none is given.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer.get('contentText'))

    # note: timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer.get('publishedTimeText')) or ''
    time_text_dt = self.parse_time_text(time_text)
    # Must be bound even when the time text cannot be parsed; it used to be
    # left undefined in that case, which raised when building the result.
    timestamp = None
    if isinstance(time_text_dt, datetime.datetime):
        timestamp = calendar.timegm(time_text_dt.timetuple())
    author = self._get_text(comment_renderer.get('authorText'))
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                   lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_favorited = 'creatorHeart' in (try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
2114
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, video_id, parent=None, comment_counts=None):
    """Generate the comments reachable from *root_continuation_data*.

    Yields comment dicts as built by ``_extract_comment``. On a
    top-level call (``parent is None``) an int with the expected total
    may be yielded as well, once the header has been read.

    comment_counts is a shared three-item list: comments so far,
    estimated total, current thread number. Recursive calls for reply
    threads pass the same list along.
    """

    def extract_header(contents):
        _total_comments = 0
        _continuation = None
        for content in contents:
            comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
            expected_comment_count = parse_count(self._get_text(
                comments_header_renderer, (lambda x: x['countText'], lambda x: x['commentsCount']), max_runs=1))

            if expected_comment_count:
                comment_counts[1] = expected_comment_count
                self.to_screen('Downloading ~%d comments' % expected_comment_count)
                _total_comments = comment_counts[1]
            sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
            comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = sort_menu_item.get('title')
            if isinstance(sort_text, compat_str):
                sort_text = sort_text.lower()
            else:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text)
            break
        return _total_comments, _continuation

    def extract_thread(contents):
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # `dict` is the expected type here. It used to be passed inside
            # the getter tuple, where it acted as a second getter and no
            # type check took place.
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                yield from self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    video_id, parent=comment.get('id'), comment_counts=comment_counts)

    # YouTube comments have a max depth of 2
    max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
    if max_depth == 1 and parent:
        return
    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    continuation = self._extract_continuation(root_continuation_data)
    if continuation and len(continuation['continuation']) < 27:
        self.write_debug('Detected old API continuation token. Generating new API compatible token.')
        continuation_token = self._generate_comment_continuation(video_id)
        continuation = self._build_api_continuation_query(continuation_token, None)

    visitor_data = None
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
        # NOTE(review): the leading padding of the two reply-thread
        # literals below is shown as a single space in this view - confirm
        # the intended width against the repository
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    comment_counts[2], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
        if not response:
            break
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

        continuation = None
        if isinstance(continuation_contents, list):
            for continuation_section in continuation_contents:
                if not isinstance(continuation_section, dict):
                    continue
                continuation_items = try_get(
                    continuation_section,
                    (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                     lambda x: x['appendContinuationItemsAction']['continuationItems']),
                    list) or []
                if is_first_continuation:
                    total_comments, continuation = extract_header(continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break
                    continue
                # Counting from 1 makes `count` the number of entries
                # yielded. Counting from 0 made a page with exactly one
                # comment look empty and ended the download early.
                count = 0
                for count, entry in enumerate(extract_thread(continuation_items), 1):
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break

        # Deprecated response structure
        elif isinstance(continuation_contents, dict):
            known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
            for key, continuation_renderer in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                if not isinstance(continuation_renderer, dict):
                    continue
                if is_first_continuation:
                    header_continuation_items = [continuation_renderer.get('header') or {}]
                    total_comments, continuation = extract_header(header_continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break

                # Sometimes YouTube provides a continuation without any comments
                # In most cases we end up just downloading these with very little comments to come.
                # (`count` is the number of entries yielded, see above)
                count = 0
                for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {}), 1):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                if count == 0:
                    if not parent:
                        self.report_warning('No comments received - assuming end of comments')
                    continuation = None
                break
a1c5d2ca 2285
2d6659b9 2286 @staticmethod
2287 def _generate_comment_continuation(video_id):
2288 """
2289 Generates initial comment section continuation token from given video id
2290 """
2291 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2292 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2293 new_continuation_intlist = list(itertools.chain.from_iterable(
2294 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2295 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2296
def _extract_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""
    renderer_keys = ('itemSectionRenderer',)

    def _iter_comment_entries(items):
        # Anything but a list produces no entries at all
        if not isinstance(items, list):
            return
        for entry in items:
            for key, renderer in entry.items():
                if key in renderer_keys:
                    yield from self._comment_entries(
                        renderer, video_id=video_id, ytcfg=ytcfg,
                        identity_token=self._extract_identity_token(webpage, item_id=video_id),
                        account_syncid=self._extract_account_syncid(ytcfg))
                    # Only the first known renderer of an entry is used
                    break

    collected = []
    estimated_total = 0
    limit = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')

    try:
        for item in _iter_comment_entries(contents):
            if len(collected) >= limit:
                break
            if isinstance(item, int):
                # An int is the estimated total, not a comment
                estimated_total = item
            else:
                collected.append(item)
    except KeyboardInterrupt:
        # Keep whatever was downloaded before the interruption
        self.to_screen('Interrupted by user')
    self.to_screen('Downloaded %d/%d comments' % (len(collected), estimated_total))
    return {
        'comments': collected,
        'comment_count': len(collected),
    }
2330
109dd3b2 2331 @staticmethod
2332 def _generate_player_context(sts=None):
2333 context = {
2334 'html5Preference': 'HTML5_PREF_WANTS',
2335 }
2336 if sts is not None:
2337 context['signatureTimestamp'] = sts
2338 return {
2339 'playbackContext': {
2340 'contentPlaybackContext': context
a1a7907b 2341 },
2342 'contentCheckOk': True
109dd3b2 2343 }
2344
4e6767b5 2345 @staticmethod
c888ffb9 2346 def _get_video_info_params(video_id, client='TVHTML5'):
2347 GVI_CLIENTS = {
2348 'ANDROID': {
2349 'c': 'ANDROID',
2350 'cver': '16.20',
2351 },
2352 'TVHTML5': {
2353 'c': 'TVHTML5',
2354 'cver': '6.20180913',
11f9be09 2355 },
2356 'IOS': {
2357 'c': 'IOS',
2358 'cver': '16.20'
c888ffb9 2359 }
2360 }
2361 query = {
4e6767b5 2362 'video_id': video_id,
2363 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c888ffb9 2364 'html5': '1'
4e6767b5 2365 }
c888ffb9 2366 query.update(GVI_CLIENTS.get(client))
2367 return query
4e6767b5 2368
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
    """
    Query the "player" API endpoint on behalf of the given client

    @param client  Key into self._YT_CLIENTS
    @returns       The API response, or None if nothing usable was
                   returned (the request is non-fatal)
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    client_name = self._YT_CLIENTS[client]
    headers = self.generate_api_headers(
        player_ytcfg, identity_token, syncid,
        default_client=client_name, session_index=session_index)

    response = self._extract_response(
        item_id=video_id, ep='player',
        query={'videoId': video_id, **self._generate_player_context(sts)},
        ytcfg=player_ytcfg, headers=headers, fatal=False,
        default_client=client_name,
        note='Downloading %s player API JSON' % client.replace('_', ' ').strip())
    # Normalize any falsy response to None
    return response or None
2386
def _extract_age_gated_player_response(self, client, video_id, ytcfg, identity_token, player_url, initial_pr):
    """
    Try to obtain a player response for an age-gated video

    First the get_video_info endpoint is queried using the "_<client>_agegate"
    entry of self._YT_CLIENTS. If that gives nothing, the embedded player
    ("_<client>_embedded") is tried, unless the embed page itself reports
    an age-gate reason.

    @returns  The player response, or None if no age-gate client exists
              for the given client or the workarounds did not succeed
    """
    gvi_client = self._YT_CLIENTS.get(f'_{client}_agegate')
    if not gvi_client:
        return None

    video_info_webpage = self._download_webpage(
        self.http_scheme() + '//www.youtube.com/get_video_info', video_id,
        'Refetching age-gated %s info webpage' % gvi_client.lower(),
        'unable to download video info webpage', fatal=False,
        query=self._get_video_info_params(video_id, client=gvi_client))
    raw_player_response = traverse_obj(
        compat_parse_qs(video_info_webpage), ('player_response', 0), expected_type=str)
    player_response = self._parse_json(raw_player_response or '{}', video_id)
    if player_response:
        return player_response

    self.report_warning('Falling back to embedded-only age-gate workaround')
    is_web = client == 'web'
    embed_webpage = None
    if is_web and 'configs' not in self._configuration_arg('player_skip'):
        embed_webpage = self._download_webpage(
            'https://www.youtube.com/embed/%s?html5=1' % video_id,
            video_id=video_id, note=f'Downloading age-gated {client} embed config')

    embed_ytcfg = self.extract_ytcfg(video_id, embed_webpage) or {}
    # If we extracted the embed webpage, it'll tell us if we can view the video
    raw_embedded_response = traverse_obj(
        embed_ytcfg, ('PLAYER_VARS', 'embedded_player_response'), expected_type=str)
    embedded_response = self._parse_json(raw_embedded_response or '{}', video_id=video_id)
    embedded_reason = traverse_obj(
        embedded_response, ('playabilityStatus', 'reason'), expected_type=str) or ''
    if embedded_reason in self._AGE_GATE_REASONS:
        return None

    return self._extract_player_response(
        f'_{client}_embedded', video_id,
        embed_ytcfg or ytcfg, embed_ytcfg if is_web else {},
        identity_token, player_url, initial_pr)
545cc85d 2421
def _get_requested_clients(self, url, smuggled_data):
    """
    Determine which player clients should be queried

    Takes the clients from the "player_client" extractor argument that are
    known to self._YT_CLIENTS, falling back to android and web when none
    remain. For music URLs, the "_music" variant of every client is added.

    @param url            The URL being extracted
    @param smuggled_data  dict of smuggled data; "is_music_url" is honoured
    @returns              List of client names without duplicates, in order
    """
    requested_clients = [
        client for client in self._configuration_arg('player_client')
        # Keys with a leading underscore (eg: the "_<client>_agegate" and
        # "_<client>_embedded" variants) are not meant to be requested directly
        if not client.startswith('_') and client in self._YT_CLIENTS]
    if not requested_clients:
        requested_clients = ['android', 'web']

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first so that the list is not modified
        # while it is being iterated over
        requested_clients.extend([
            f'{client}_music' for client in requested_clients if not client.endswith('_music')])

    return orderedSet(requested_clients)
cf7e015f 2433
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
    """
    Yield the player responses of all the given clients

    Once any response reports an age-gate reason, the normal player request
    is skipped for the remaining clients and only the age-gate workaround
    is attempted for them.

    @param clients  Names of the clients to query, in order
    @param webpage  The watch page, if it was downloaded; used to obtain
                    the initial player response
    """
    initial_pr = self._extract_yt_initial_variable(
        webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
        video_id, 'initial player response') if webpage else None

    hit_age_gate = False
    for client in clients:
        is_web = client == 'web'
        client_ytcfg = master_ytcfg if is_web else {}
        response = None
        if not hit_age_gate:
            if is_web and initial_pr:
                # The watch page already carries the web client's response
                response = initial_pr
            else:
                if client == 'web_music' and 'configs' not in self._configuration_arg('player_skip'):
                    ytm_webpage = self._download_webpage(
                        'https://music.youtube.com',
                        video_id, fatal=False, note='Downloading remix client config')
                    client_ytcfg = self.extract_ytcfg(video_id, ytm_webpage) or {}
                response = self._extract_player_response(
                    client, video_id, client_ytcfg or master_ytcfg, client_ytcfg,
                    identity_token, player_url, initial_pr)
        if response:
            yield response

        if not hit_age_gate:
            hit_age_gate = traverse_obj(response, ('playabilityStatus', 'reason')) in self._AGE_GATE_REASONS
        if hit_age_gate:
            response = self._extract_age_gated_player_response(
                client, video_id, client_ytcfg or master_ytcfg, identity_token, player_url, initial_pr)
            if response:
                yield response

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr and 'web' not in clients:
        initial_pr['streamingData'] = None
        yield initial_pr
2471
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """
    Yield the format dicts described by the given streaming data

    The entries of "formats" and "adaptiveFormats" are yielded first,
    followed by the formats of the HLS and DASH manifests. Manifest formats
    whose itag was already seen are skipped.

    @param streaming_data  List of "streamingData" dicts of the player responses
    @param player_url      URL of the player, needed for decrypting signatures.
                           Formats with an encrypted signature are skipped
                           when it is not given
    @param is_live         DASH manifests are not extracted when this is truthy
    """
    # itags/stream_ids that were already yielded; used for de-duplication
    itags, stream_ids = [], []
    # Maps itag to its quality label so that it can be applied to DASH manifest formats
    itag_qualities = {}
    q = qualities([
        # "tiny" is the smallest video-only format. But some audio-only formats
        # was also labeled "tiny". It is not clear if such formats still exist
        'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        # The same itag may appear once per audio track, so the track id is part of the key
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        # NB: This is recorded before the checks below, ie, even for formats that end up skipped
        if itag and quality:
            itag_qualities[itag] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No direct URL; the URL and its encrypted signature are inside "signatureCipher"
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        if itag:
            itags.append(itag)
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': ', '.join(filter(None, (
                audio_track.get('displayName'), fmt.get('qualityLabel') or quality))),
            'fps': int_or_none(fmt.get('fps')),
            'height': int_or_none(fmt.get('height')),
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': fmt.get('width'),
            'language': audio_track.get('id', '').split('.')[0],
        }
        # Group 1 is the mimetype and group 2 the (optional) codecs string
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
            # The 3gp format in android client has a quality of "small",
            # but is actually worse than all other formats
            if dct['ext'] == '3gp':
                dct['quality'] = q('tiny')
                dct['preference'] = -10
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        # For single-stream formats, the total bitrate is that of the only stream
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    skip_manifests = self._configuration_arg('skip')
    get_dash = not is_live and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
    get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(
                    hls_manifest_url, video_id, 'mp4', fatal=False):
                itag = self._search_regex(
                    r'/itag/(\d+)', f['url'], 'itag', default=None)
                if itag in itags:
                    continue
                if itag:
                    f['format_id'] = itag
                    itags.append(itag)
                yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(
                    dash_manifest_url, video_id, fatal=False):
                itag = f['format_id']
                if itag in itags:
                    continue
                if itag:
                    itags.append(itag)
                if itag in itag_qualities:
                    f['quality'] = q(itag_qualities[itag])
                # The file size is taken from the "/clen/<bytes>" part of the URL when present
                filesize = int_or_none(self._search_regex(
                    r'/clen/(\d+)', f.get('fragment_base_url')
                    or f['url'], 'file size', default=None))
                if filesize:
                    f['filesize'] = filesize
                yield f
2597
def _real_extract(self, url):
    """
    Extract the information of a single video

    Downloads the watch page and the player responses of the requested
    clients and builds the info dict (formats, thumbnails, metadata,
    subtitles, chapters, availability, etc) out of them.

    @param url  URL of the video; may carry smuggled data
    @returns    The info dict of the video. A url result is returned instead
                when an error screen references a trailer video, and a
                playlist result for multifeed videos (unless --no-playlist
                is given or "force_singlefeed" was smuggled)
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id
    webpage = self._download_webpage(
        webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
    player_url = self._extract_player_url(master_ytcfg, webpage)
    identity_token = self._extract_identity_token(webpage, video_id)

    player_responses = list(self._extract_player_responses(
        self._get_requested_clients(url, smuggled_data),
        video_id, webpage, master_ytcfg, player_url, identity_token))

    # Returns the first match for the given key(s) among all items of obj
    get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    # If an error screen references a trailer video, hand over to it
    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])
    video_title = (
        get_first(video_details, 'title')
        or self._get_text(microformats, (..., 'title'))
        or search_meta(['og:title', 'twitter:title', 'title']))
    video_description = get_first(video_details, 'shortDescription')

    if not smuggled_data.get('force_singlefeed', False):
        if not self.get_param('noplaylist'):
            multifeed_metadata_list = get_first(
                player_responses,
                ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
                expected_type=str)
            if multifeed_metadata_list:
                entries = []
                feed_ids = []
                for feed in multifeed_metadata_list.split(','):
                    # Unquote should take place before split on comma (,) since textual
                    # fields may contain comma as well (see
                    # https://github.com/ytdl-org/youtube-dl/issues/8536)
                    feed_data = compat_parse_qs(
                        compat_urllib_parse_unquote_plus(feed))

                    def feed_entry(name):
                        return try_get(
                            feed_data, lambda x: x[name][0], compat_str)

                    feed_id = feed_entry('id')
                    if not feed_id:
                        continue
                    feed_title = feed_entry('title')
                    title = video_title
                    if feed_title:
                        title += ' (%s)' % feed_title
                    entries.append({
                        '_type': 'url_transparent',
                        'ie_key': 'Youtube',
                        'url': smuggle_url(
                            '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                            {'force_singlefeed': True}),
                        'title': title,
                    })
                    feed_ids.append(feed_id)
                self.to_screen(
                    'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                    % (', '.join(feed_ids), video_id))
                return self.playlist_result(
                    entries, video_id, video_title, video_description)
        else:
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.raise_no_formats(
                'This video is DRM protected.', expected=True)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # There may be a subreason even when no reason could be found
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    for f in formats:
        # TODO: detect if throttled
        if '&n=' in f['url']:  # possibly throttled
            f['source_preference'] = -10
            # note = f.get('format_note')
            # f['format_note'] = f'{note} (throttled)' if note else '(throttled)'

    self._sort_formats(formats)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    # A "yt:stretch=W:H" keyword gives the ratio by which the video is to be stretched
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break

    thumbnails = []
    thumbnail_dicts = traverse_obj(
        (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
        expected_type=dict, default=[])
    for thumbnail in thumbnail_dicts:
        thumbnail_url = thumbnail.get('url')
        if not thumbnail_url:
            continue
        # Sometimes youtube gives a wrong thumbnail URL. See:
        # https://github.com/yt-dlp/yt-dlp/issues/233
        # https://github.com/ytdl-org/youtube-dl/issues/28023
        if 'maxresdefault' in thumbnail_url:
            thumbnail_url = thumbnail_url.split('?')[0]
        thumbnails.append({
            'url': thumbnail_url,
            'height': int_or_none(thumbnail.get('height')),
            'width': int_or_none(thumbnail.get('width')),
        })
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
    guaranteed_thumbnail_names = [
        'hqdefault', 'hq1', 'hq2', 'hq3', '0',
        'mqdefault', 'mq1', 'mq2', 'mq3',
        'default', '1', '2', '3'
    ]
    thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
    n_thumbnail_names = len(thumbnail_names)

    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
        '_test_url': name in hq_thumbnail_names,
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    # Prefer names that come earlier in thumbnail_names, and webp over jpg for the same name
    for thumb in thumbnails:
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    duration = int_or_none(
        get_first(video_details, 'lengthSeconds')
        or get_first(microformats, 'lengthSeconds')
        or parse_duration(search_meta('duration'))) or None
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    if is_live is None:
        if is_upcoming or live_content is False:
            is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False
    live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_endtime and live_starttime:
        duration = live_endtime - live_starttime

    info = {
        'id': video_id,
        'title': self._live_title(video_title) if is_live else video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        'description': video_description,
        'upload_date': unified_strdate(
            get_first(microformats, 'uploadDate')
            or search_meta('uploadDate')),
        'uploader': get_first(video_details, 'author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'is_live': is_live,
        'was_live': (False if is_live or is_upcoming or live_content is False
                     else None if is_live is None or is_upcoming is None
                     else live_content),
        'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
        'release_timestamp': live_starttime,
    }

    pctr = get_first(player_responses, ('captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    subtitles = {}
    if pctr:
        def process_language(container, base_url, lang_code, sub_name, query):
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': update_url_query(base_url, query),
                    'name': sub_name,
                })

        for caption_track in (pctr.get('captionTracks') or []):
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            if caption_track.get('kind') != 'asr':
                lang_code = (
                    remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
                    or caption_track.get('languageCode'))
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code,
                    try_get(caption_track, lambda x: x['name']['simpleText']),
                    {})
                continue
            # Only "asr" tracks reach here. They are offered in every translation language
            automatic_captions = {}
            for translation_language in (pctr.get('translationLanguages') or []):
                translation_language_code = translation_language.get('languageCode')
                if not translation_language_code:
                    continue
                process_language(
                    automatic_captions, base_url, translation_language_code,
                    self._get_text(translation_language.get('languageName'), max_runs=1),
                    {'tlang': translation_language_code})
            info['automatic_captions'] = automatic_captions
    info['subtitles'] = subtitles

    # Start/end time given in the URL. The fragment has priority over the query
    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(query[k][0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # NB: strip the matched text and not the name of the group
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_DATA_RE, video_id,
            'yt initial data')
    if not initial_data:
        headers = self.generate_api_headers(
            master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
            session_index=self._extract_session_index(master_ytcfg))

        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
            note='Downloading initial data API JSON')

    try:
        # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
        info['subtitles']['live_chat'] = [{
            'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]
    except (KeyError, IndexError, TypeError):
        pass

    # The comment extraction below needs this even when there is no initial data
    contents = []
    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or None)

        contents = try_get(
            initial_data,
            lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
            list) or []
        for content in contents:
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = self._get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                # The like/dislike counts are parsed out of the accessibility labels of the buttons
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    like_count, dislike_count = sbr_tooltip.split(' / ')
                    info.update({
                        'like_count': str_to_int(like_count),
                        'dislike_count': str_to_int(dislike_count),
                    })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                info['channel'] = self._get_text(try_get(
                    vsir,
                    lambda x: x['owner']['videoOwnerRenderer']['title'],
                    dict))
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                # A divider line between the rows means that there is more than one song.
                # The music metadata is not extracted in that case
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = self._get_text(mrr['title'])
                    mrr_contents_text = self._get_text(mrr['contents'][0])
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

    # Use the uploader fields for any channel field that is missing
    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }
    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = get_first(video_details, 'isPrivate', expected_type=bool)
    is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    # get xsrf for annotations or comments
    get_annotations = self.get_param('writeannotations', False)
    get_comments = self.get_param('getcomments', False)
    if get_annotations or get_comments:
        xsrf_token = None
        if master_ytcfg:
            xsrf_token = try_get(master_ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
        if not xsrf_token:
            xsrf_token = self._search_regex(
                r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
                webpage, 'xsrf token', group='xsrf_token', fatal=False)

    # annotations
    if get_annotations:
        invideo_url = get_first(
            player_responses,
            ('annotations', 0, 'playerAnnotationsUrlsRenderer', 'invideoUrl'),
            expected_type=str)
        if xsrf_token and invideo_url:
            xsrf_field_name = None
            if master_ytcfg:
                xsrf_field_name = try_get(master_ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
            if not xsrf_field_name:
                xsrf_field_name = self._search_regex(
                    r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
                    webpage, 'xsrf field name',
                    group='xsrf_field_name', default='session_token')
            info['annotations'] = self._download_webpage(
                self._proto_relative_url(invideo_url),
                video_id, note='Downloading annotations',
                errnote='Unable to download video annotations', fatal=False,
                data=urlencode_postdata({xsrf_field_name: xsrf_token}))

    if get_comments:
        # The comments are extracted only when this callable gets invoked
        info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
c5e8d7af 3107
5f6a1245 3108
8bdd16b4 3109class YoutubeTabIE(YoutubeBaseInfoExtractor):
3110 IE_DESC = 'YouTube.com tab'
70d5c17b 3111 _VALID_URL = r'''(?x)
3112 https?://
3113 (?:\w+\.)?
3114 (?:
3115 youtube(?:kids)?\.com|
3116 invidio\.us
3117 )/
3118 (?:
fe03a6cd 3119 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 3120 (?P<not_channel>
9ba5705a 3121 feed/|hashtag/|
70d5c17b 3122 (?:playlist|watch)\?.*?\blist=
3123 )|
29f7c58a 3124 (?!(?:%s)\b) # Direct URLs
70d5c17b 3125 )
3126 (?P<id>[^/?\#&]+)
3127 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 3128 IE_NAME = 'youtube:tab'
3129
81127aa5 3130 _TESTS = [{
da692b79 3131 'note': 'playlists, multipage',
8bdd16b4 3132 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3133 'playlist_mincount': 94,
3134 'info_dict': {
3135 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3136 'title': 'Игорь Клейнер - Playlists',
3137 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3138 'uploader': 'Игорь Клейнер',
3139 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 3140 },
3141 }, {
da692b79 3142 'note': 'playlists, multipage, different order',
8bdd16b4 3143 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3144 'playlist_mincount': 94,
3145 'info_dict': {
3146 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3147 'title': 'Игорь Клейнер - Playlists',
3148 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3149 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3150 'uploader': 'Игорь Клейнер',
8bdd16b4 3151 },
201c1459 3152 }, {
da692b79 3153 'note': 'playlists, series',
201c1459 3154 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3155 'playlist_mincount': 5,
3156 'info_dict': {
3157 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3158 'title': '3Blue1Brown - Playlists',
3159 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 3160 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3161 'uploader': '3Blue1Brown',
201c1459 3162 },
8bdd16b4 3163 }, {
da692b79 3164 'note': 'playlists, singlepage',
8bdd16b4 3165 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3166 'playlist_mincount': 4,
3167 'info_dict': {
3168 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3169 'title': 'ThirstForScience - Playlists',
3170 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 3171 'uploader': 'ThirstForScience',
3172 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 3173 }
3174 }, {
3175 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3176 'only_matching': True,
3177 }, {
da692b79 3178 'note': 'basic, single video playlist',
0e30a7b9 3179 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3180 'info_dict': {
0e30a7b9 3181 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3182 'uploader': 'Sergey M.',
3183 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3184 'title': 'youtube-dl public playlist',
81127aa5 3185 },
0e30a7b9 3186 'playlist_count': 1,
9291475f 3187 }, {
da692b79 3188 'note': 'empty playlist',
0e30a7b9 3189 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3190 'info_dict': {
0e30a7b9 3191 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3192 'uploader': 'Sergey M.',
3193 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3194 'title': 'youtube-dl empty playlist',
9291475f
PH
3195 },
3196 'playlist_count': 0,
3197 }, {
da692b79 3198 'note': 'Home tab',
8bdd16b4 3199 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3200 'info_dict': {
8bdd16b4 3201 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3202 'title': 'lex will - Home',
3203 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3204 'uploader': 'lex will',
3205 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3206 },
8bdd16b4 3207 'playlist_mincount': 2,
9291475f 3208 }, {
da692b79 3209 'note': 'Videos tab',
8bdd16b4 3210 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3211 'info_dict': {
8bdd16b4 3212 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3213 'title': 'lex will - Videos',
3214 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3215 'uploader': 'lex will',
3216 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3217 },
8bdd16b4 3218 'playlist_mincount': 975,
9291475f 3219 }, {
da692b79 3220 'note': 'Videos tab, sorted by popular',
8bdd16b4 3221 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3222 'info_dict': {
8bdd16b4 3223 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3224 'title': 'lex will - Videos',
3225 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3226 'uploader': 'lex will',
3227 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3228 },
8bdd16b4 3229 'playlist_mincount': 199,
9291475f 3230 }, {
da692b79 3231 'note': 'Playlists tab',
8bdd16b4 3232 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3233 'info_dict': {
8bdd16b4 3234 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3235 'title': 'lex will - Playlists',
3236 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3237 'uploader': 'lex will',
3238 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3239 },
8bdd16b4 3240 'playlist_mincount': 17,
ac7553d0 3241 }, {
da692b79 3242 'note': 'Community tab',
8bdd16b4 3243 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3244 'info_dict': {
8bdd16b4 3245 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3246 'title': 'lex will - Community',
3247 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3248 'uploader': 'lex will',
3249 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3250 },
3251 'playlist_mincount': 18,
87dadd45 3252 }, {
da692b79 3253 'note': 'Channels tab',
8bdd16b4 3254 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3255 'info_dict': {
8bdd16b4 3256 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3257 'title': 'lex will - Channels',
3258 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3259 'uploader': 'lex will',
3260 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3261 },
deaec5af 3262 'playlist_mincount': 12,
cd684175 3263 }, {
3264 'note': 'Search tab',
3265 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3266 'playlist_mincount': 40,
3267 'info_dict': {
3268 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3269 'title': '3Blue1Brown - Search - linear algebra',
3270 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3271 'uploader': '3Blue1Brown',
3272 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3273 },
6b08cdf6 3274 }, {
a0566bbf 3275 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3276 'only_matching': True,
3277 }, {
a0566bbf 3278 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3279 'only_matching': True,
3280 }, {
a0566bbf 3281 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3282 'only_matching': True,
3283 }, {
3284 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3285 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3286 'info_dict': {
3287 'title': '29C3: Not my department',
3288 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3289 'uploader': 'Christiaan008',
3290 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3291 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3292 },
3293 'playlist_count': 96,
3294 }, {
3295 'note': 'Large playlist',
3296 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3297 'info_dict': {
8bdd16b4 3298 'title': 'Uploads from Cauchemar',
3299 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3300 'uploader': 'Cauchemar',
3301 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3302 },
8bdd16b4 3303 'playlist_mincount': 1123,
3304 }, {
da692b79 3305 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3306 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3307 'only_matching': True,
4b7df0d3
JMF
3308 }, {
3309 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3310 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3311 'info_dict': {
acf757f4
PH
3312 'title': 'Uploads from Interstellar Movie',
3313 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3314 'uploader': 'Interstellar Movie',
8bdd16b4 3315 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3316 },
481cc733 3317 'playlist_mincount': 21,
358de58c 3318 }, {
3319 'note': 'Playlist with "show unavailable videos" button',
3320 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3321 'info_dict': {
3322 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3323 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3324 'uploader': 'Phim Siêu Nhân Nhật Bản',
3325 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3326 },
da692b79 3327 'playlist_mincount': 200,
5d342002 3328 }, {
da692b79 3329 'note': 'Playlist with unavailable videos in page 7',
5d342002 3330 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3331 'info_dict': {
3332 'title': 'Uploads from BlankTV',
3333 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3334 'uploader': 'BlankTV',
3335 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3336 },
da692b79 3337 'playlist_mincount': 1000,
8bdd16b4 3338 }, {
da692b79 3339 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3340 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3341 'info_dict': {
3342 'title': 'Data Analysis with Dr Mike Pound',
3343 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3344 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3345 'uploader': 'Computerphile',
deaec5af 3346 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3347 },
3348 'playlist_mincount': 11,
3349 }, {
a0566bbf 3350 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3351 'only_matching': True,
dacb3a86 3352 }, {
da692b79 3353 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3354 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3355 'info_dict': {
3356 'id': 'FqZTN594JQw',
3357 'ext': 'webm',
3358 'title': "Smiley's People 01 detective, Adventure Series, Action",
3359 'uploader': 'STREEM',
3360 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3361 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3362 'upload_date': '20150526',
3363 'license': 'Standard YouTube License',
3364 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3365 'categories': ['People & Blogs'],
3366 'tags': list,
dbdaaa23 3367 'view_count': int,
dacb3a86
S
3368 'like_count': int,
3369 'dislike_count': int,
3370 },
3371 'params': {
3372 'skip_download': True,
3373 },
13a75688 3374 'skip': 'This video is not available.',
dacb3a86 3375 'add_ie': [YoutubeIE.ie_key()],
481cc733 3376 }, {
8bdd16b4 3377 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3378 'only_matching': True,
66b48727 3379 }, {
8bdd16b4 3380 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3381 'only_matching': True,
a0566bbf 3382 }, {
3383 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3384 'info_dict': {
11f9be09 3385 'id': 'FMtPN8yp5LU', # This will keep changing
a0566bbf 3386 'ext': 'mp4',
deaec5af 3387 'title': compat_str,
a0566bbf 3388 'uploader': 'Sky News',
3389 'uploader_id': 'skynews',
3390 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3391 'upload_date': r're:\d{8}',
3392 'description': compat_str,
a0566bbf 3393 'categories': ['News & Politics'],
3394 'tags': list,
3395 'like_count': int,
3396 'dislike_count': int,
3397 },
3398 'params': {
3399 'skip_download': True,
3400 },
da692b79 3401 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3402 }, {
3403 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3404 'info_dict': {
3405 'id': 'a48o2S1cPoo',
3406 'ext': 'mp4',
3407 'title': 'The Young Turks - Live Main Show',
3408 'uploader': 'The Young Turks',
3409 'uploader_id': 'TheYoungTurks',
3410 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3411 'upload_date': '20150715',
3412 'license': 'Standard YouTube License',
3413 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3414 'categories': ['News & Politics'],
3415 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3416 'like_count': int,
3417 'dislike_count': int,
3418 },
3419 'params': {
3420 'skip_download': True,
3421 },
3422 'only_matching': True,
3423 }, {
3424 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3425 'only_matching': True,
3426 }, {
3427 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3428 'only_matching': True,
09f1580e 3429 }, {
3430 'note': 'A channel that is not live. Should raise error',
3431 'url': 'https://www.youtube.com/user/numberphile/live',
3432 'only_matching': True,
3d3dddc9 3433 }, {
3434 'url': 'https://www.youtube.com/feed/trending',
3435 'only_matching': True,
3436 }, {
3d3dddc9 3437 'url': 'https://www.youtube.com/feed/library',
3438 'only_matching': True,
3439 }, {
3d3dddc9 3440 'url': 'https://www.youtube.com/feed/history',
3441 'only_matching': True,
3442 }, {
3d3dddc9 3443 'url': 'https://www.youtube.com/feed/subscriptions',
3444 'only_matching': True,
3445 }, {
3d3dddc9 3446 'url': 'https://www.youtube.com/feed/watch_later',
3447 'only_matching': True,
3448 }, {
da692b79 3449 'note': 'Recommended - redirects to home page',
3d3dddc9 3450 'url': 'https://www.youtube.com/feed/recommended',
3451 'only_matching': True,
29f7c58a 3452 }, {
da692b79 3453 'note': 'inline playlist with not always working continuations',
29f7c58a 3454 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3455 'only_matching': True,
3456 }, {
3457 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3458 'only_matching': True,
3459 }, {
3460 'url': 'https://www.youtube.com/course',
3461 'only_matching': True,
3462 }, {
3463 'url': 'https://www.youtube.com/zsecurity',
3464 'only_matching': True,
3465 }, {
3466 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3467 'only_matching': True,
3468 }, {
3469 'url': 'https://www.youtube.com/TheYoungTurks/live',
3470 'only_matching': True,
39ed931e 3471 }, {
3472 'url': 'https://www.youtube.com/hashtag/cctv9',
3473 'info_dict': {
3474 'id': 'cctv9',
3475 'title': '#cctv9',
3476 },
3477 'playlist_mincount': 350,
201c1459 3478 }, {
3479 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3480 'only_matching': True,
9297939e 3481 }, {
da692b79 3482 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3483 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3484 'only_matching': True
fe03a6cd 3485 }, {
3486 'note': '/browse/ should redirect to /channel/',
3487 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3488 'only_matching': True
3489 }, {
3490 'note': 'VLPL, should redirect to playlist?list=PL...',
3491 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3492 'info_dict': {
3493 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3494 'uploader': 'NoCopyrightSounds',
3495 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3496 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3497 'title': 'NCS Releases',
3498 },
3499 'playlist_mincount': 166,
18db7548 3500 }, {
3501 'note': 'Topic, should redirect to playlist?list=UU...',
3502 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3503 'info_dict': {
3504 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3505 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3506 'title': 'Uploads from Royalty Free Music - Topic',
3507 'uploader': 'Royalty Free Music - Topic',
3508 },
3509 'expected_warnings': [
3510 'A channel/user page was given',
3511 'The URL does not have a videos tab',
3512 ],
3513 'playlist_mincount': 101,
3514 }, {
3515 'note': 'Topic without a UU playlist',
3516 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3517 'info_dict': {
3518 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3519 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3520 },
3521 'expected_warnings': [
3522 'A channel/user page was given',
3523 'The URL does not have a videos tab',
3524 'Falling back to channel URL',
3525 ],
3526 'playlist_mincount': 9,
abcdd12b 3527 }, {
3528 'note': 'Youtube music Album',
3529 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3530 'info_dict': {
3531 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3532 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3533 },
3534 'playlist_count': 50,
47193e02 3535 }, {
3536 'note': 'unlisted single video playlist',
3537 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3538 'info_dict': {
3539 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3540 'uploader': 'colethedj',
3541 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3542 'title': 'yt-dlp unlisted playlist test',
3543 'availability': 'unlisted'
3544 },
3545 'playlist_count': 1,
29f7c58a 3546 }]
3547
3548 @classmethod
3549 def suitable(cls, url):
3550 return False if YoutubeIE.suitable(url) else super(
3551 YoutubeTabIE, cls).suitable(url)
8bdd16b4 3552
3553 def _extract_channel_id(self, webpage):
3554 channel_id = self._html_search_meta(
3555 'channelId', webpage, 'channel id', default=None)
3556 if channel_id:
3557 return channel_id
3558 channel_url = self._html_search_meta(
3559 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3560 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3561 'twitter:app:url:googleplay'), webpage, 'channel url')
3562 return self._search_regex(
3563 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3564 channel_url, 'channel id')
15f6397c 3565
8bdd16b4 3566 @staticmethod
cd7c66cf 3567 def _extract_basic_item_renderer(item):
3568 # Modified from _extract_grid_item_renderer
201c1459 3569 known_basic_renderers = (
3570 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3571 )
3572 for key, renderer in item.items():
201c1459 3573 if not isinstance(renderer, dict):
cd7c66cf 3574 continue
201c1459 3575 elif key in known_basic_renderers:
3576 return renderer
3577 elif key.startswith('grid') and key.endswith('Renderer'):
3578 return renderer
8bdd16b4 3579
8bdd16b4 3580 def _grid_entries(self, grid_renderer):
3581 for item in grid_renderer['items']:
3582 if not isinstance(item, dict):
39b62db1 3583 continue
cd7c66cf 3584 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 3585 if not isinstance(renderer, dict):
3586 continue
fe93e2c4 3587 title = self._get_text(renderer.get('title'))
3588
8bdd16b4 3589 # playlist
3590 playlist_id = renderer.get('playlistId')
3591 if playlist_id:
3592 yield self.url_result(
3593 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3594 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3595 video_title=title)
201c1459 3596 continue
8bdd16b4 3597 # video
3598 video_id = renderer.get('videoId')
3599 if video_id:
3600 yield self._extract_video(renderer)
201c1459 3601 continue
8bdd16b4 3602 # channel
3603 channel_id = renderer.get('channelId')
3604 if channel_id:
8bdd16b4 3605 yield self.url_result(
3606 'https://www.youtube.com/channel/%s' % channel_id,
3607 ie=YoutubeTabIE.ie_key(), video_title=title)
201c1459 3608 continue
3609 # generic endpoint URL support
3610 ep_url = urljoin('https://www.youtube.com/', try_get(
3611 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3612 compat_str))
3613 if ep_url:
3614 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3615 if ie.suitable(ep_url):
3616 yield self.url_result(
3617 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3618 break
8bdd16b4 3619
3d3dddc9 3620 def _shelf_entries_from_content(self, shelf_renderer):
3621 content = shelf_renderer.get('content')
3622 if not isinstance(content, dict):
8bdd16b4 3623 return
cd7c66cf 3624 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3625 if renderer:
3626 # TODO: add support for nested playlists so each shelf is processed
3627 # as separate playlist
3628 # TODO: this includes only first N items
3629 for entry in self._grid_entries(renderer):
3630 yield entry
3631 renderer = content.get('horizontalListRenderer')
3632 if renderer:
3633 # TODO
3634 pass
8bdd16b4 3635
29f7c58a 3636 def _shelf_entries(self, shelf_renderer, skip_channels=False):
8bdd16b4 3637 ep = try_get(
3638 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3639 compat_str)
3640 shelf_url = urljoin('https://www.youtube.com', ep)
3d3dddc9 3641 if shelf_url:
29f7c58a 3642 # Skipping links to another channels, note that checking for
3643 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3644 # will not work
3645 if skip_channels and '/channels?' in shelf_url:
3646 return
fe93e2c4 3647 title = self._get_text(shelf_renderer, lambda x: x['title'])
3d3dddc9 3648 yield self.url_result(shelf_url, video_title=title)
3649 # Shelf may not contain shelf URL, fallback to extraction from content
3650 for entry in self._shelf_entries_from_content(shelf_renderer):
3651 yield entry
c5e8d7af 3652
8bdd16b4 3653 def _playlist_entries(self, video_list_renderer):
3654 for content in video_list_renderer['contents']:
3655 if not isinstance(content, dict):
3656 continue
3657 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3658 if not isinstance(renderer, dict):
3659 continue
3660 video_id = renderer.get('videoId')
3661 if not video_id:
3662 continue
3663 yield self._extract_video(renderer)
07aeced6 3664
3462ffa8 3665 def _rich_entries(self, rich_grid_renderer):
3666 renderer = try_get(
70d5c17b 3667 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3462ffa8 3668 video_id = renderer.get('videoId')
3669 if not video_id:
3670 return
3671 yield self._extract_video(renderer)
3672
8bdd16b4 3673 def _video_entry(self, video_renderer):
3674 video_id = video_renderer.get('videoId')
3675 if video_id:
3676 return self._extract_video(video_renderer)
dacb3a86 3677
8bdd16b4 3678 def _post_thread_entries(self, post_thread_renderer):
3679 post_renderer = try_get(
3680 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3681 if not post_renderer:
3682 return
3683 # video attachment
3684 video_renderer = try_get(
895b0931 3685 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3686 video_id = video_renderer.get('videoId')
3687 if video_id:
3688 entry = self._extract_video(video_renderer)
8bdd16b4 3689 if entry:
3690 yield entry
895b0931 3691 # playlist attachment
3692 playlist_id = try_get(
3693 post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3694 if playlist_id:
3695 yield self.url_result(
e28f1c0a 3696 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3697 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3698 # inline video links
3699 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3700 for run in runs:
3701 if not isinstance(run, dict):
3702 continue
3703 ep_url = try_get(
3704 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3705 if not ep_url:
3706 continue
3707 if not YoutubeIE.suitable(ep_url):
3708 continue
3709 ep_video_id = YoutubeIE._match_id(ep_url)
3710 if video_id == ep_video_id:
3711 continue
895b0931 3712 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 3713
8bdd16b4 3714 def _post_thread_continuation_entries(self, post_thread_continuation):
3715 contents = post_thread_continuation.get('contents')
3716 if not isinstance(contents, list):
3717 return
3718 for content in contents:
3719 renderer = content.get('backstagePostThreadRenderer')
3720 if not isinstance(renderer, dict):
3721 continue
3722 for entry in self._post_thread_entries(renderer):
3723 yield entry
07aeced6 3724
39ed931e 3725 r''' # unused
3726 def _rich_grid_entries(self, contents):
3727 for content in contents:
3728 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3729 if video_renderer:
3730 entry = self._video_entry(video_renderer)
3731 if entry:
3732 yield entry
3733 '''
    def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
        """Yield all entries of a tab, following continuations page by page.

        The entries embedded in ``tab['content']`` are yielded first. While a
        continuation is available, further pages are requested through
        ``_extract_response`` and their entries are yielded as well.

        ``item_id`` is only used to label each page request;
        ``identity_token``, ``account_syncid`` and ``ytcfg`` are passed on to
        ``generate_api_headers`` / ``_extract_response``.
        """

        def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
            # Walks parent_renderer['contents'] and, as a side effect, records
            # the continuation it finds in continuation_list[0].
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # Not an item section: only rich items are handled here
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue

                    # Renderer key -> callable returning an iterable of entries
                    known_renderers = {
                        'playlistVideoListRenderer': self._playlist_entries,
                        'gridRenderer': self._grid_entries,
                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                        'backstagePostThreadRenderer': self._post_thread_entries,
                        'videoRenderer': lambda x: [self._video_entry(x)],
                    }
                    # Only the first known renderer of each item is processed
                    for key, renderer in isr_content.items():
                        if key not in known_renderers:
                            continue
                        for entry in known_renderers[key](renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        break

                # Fall back to the item section itself when none was found so far
                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            # Last resort: a continuation attached to the parent renderer
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        # One-element list used as a mutable cell shared with extract_entries
        continuation_list = [None]  # Python 2 does not support nonlocal
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]
        visitor_data = None

        for page_num in itertools.count(1):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=continuation, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # Keep the previous visitor data when the response carries none
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            # First response shape: entries under 'continuationContents'
            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                # Fresh cell, so a value set by extract_entries belongs to this page
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # Second response shape: items appended through
            # 'onResponseReceivedActions' / 'onResponseReceivedEndpoints'.
            # Each value is (handler, key under which the handler expects the items).
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'gridChannelRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            # The first item decides which handler is used for the whole list
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                # Wrap the raw item list so it looks like the renderer the handler expects
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            # Neither response shape matched: stop paginating
            break
9558dcec 3849
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the tab that is flagged as selected.

    For every tab the renderer is looked up with dict_get under
    'tabRenderer' / 'expandableTabRenderer'. The first renderer whose
    'selected' value is exactly True is returned.

    Raises ExtractorError if no tab is selected.
    """
    for candidate in tabs:
        tab_renderer = dict_get(candidate, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    # The loop only finishes without returning when nothing was selected
    raise ExtractorError('Unable to find selected tab')
b82f815f 3858
@classmethod
def _extract_uploader(cls, data):
    """Collect uploader fields from the playlist's secondary sidebar renderer.

    Returns a dict that may contain 'uploader', 'uploader_id' and
    'uploader_url'; fields whose value is None are left out. An empty dict
    is returned when no video owner entry is found.
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner_run = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner_run:
        return {}

    channel_id = try_get(
        owner_run, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
    channel_path = try_get(
        owner_run, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
    candidates = (
        ('uploader', owner_run.get('text')),
        ('uploader_id', channel_id),
        ('uploader_url', urljoin('https://www.youtube.com/', channel_path)),
    )
    return {field: value for field, value in candidates if value is not None}
8bdd16b4 3873
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a page that is organised into tabs.

    Metadata is read from 'channelMetadataRenderer' when it exists and from
    'playlistMetadataRenderer' otherwise. The entries are produced by
    self._entries() for the currently selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    metadata_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if metadata_renderer:
        channel_name = metadata_renderer.get('title')
        channel_url = metadata_renderer.get('channelUrl')
        channel_id = metadata_renderer.get('externalId')
    else:
        metadata_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    raw_thumbnails = []
    if metadata_renderer:
        title = metadata_renderer.get('title')
        description = metadata_renderer.get('description', '')
        # Only set for channels; stays None for plain playlists
        playlist_id = channel_id
        tags = metadata_renderer.get('keywords', '').split()
        raw_thumbnails = try_get(metadata_renderer, lambda x: x['avatar']['thumbnails'], list)
        if not raw_thumbnails:
            # No avatar: fall back to the thumbnail of the primary sidebar renderer
            raw_thumbnails = try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list) or []

    thumbnails = []
    for thumb in raw_thumbnails:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag = try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag or playlist_id
    # Append the selected tab's name(s) to the title
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # channel* fields mirror whatever uploader* values ended up in metadata
    for suffix in ('', '_id', '_url'):
        metadata['channel' + suffix] = metadata['uploader' + suffix]

    ytcfg = self.extract_ytcfg(item_id, webpage)
    identity_token = self._extract_identity_token(webpage, item_id)
    account_syncid = self._extract_account_syncid(ytcfg, data)
    entries = self._entries(selected_tab, playlist_id, identity_token, account_syncid, ytcfg)
    return self.playlist_result(entries, **metadata)
73c4ac2c 3948
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a Mix playlist, requesting further pages as needed.

    @param playlist: playlist renderer of the current page
    @param playlist_id: id used for the API query and for progress messages
    @param data: initial data of the page, used for the account sync id
    @param webpage: page source, used for ytcfg and the identity token

    Iteration ends when a page has no videos, when no video follows the last
    one already yielded, when the first video shows up again, or when a
    response carries no follow-up playlist.
    """
    first_id = last_id = None
    ytcfg = self.extract_ytcfg(playlist_id, webpage)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video of the previous page (0 if not found)
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # try_get gives None when the last item is not a playlistPanelVideoRenderer;
        # fall back to an empty dict so the defaults below are used instead of
        # failing with AttributeError on None.get()
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # Nothing to continue with; stop instead of handing None to _playlist_entries
            return
cd7c66cf 3984
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Handle a playlist found in a watch page.

    If the playlist points to a URL other than `url`, extraction is handed
    over to YoutubeTabIE for that URL. Otherwise it is extracted here as a
    mix playlist.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, relative_url)
    if not playlist_url or playlist_url == url:
        mix_entries = self._extract_mix_playlist(playlist, playlist_id, data, webpage)
        return self.playlist_result(
            mix_entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 4002
def _extract_availability(self, data):
    """
    Determine the availability of a playlist/tab from its sidebar.
    The result is computed by self._availability() from the 'private' and
    'unlisted' flags; both stay None unless a matching label is found,
    i.e. the playlist is never assumed to be public.
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for entry in dropdown_entries:
        if not try_get(entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        label_renderer = try_get(entry, lambda x: x['privacyDropdownItemRenderer']['label'], dict)
        selected_label = self._get_text(label_renderer or [])
        if selected_label:
            labels.add(selected_label.lower())
            break

    is_private = None
    is_unlisted = None
    for label in labels:
        if label == 'unlisted':
            is_unlisted = True
        elif label == 'private':
            is_private = True
        elif label == 'public':
            is_private = False
            is_unlisted = False
    return self._availability(is_private, False, False, False, is_unlisted)
4035
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first truthy `info_renderer` entry of the playlist sidebar.

    @param data: response containing sidebar.playlistSidebarRenderer.items
    @param info_renderer: key to look up in each sidebar item
    @param expected_type: type passed to try_get for the looked-up value

    Returns None when no item provides such an entry.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    matches = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    # The generator is lazy, so the lookup stops at the first truthy match
    return next((match for match in matches if match), None)
4044
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Re-request the playlist so that unavailable videos are included.
    The browse id and params are taken from the 'show unavailable videos'
    menu item if the sidebar has one, with fixed fallbacks otherwise.
    Returns None when there is no primary sidebar renderer; the request is
    non-fatal.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return None

    browse_endpoint = {}
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        label = try_get(
            nav_item, lambda x: x['text']['simpleText'], compat_str)
        if label and label.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    ytcfg = self.extract_ytcfg(item_id, webpage)
    account_syncid = self._extract_account_syncid(ytcfg, data)
    identity_token = self._extract_identity_token(webpage, item_id=item_id)
    visitor_data = try_get(
        self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=account_syncid,
        identity_token=identity_token, visitor_data=visitor_data)
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4083
def _extract_webpage(self, url, item_id):
    """Download `url` and return (webpage, data), retrying on incomplete data.

    The download is repeated up to 'extractor_retries' extra times (default 3)
    while the initial data has neither 'contents' nor 'currentVideoEndpoint'.
    Alerts found in an incomplete response are reported, and ExtractorError
    is raised once the retries are used up.
    """
    max_retries = self.get_param('extractor_retries', 3)
    last_error = 'Incomplete yt initial data recieved'
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if attempt:
            self.report_warning('%s. Retrying ...' % last_error)
        retry_note = ' (retry #%d)' % attempt if attempt else ''
        webpage = self._download_webpage(
            url, item_id, 'Downloading webpage%s' % retry_note)
        data = self.extract_yt_initial_data(item_id, webpage)
        is_complete = data.get('contents') or data.get('currentVideoEndpoint')
        if is_complete:
            break
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        if attempt >= max_retries:
            raise ExtractorError(last_error)
    return webpage, data
4105
@staticmethod
def _smuggle_data(entries, data):
    """Yield each entry, smuggling `data` into its 'url' when `data` is truthy.

    Entries are modified in place and yielded lazily.
    """
    for item in entries:
        if not data:
            yield item
            continue
        item['url'] = smuggle_url(item['url'], data)
        yield item
4112
def _real_extract(self, url):
    """Extract `url`, forwarding the smuggled data to the resulting entries.

    Smuggled data is split off the URL and 'is_music_url' is recorded in it
    for music URLs. If the result has entries, they are wrapped so that each
    entry URL carries the same smuggled data.
    """
    url, smuggled = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled['is_music_url'] = True
    result = self.__real_extract(url, smuggled)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled)
    return result
4121
# Splits a URL into three named groups: 'pre' (the part matched by _VALID_URL),
# 'tab' (a '/word' segment, only attempted when the 'channel_type' group took
# part in the match) and 'post' (everything that follows).
# NOTE(review): 'channel_type' is presumably a named group of _VALID_URL - confirm
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4123
def __real_extract(self, url, smuggled_data):
    """Normalize `url`, download the page and dispatch to the matching extraction.

    @param url: URL to extract, without smuggled data
    @param smuggled_data: dict of smuggled data; only 'is_music_url' is read here

    Returns the result of _extract_from_tabs() or _extract_from_playlist(),
    or a url_result for a single video.
    Raises ExtractorError when the page cannot be recognized, or when a /live
    tab did not redirect to a video.
    """
    item_id = self._match_id(url)
    # Force the host to www.youtube.com, keeping the rest of the URL
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Groups of _url_re as a dict, with unmatched groups mapped to ''
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                item_id = self._search_regex(
                    r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                    self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                    'playlist id')
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
        # is_channel is re-checked because the music handling above may have cleared it
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

    # Rebuild the URL from the (possibly rewritten) parts and re-parse it
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            # The /videos tab was requested but a different tab is selected
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            if alert_type == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        # Switch to the playlist only once it loaded without error alerts
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)
    # data may have been replaced above, so the tabs are looked up again
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    # Neither tabs nor a playlist: fall back to a single video if one is known
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 4238
c5e8d7af 4239
class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist ids and URLs carrying a list= parameter, and
    # hands the extraction over to YoutubeTabIE
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE accepts or that carry a video id (v=)."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        if has_video_id:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Redirect to the canonical /playlist URL, keeping the original query."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # A bare playlist id has no query string, so build one from the id
        query = parse_qs(url) or {'list': playlist_id}
        target_url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            target_url = smuggle_url(target_url, {'is_music_url': True})
        return self.url_result(target_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4324
4325
class YoutubeYtBeIE(InfoExtractor):
    # youtu.be short links that also carry a playlist id; rewritten to the
    # equivalent watch URL and passed on to YoutubeTabIE
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Turn the short link into a watch URL with both video and playlist ids."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4364
4365
class YoutubeYtUserIE(InfoExtractor):
    # "ytuser:<name>" keyword; resolved to the /user/<name> page
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the user's page URL."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4379
b05654f0 4380
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # ":ytfav" style keywords; resolved to the "LL" playlist
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the URL of the LL playlist."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
4398
4399
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to `n` video results for `query`, following continuations.

        Pages are requested from the 'search' endpoint until `n` videos were
        yielded, a response is empty or has no result list, or a page offers
        no continuation.
        """
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS
        yielded = 0
        continuation = {}
        for page_num in itertools.count(1):
            # Merge the continuation of the previous page into the request
            request.update(continuation)
            response = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not response:
                break
            sections = try_get(
                response,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for section in sections:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [section]})

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for item in items or ():
                    if not isinstance(item, dict):
                        continue
                    video = item.get('videoRenderer')
                    # Skip anything that is not a video renderer with an id
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    yielded += 1
                    if yielded == n:
                        return

            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Return a playlist with up to `n` results; `query` is used as id and title"""
        return self.playlist_result(self._entries(query, n), query, query)
75dff0ee 4467
c9ae7b95 4468
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same search as YoutubeSearchIE, with a fixed 'params' value sent along
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4474
c9ae7b95 4475
class YoutubeSearchURLIE(YoutubeSearchIE):
    # Handles /results URLs by running the search from the URL's query
    IE_NAME = '%s_url' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own _VALID_URL."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Search for the URL's search_query/q value, using 'sp' as search params."""
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        search_terms = params.get('search_query') or params.get('q')
        # Stored on the instance because _entries() reads self._SEARCH_PARAMS
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(search_terms[0], self._MAX_RESULTS)
3462ffa8 4502
4503
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors
    Each subclass has to provide the _FEED_NAME property.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        feed_name = self._FEED_NAME
        return 'youtube:%s' % feed_name

    def _real_extract(self, url):
        # The given URL is ignored; the feed page is extracted by YoutubeTabIE
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4520
4521
class YoutubeWatchLaterIE(InfoExtractor):
    # ":ytwatchlater" keyword; resolved to the "WL" playlist
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the URL of the WL playlist."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4534
4535
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Matches the bare youtube.com home page as well as the ":ytrec" keywords
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4551
1ed5b5c9 4552
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # ":ytsubs" style keywords for the 'subscriptions' feed
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4564
1ed5b5c9 4565
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # ":ythis" / ":ythistory" keywords for the 'history' feed
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4574
4575
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    # Matches watch/attribution_link URLs that end before any video id, which
    # is what remains when a shell cuts an unquoted URL at the first "&".
    # Extraction always fails with a hint on how to quote the URL.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL."""
        # The hint names the yt-dlp command, since that is the program the
        # user is running; it previously suggested "youtube-dl"
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4623
4624
class YoutubeTruncatedIDIE(InfoExtractor):
    # Matches watch URLs whose v= value has only 1 to 10 characters;
    # extraction always fails with an explanatory error
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)