]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[youtube] Simplify `_get_text` early
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
2d6659b9 5import base64
d92f5d5a 6import calendar
109dd3b2 7import copy
fe93e2c4 8import datetime
a5c56234 9import hashlib
0ca96d48 10import itertools
c5e8d7af 11import json
c4417ddb 12import os.path
d77ab8e2 13import random
c5e8d7af 14import re
8a784c74 15import time
e0df6211 16import traceback
c5e8d7af 17
b05654f0 18from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 19from ..compat import (
edf3e38e 20 compat_chr,
29f7c58a 21 compat_HTTPError,
c5e8d7af 22 compat_parse_qs,
545cc85d 23 compat_str,
7fd002c0 24 compat_urllib_parse_unquote_plus,
15707c7e 25 compat_urllib_parse_urlencode,
7c80519c 26 compat_urllib_parse_urlparse,
7c61bd36 27 compat_urlparse,
4bb4a188 28)
545cc85d 29from ..jsinterp import JSInterpreter
4bb4a188 30from ..utils import (
2d6659b9 31 bytes_to_intlist,
c5e8d7af 32 clean_html,
d92f5d5a 33 datetime_from_str,
11f9be09 34 dict_get,
358de58c 35 error_to_compat_str,
c5e8d7af 36 ExtractorError,
2d30521a 37 float_or_none,
11f9be09 38 format_field,
dd27fd17 39 int_or_none,
2d6659b9 40 intlist_to_bytes,
94278f72 41 mimetype2ext,
9c0d7f49 42 network_exceptions,
11f9be09 43 orderedSet,
6310acf5 44 parse_codecs,
49bd8c66 45 parse_count,
7c80519c 46 parse_duration,
7ea65411 47 parse_iso8601,
dca3ff4a 48 qualities,
3995d37d 49 remove_start,
cf7e015f 50 smuggle_url,
dbdaaa23 51 str_or_none,
c93d53f5 52 str_to_int,
7c365c21 53 traverse_obj,
556dbe7f 54 try_get,
c5e8d7af
PH
55 unescapeHTML,
56 unified_strdate,
cf7e015f 57 unsmuggle_url,
8bdd16b4 58 update_url_query,
21c340b8 59 url_or_none,
6e6bc8da 60 urlencode_postdata,
fe93e2c4 61 urljoin,
7c365c21 62 variadic,
c5e8d7af
PH
63)
64
5f6a1245 65
def parse_qs(url):
    """Parse the query component of `url` with compat_urlparse.parse_qs."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
68
69
de7f3446 70class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b
JMF
71 """Provide base functions for Youtube extractors"""
72 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
9303ce3e 73 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
e00eb564
S
74
75 _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
3995d37d
S
76 _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
77 _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
e00eb564 78
3462ffa8 79 _RESERVED_NAMES = (
bea74222 80 r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
46953e7e 81 r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
cd7c66cf 82 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
3462ffa8 83
b2e8bc1b
JMF
84 _NETRC_MACHINE = 'youtube'
85 # If True it will raise an error if no login info is provided
86 _LOGIN_REQUIRED = False
87
70d5c17b 88 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 89
b2e8bc1b 90 def _login(self):
83317f69 91 """
92 Attempt to log in to YouTube.
93 True is returned if successful or skipped.
94 False is returned if login failed.
95
96 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
97 """
9d5d4d64 98
99 def warn(message):
100 self.report_warning(message)
101
102 # username+password login is broken
982ee69a
MB
103 if (self._LOGIN_REQUIRED
104 and self.get_param('cookiefile') is None
105 and self.get_param('cookiesfrombrowser') is None):
9d5d4d64 106 self.raise_login_required(
107 'Login details are needed to download this content', method='cookies')
68217024 108 username, password = self._get_login_info()
9d5d4d64 109 if username:
110 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
111 return
9d5d4d64 112
2d6659b9 113 # Everything below this is broken!
114 r'''
b2e8bc1b
JMF
115 # No authentication to be performed
116 if username is None:
a06916d9 117 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
69ea8ca4 118 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
a06916d9 119 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
545cc85d 120 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
83317f69 121 return True
b2e8bc1b 122
7cc3570e
PH
123 login_page = self._download_webpage(
124 self._LOGIN_URL, None,
69ea8ca4
PH
125 note='Downloading login page',
126 errnote='unable to fetch login page', fatal=False)
7cc3570e
PH
127 if login_page is False:
128 return
b2e8bc1b 129
1212e997 130 login_form = self._hidden_inputs(login_page)
c5e8d7af 131
e00eb564
S
132 def req(url, f_req, note, errnote):
133 data = login_form.copy()
134 data.update({
135 'pstMsg': 1,
136 'checkConnection': 'youtube',
137 'checkedDomains': 'youtube',
138 'hl': 'en',
139 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
3995d37d 140 'f.req': json.dumps(f_req),
e00eb564
S
141 'flowName': 'GlifWebSignIn',
142 'flowEntry': 'ServiceLogin',
baf67a60
S
143 # TODO: reverse actual botguard identifier generation algo
144 'bgRequest': '["identifier",""]',
041bc3ad 145 })
e00eb564
S
146 return self._download_json(
147 url, None, note=note, errnote=errnote,
148 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
149 fatal=False,
150 data=urlencode_postdata(data), headers={
151 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
152 'Google-Accounts-XSRF': 1,
153 })
154
3995d37d
S
155 lookup_req = [
156 username,
157 None, [], None, 'US', None, None, 2, False, True,
158 [
159 None, None,
160 [2, 1, None, 1,
161 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
162 None, [], 4],
163 1, [None, None, []], None, None, None, True
164 ],
165 username,
166 ]
167
e00eb564 168 lookup_results = req(
3995d37d 169 self._LOOKUP_URL, lookup_req,
e00eb564
S
170 'Looking up account info', 'Unable to look up account info')
171
172 if lookup_results is False:
173 return False
041bc3ad 174
3995d37d
S
175 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
176 if not user_hash:
177 warn('Unable to extract user hash')
178 return False
179
180 challenge_req = [
181 user_hash,
182 None, 1, None, [1, None, None, None, [password, None, True]],
183 [
184 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
185 1, [None, None, []], None, None, None, True
186 ]]
83317f69 187
3995d37d
S
188 challenge_results = req(
189 self._CHALLENGE_URL, challenge_req,
190 'Logging in', 'Unable to log in')
83317f69 191
3995d37d 192 if challenge_results is False:
e00eb564 193 return
83317f69 194
3995d37d
S
195 login_res = try_get(challenge_results, lambda x: x[0][5], list)
196 if login_res:
197 login_msg = try_get(login_res, lambda x: x[5], compat_str)
198 warn(
199 'Unable to login: %s' % 'Invalid password'
200 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
201 return False
202
203 res = try_get(challenge_results, lambda x: x[0][-1], list)
204 if not res:
205 warn('Unable to extract result entry')
206 return False
207
9a6628aa
S
208 login_challenge = try_get(res, lambda x: x[0][0], list)
209 if login_challenge:
210 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
211 if challenge_str == 'TWO_STEP_VERIFICATION':
3995d37d
S
212 # SEND_SUCCESS - TFA code has been successfully sent to phone
213 # QUOTA_EXCEEDED - reached the limit of TFA codes
9a6628aa 214 status = try_get(login_challenge, lambda x: x[5], compat_str)
3995d37d
S
215 if status == 'QUOTA_EXCEEDED':
216 warn('Exceeded the limit of TFA codes, try later')
217 return False
218
219 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
220 if not tl:
221 warn('Unable to extract TL')
222 return False
223
224 tfa_code = self._get_tfa_info('2-step verification code')
225
226 if not tfa_code:
227 warn(
228 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
229 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
230 return False
231
232 tfa_code = remove_start(tfa_code, 'G-')
233
234 tfa_req = [
235 user_hash, None, 2, None,
236 [
237 9, None, None, None, None, None, None, None,
238 [None, tfa_code, True, 2]
239 ]]
240
241 tfa_results = req(
242 self._TFA_URL.format(tl), tfa_req,
243 'Submitting TFA code', 'Unable to submit TFA code')
244
245 if tfa_results is False:
246 return False
247
248 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
249 if tfa_res:
250 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
251 warn(
252 'Unable to finish TFA: %s' % 'Invalid TFA code'
253 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
254 return False
255
256 check_cookie_url = try_get(
257 tfa_results, lambda x: x[0][-1][2], compat_str)
9a6628aa
S
258 else:
259 CHALLENGES = {
260 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
261 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
262 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
263 }
264 challenge = CHALLENGES.get(
265 challenge_str,
266 '%s returned error %s.' % (self.IE_NAME, challenge_str))
267 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
268 return False
3995d37d
S
269 else:
270 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
271
272 if not check_cookie_url:
273 warn('Unable to extract CheckCookie URL')
274 return False
e00eb564
S
275
276 check_cookie_results = self._download_webpage(
3995d37d
S
277 check_cookie_url, None, 'Checking cookie', fatal=False)
278
279 if check_cookie_results is False:
280 return False
e00eb564 281
3995d37d
S
282 if 'https://myaccount.google.com/' not in check_cookie_results:
283 warn('Unable to log in')
b2e8bc1b 284 return False
e00eb564 285
b2e8bc1b 286 return True
2d6659b9 287 '''
b2e8bc1b 288
cce889b9 289 def _initialize_consent(self):
290 cookies = self._get_cookies('https://www.youtube.com/')
291 if cookies.get('__Secure-3PSID'):
292 return
293 consent_id = None
294 consent = cookies.get('CONSENT')
295 if consent:
296 if 'YES' in consent.value:
297 return
298 consent_id = self._search_regex(
299 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
300 if not consent_id:
301 consent_id = random.randint(100, 999)
302 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 303
b2e8bc1b 304 def _real_initialize(self):
cce889b9 305 self._initialize_consent()
b2e8bc1b
JMF
306 if self._downloader is None:
307 return
b2e8bc1b
JMF
308 if not self._login():
309 return
c5e8d7af 310
a0566bbf 311 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
29f7c58a 312 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
313 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 314
109dd3b2 315 _YT_DEFAULT_YTCFGS = {
316 'WEB': {
317 'INNERTUBE_API_VERSION': 'v1',
318 'INNERTUBE_CLIENT_NAME': 'WEB',
319 'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
320 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
321 'INNERTUBE_CONTEXT': {
322 'client': {
323 'clientName': 'WEB',
324 'clientVersion': '2.20210622.10.00',
325 'hl': 'en',
326 }
327 },
328 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
329 },
330 'WEB_REMIX': {
331 'INNERTUBE_API_VERSION': 'v1',
332 'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
333 'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
334 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
335 'INNERTUBE_CONTEXT': {
336 'client': {
337 'clientName': 'WEB_REMIX',
338 'clientVersion': '1.20210621.00.00',
339 'hl': 'en',
340 }
341 },
342 'INNERTUBE_CONTEXT_CLIENT_NAME': 67
343 },
344 'WEB_EMBEDDED_PLAYER': {
345 'INNERTUBE_API_VERSION': 'v1',
346 'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
347 'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
348 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
349 'INNERTUBE_CONTEXT': {
350 'client': {
351 'clientName': 'WEB_EMBEDDED_PLAYER',
352 'clientVersion': '1.20210620.0.1',
353 'hl': 'en',
354 }
355 },
356 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
357 },
358 'ANDROID': {
359 'INNERTUBE_API_VERSION': 'v1',
360 'INNERTUBE_CLIENT_NAME': 'ANDROID',
361 'INNERTUBE_CLIENT_VERSION': '16.20',
362 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
363 'INNERTUBE_CONTEXT': {
364 'client': {
365 'clientName': 'ANDROID',
366 'clientVersion': '16.20',
367 'hl': 'en',
368 }
369 },
fe93e2c4 370 'INNERTUBE_CONTEXT_CLIENT_NAME': 3
109dd3b2 371 },
372 'ANDROID_EMBEDDED_PLAYER': {
373 'INNERTUBE_API_VERSION': 'v1',
374 'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
375 'INNERTUBE_CLIENT_VERSION': '16.20',
376 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
377 'INNERTUBE_CONTEXT': {
378 'client': {
379 'clientName': 'ANDROID_EMBEDDED_PLAYER',
380 'clientVersion': '16.20',
381 'hl': 'en',
382 }
383 },
fe93e2c4 384 'INNERTUBE_CONTEXT_CLIENT_NAME': 55
109dd3b2 385 },
386 'ANDROID_MUSIC': {
387 'INNERTUBE_API_VERSION': 'v1',
388 'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
389 'INNERTUBE_CLIENT_VERSION': '4.32',
390 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
391 'INNERTUBE_CONTEXT': {
392 'client': {
393 'clientName': 'ANDROID_MUSIC',
394 'clientVersion': '4.32',
395 'hl': 'en',
396 }
397 },
fe93e2c4 398 'INNERTUBE_CONTEXT_CLIENT_NAME': 21
11f9be09 399 },
400 'IOS': {
401 'INNERTUBE_API_VERSION': 'v1',
402 'INNERTUBE_CLIENT_NAME': 'IOS',
403 'INNERTUBE_CLIENT_VERSION': '16.20',
404 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
405 'INNERTUBE_CONTEXT': {
406 'client': {
407 'clientName': 'IOS',
408 'clientVersion': '16.20',
409 'hl': 'en',
410 }
411 },
412 'INNERTUBE_CONTEXT_CLIENT_NAME': 5
413
414 },
415 'IOS_MUSIC': {
416 'INNERTUBE_API_VERSION': 'v1',
417 'INNERTUBE_CLIENT_NAME': 'IOS_MUSIC',
418 'INNERTUBE_CLIENT_VERSION': '4.32',
419 'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
420 'INNERTUBE_CONTEXT': {
421 'client': {
422 'clientName': 'IOS_MUSIC',
423 'clientVersion': '4.32',
424 'hl': 'en',
425 }
426 },
427 'INNERTUBE_CONTEXT_CLIENT_NAME': 26
428 },
429 'IOS_MESSAGES_EXTENSION': {
430 'INNERTUBE_API_VERSION': 'v1',
431 'INNERTUBE_CLIENT_NAME': 'IOS_MESSAGES_EXTENSION',
432 'INNERTUBE_CLIENT_VERSION': '16.20',
433 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
434 'INNERTUBE_CONTEXT': {
435 'client': {
436 'clientName': 'IOS_MESSAGES_EXTENSION',
437 'clientVersion': '16.20',
438 'hl': 'en',
439 }
440 },
441 'INNERTUBE_CONTEXT_CLIENT_NAME': 66
109dd3b2 442 }
443 }
444
445 _YT_DEFAULT_INNERTUBE_HOSTS = {
446 'DIRECT': 'youtubei.googleapis.com',
447 'WEB': 'www.youtube.com',
448 'WEB_REMIX': 'music.youtube.com',
449 'ANDROID_MUSIC': 'music.youtube.com'
450 }
451
11f9be09 452 # clients starting with _ cannot be explicitly requested by the user
453 _YT_CLIENTS = {
454 'web': 'WEB',
455 'web_music': 'WEB_REMIX',
456 '_web_embedded': 'WEB_EMBEDDED_PLAYER',
457 '_web_agegate': 'TVHTML5',
458 'android': 'ANDROID',
459 'android_music': 'ANDROID_MUSIC',
460 '_android_embedded': 'ANDROID_EMBEDDED_PLAYER',
461 '_android_agegate': 'ANDROID',
462 'ios': 'IOS',
463 'ios_music': 'IOS_MUSIC',
464 '_ios_embedded': 'IOS_MESSAGES_EXTENSION',
465 '_ios_agegate': 'IOS'
466 }
467
109dd3b2 468 def _get_default_ytcfg(self, client='WEB'):
469 if client in self._YT_DEFAULT_YTCFGS:
470 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
471 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
472 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
473
def _get_innertube_host(self, client='WEB'):
    """Look up the host for `client` in _YT_DEFAULT_INNERTUBE_HOSTS, trying 'WEB' next."""
    lookup_order = (client, 'WEB')
    return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, lookup_order)
476
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
    """
    Apply `getter` to `ytcfg` via try_get, falling back to the default ytcfg.

    The default ytcfg of `default_client` is only consulted when the value
    obtained from `ytcfg` is falsy.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
481
def _extract_client_name(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_NAME from `ytcfg`, else from the defaults of `default_client`."""
    def client_name(cfg):
        return cfg['INNERTUBE_CLIENT_NAME']

    return self._ytcfg_get_safe(ytcfg, client_name, compat_str, default_client)
484
@staticmethod
def _extract_session_index(*data):
    """
    Return the first usable SESSION_INDEX among the given mappings.

    Each argument is probed in order; the first value that int_or_none does
    not turn into None is returned. None is returned if there is no such value.
    """
    for cfg in data:
        index = int_or_none(try_get(cfg, lambda x: x['SESSION_INDEX']))
        if index is None:
            continue
        return index
    return None
314ee305 491
def _extract_client_version(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_VERSION from `ytcfg`, else from the defaults of `default_client`."""
    def client_version(cfg):
        return cfg['INNERTUBE_CLIENT_VERSION']

    return self._ytcfg_get_safe(ytcfg, client_version, compat_str, default_client)
494
def _extract_api_key(self, ytcfg=None, default_client='WEB'):
    """Return INNERTUBE_API_KEY from `ytcfg`, else from the defaults of `default_client`."""
    def api_key(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, api_key, compat_str, default_client)
497
def _extract_context(self, ytcfg=None, default_client='WEB'):
    """
    Return the INNERTUBE_CONTEXT dict to send with API requests.

    A non-empty INNERTUBE_CONTEXT found in `ytcfg` is returned as is.
    Otherwise the default context of `default_client` is used; when `ytcfg`
    is non-empty its client version, client name and VISITOR_DATA (if any)
    are written into that default context's 'client' entry.
    """
    def innertube_context(cfg):
        return try_get(cfg, lambda x: x['INNERTUBE_CONTEXT'], dict)

    supplied = innertube_context(ytcfg)
    if supplied:
        return supplied

    context = innertube_context(self._get_default_ytcfg(default_client))
    if ytcfg:
        # Recreate the client context (required)
        client = context['client']
        client.update({
            'clientVersion': self._extract_client_version(ytcfg, default_client),
            'clientName': self._extract_client_name(ytcfg, default_client),
        })
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            client['visitorData'] = visitor_data
    return context
517
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """
    Build the 'SAPISIDHASH <time>_<sha1>' authorization value for `origin`.

    The value of the __Secure-3PAPISID cookie (or SAPISID as a fallback) is
    used. None is returned when neither cookie carries a value. If the
    SAPISID cookie itself is missing it is set from the value that was found.
    """
    # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
    # See: https://github.com/yt-dlp/yt-dlp/issues/393
    cookies = self._get_cookies('https://www.youtube.com')
    sapisid = dict_get(cookies, ('__Secure-3PAPISID', 'SAPISID'))
    if sapisid is None or not sapisid.value:
        return

    timestamp = round(time.time())
    # SAPISID cookie is required if not already present
    if not cookies.get('SAPISID'):
        self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie', only_once=True)
        self._set_cookie(
            '.youtube.com', 'SAPISID', sapisid.value, secure=True, expire_time=timestamp + 3600)
    self.write_debug('Extracted SAPISID cookie', only_once=True)

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    payload = '{} {} {}'.format(timestamp, sapisid.value, origin)
    digest = hashlib.sha1(payload.encode('utf-8')).hexdigest()
    return 'SAPISIDHASH {}_{}'.format(timestamp, digest)
a5c56234
M
537
538 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 539 note='Downloading API JSON', errnote='Unable to download API page',
109dd3b2 540 context=None, api_key=None, api_hostname=None, default_client='WEB'):
f4f751af 541
109dd3b2 542 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
8bdd16b4 543 data.update(query)
11f9be09 544 real_headers = self.generate_api_headers(default_client=default_client)
f4f751af 545 real_headers.update({'content-type': 'application/json'})
546 if headers:
547 real_headers.update(headers)
545cc85d 548 return self._download_json(
109dd3b2 549 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
a5c56234 550 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 551 data=json.dumps(data).encode('utf8'), headers=real_headers,
552 query={'key': api_key or self._extract_api_key()})
553
11f9be09 554 def extract_yt_initial_data(self, video_id, webpage):
8bdd16b4 555 return self._parse_json(
556 self._search_regex(
29f7c58a 557 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
a0566bbf 558 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
8bdd16b4 559 video_id)
0c148415 560
a1c5d2ca 561 def _extract_identity_token(self, webpage, item_id):
11f9be09 562 if not webpage:
563 return None
564 ytcfg = self.extract_ytcfg(item_id, webpage)
a1c5d2ca
M
565 if ytcfg:
566 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
567 if token:
568 return token
569 return self._search_regex(
570 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
571 'identity token', default=None)
572
573 @staticmethod
fe93e2c4 574 def _extract_account_syncid(*args):
8ea3f7b9 575 """
576 Extract syncId required to download private playlists of secondary channels
fe93e2c4 577 @params response and/or ytcfg
8ea3f7b9 578 """
fe93e2c4 579 for data in args:
580 # ytcfg includes channel_syncid if on secondary channel
581 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
582 if delegated_sid:
583 return delegated_sid
584 sync_ids = (try_get(
585 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
586 lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
587 if len(sync_ids) >= 2 and sync_ids[1]:
588 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
589 # and just "user_syncid||" for primary channel. We only want the channel_syncid
590 return sync_ids[0]
a1c5d2ca 591
11f9be09 592 def extract_ytcfg(self, video_id, webpage):
8c54a305 593 if not webpage:
594 return {}
29f7c58a 595 return self._parse_json(
596 self._search_regex(
597 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
f4f751af 598 default='{}'), video_id, fatal=False) or {}
599
def generate_api_headers(
        self, ytcfg=None, identity_token=None, account_syncid=None,
        visitor_data=None, api_hostname=None, default_client='WEB', session_index=None):
    """
    Build the HTTP headers for an innertube API request.

    Client name/version and Origin are always set. Identity token, page id,
    auth user, visitor id and the SAPISIDHASH authorization are added only
    when the corresponding value is given or can be derived from `ytcfg`
    or cookies.
    """
    host = api_hostname if api_hostname else self._get_innertube_host(default_client)
    origin = 'https://' + host
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin
    }

    if ytcfg and not visitor_data:
        visitor_data = try_get(
            self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
    if identity_token:
        headers['X-Youtube-Identity-Token'] = identity_token
    if account_syncid:
        headers['X-Goog-PageId'] = account_syncid

    if ytcfg and session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # Index 0 is used when an account sync id is given without a session index
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    if visitor_data:
        headers['X-Goog-Visitor-Id'] = visitor_data

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin
    return headers
29f7c58a 628
2d6659b9 629 @staticmethod
630 def _build_api_continuation_query(continuation, ctp=None):
631 query = {
632 'continuation': continuation
633 }
634 # TODO: Inconsistency with clickTrackingParams.
635 # Currently we have a fixed ctp contained within context (from ytcfg)
636 # and a ctp in root query for continuation.
637 if ctp:
638 query['clickTracking'] = {'clickTrackingParams': ctp}
639 return query
640
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from a renderer's continuation data.

    Reads nextContinuationData (first entry of 'continuations') or
    reloadContinuationData. None is returned when neither is present or
    when it holds no 'continuation' token.
    """
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict) or {}
    token = continuation_data.get('continuation')
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))
2d6659b9 653
654 @classmethod
655 def _extract_continuation_ep_data(cls, continuation_ep: dict):
656 if isinstance(continuation_ep, dict):
657 continuation = try_get(
658 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
659 if not continuation:
660 return
661 ctp = continuation_ep.get('clickTrackingParams')
fe93e2c4 662 return cls._build_api_continuation_query(continuation, ctp)
2d6659b9 663
@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for `renderer`, or None if there is none.

    The renderer's own continuation data is preferred. Otherwise the dict
    entries of its 'contents' and 'items' lists are scanned, in that order,
    for a continuationItemRenderer endpoint or button command.
    """
    own_continuation = cls._extract_next_continuation_data(renderer)
    if own_continuation:
        return own_continuation

    children = [
        child
        for key in ('contents', 'items')
        for child in try_get(renderer, lambda x: x[key], list) or []]

    for child in children:
        if not isinstance(child, dict):
            continue
        endpoint = try_get(
            child, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                    lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
            dict)
        query = cls._extract_continuation_ep_data(endpoint)
        if query:
            return query
    return None
684
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) for every alert in data['alerts'].

    Entries of the 'alerts' list that are not dicts are skipped, and so are
    alert values that are not dicts, have no 'type', or have no text.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # The response is untrusted; a non-dict value here would
            # otherwise raise AttributeError on .get()
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
697
698 def _report_alerts(self, alerts, expected=True):
699 errors = []
700 warnings = []
701 for alert_type, alert_message in alerts:
702 if alert_type.lower() == 'error':
703 errors.append([alert_type, alert_message])
704 else:
705 warnings.append([alert_type, alert_message])
706
707 for alert_type, alert_message in (warnings + errors[:-1]):
708 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
709 if errors:
710 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
711
712 def _extract_and_report_alerts(self, data, *args, **kwargs):
713 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
714
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadataBadgeRenderer labels in renderer['badges']."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}
722
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found in `data`, or None.

    Each path in `path_list` is tried in order (with no paths, `data` itself
    is examined). For every candidate object, 'simpleText' is preferred;
    otherwise the 'text' of its 'runs' (or of the candidate itself when it
    is a list) are joined, limited to the first `max_runs` runs if given.
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # Only a path containing `...` or a list/tuple key is treated as
            # already producing several candidates; anything else is wrapped
            branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                candidates = [candidates]

        for item in candidates:
            simple_text = try_get(item, lambda x: x['simpleText'], compat_str)
            if simple_text:
                return simple_text

            runs = try_get(item, lambda x: x['runs'], list) or []
            if not runs and isinstance(item, list):
                runs = item

            limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:limit], (..., 'text'), expected_type=str, default=[]))
            if joined:
                return joined
    return None
47193e02 744
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='WEB'):
    """
    Call the API endpoint `ep` with retries and return the response.

    A request is retried (up to the 'extractor_retries' param, default 3)
    on network errors other than HTTP 403/429, and when none of
    `check_get_keys` is present in the response. Alerts contained in the
    response are reported and an error alert aborts the call.

    With fatal=False failures are reported as warnings and None is returned
    instead of raising.
    """
    required_keys = [] if check_get_keys is None else check_get_keys
    retries = self.get_param('extractor_retries', 3)
    response = None
    last_error = None
    count = -1
    # The retry bound is only compared, never iterated, so a plain while loop is kept
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as err:
            cause = err.cause
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            retryable = isinstance(cause, network_exceptions) and not (
                isinstance(cause, compat_HTTPError) and cause.code in (403, 429))
            if retryable:
                last_error = error_to_compat_str(cause or err)
                if count < retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(err))
            return

        # Only reached when the request succeeded: every path through the
        # handler above leaves the current iteration.
        # Youtube may send alerts if there was an issue with the continuation page
        try:
            self._extract_and_report_alerts(response, expected=False)
        except ExtractorError as err:
            if fatal:
                raise
            self.report_warning(error_to_compat_str(err))
            return

        if not required_keys or dict_get(response, required_keys):
            break

        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        last_error = 'Incomplete data received'
        if count >= retries:
            if fatal:
                raise ExtractorError(last_error)
            self.report_warning(last_error)
            return
    return response
803
9297939e 804 @staticmethod
805 def is_music_url(url):
806 return re.match(r'https?://music\.youtube\.com/', url) is not None
807
def _extract_video(self, renderer):
    """
    Turn a video renderer dict into a 'url' result handled by YoutubeIE.

    Title, description, duration, view count and uploader are read from the
    renderer's text fields; the video id doubles as the result url.
    """
    video_id = renderer.get('videoId')

    # Only the leading digits/commas of the whitespace-stripped text are kept
    view_count_text = self._get_text(renderer, 'viewCountText') or ''
    leading_number = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
        'view count', default=None)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
        'title': self._get_text(renderer, 'title'),
        'description': self._get_text(renderer, 'descriptionSnippet'),
        'duration': parse_duration(self._get_text(renderer, 'lengthText')),
        'view_count': str_to_int(leading_number),
        'uploader': self._get_text(renderer, 'ownerText', 'shortBylineText'),
    }
831
0c148415 832
360e1ca5 833class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 834 IE_DESC = 'YouTube.com'
bc2ca1bb 835 _INVIDIOUS_SITES = (
836 # invidious-redirect websites
837 r'(?:www\.)?redirect\.invidious\.io',
838 r'(?:(?:www|dev)\.)?invidio\.us',
839 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
840 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 841 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 842 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 843 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
bc2ca1bb 844 # youtube-dl invidious instances list
845 r'(?:(?:www|no)\.)?invidiou\.sh',
846 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
847 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 848 r'(?:www\.)?invidious\.mastodon\.host',
849 r'(?:www\.)?invidious\.zapashcanon\.fr',
ed807c18 850 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
201c1459 851 r'(?:www\.)?invidious\.tinfoil-hat\.net',
852 r'(?:www\.)?invidious\.himiko\.cloud',
853 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 854 r'(?:www\.)?invidious\.tube',
855 r'(?:www\.)?invidiou\.site',
856 r'(?:www\.)?invidious\.site',
857 r'(?:www\.)?invidious\.xyz',
858 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 859 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 860 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 861 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 862 r'(?:www\.)?tube\.poal\.co',
863 r'(?:www\.)?tube\.connect\.cafe',
864 r'(?:www\.)?vid\.wxzm\.sx',
865 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 866 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 867 r'(?:www\.)?yewtu\.be',
868 r'(?:www\.)?yt\.elukerio\.org',
869 r'(?:www\.)?yt\.lelux\.fi',
870 r'(?:www\.)?invidious\.ggc-project\.de',
871 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 872 r'(?:www\.)?ytprivate\.com',
873 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 874 r'(?:www\.)?invidious\.toot\.koeln',
875 r'(?:www\.)?invidious\.fdn\.fr',
876 r'(?:www\.)?watch\.nettohikari\.com',
ed807c18 877 r'(?:www\.)?invidious\.namazso\.eu',
878 r'(?:www\.)?invidious\.silkky\.cloud',
879 r'(?:www\.)?invidious\.exonip\.de',
880 r'(?:www\.)?invidious\.riverside\.rocks',
881 r'(?:www\.)?invidious\.blamefran\.net',
882 r'(?:www\.)?invidious\.moomoo\.de',
883 r'(?:www\.)?ytb\.trom\.tf',
884 r'(?:www\.)?yt\.cyberhost\.uk',
bc2ca1bb 885 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
886 r'(?:www\.)?qklhadlycap4cnod\.onion',
887 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
888 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
889 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
890 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
891 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
892 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
ed807c18 893 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
894 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
895 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
896 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
bc2ca1bb 897 )
cb7dfeea 898 _VALID_URL = r"""(?x)^
c5e8d7af 899 (
edb53e2d 900 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 901 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
902 (?:www\.)?deturl\.com/www\.youtube\.com|
903 (?:www\.)?pwnyoutube\.com|
904 (?:www\.)?hooktube\.com|
905 (?:www\.)?yourepeat\.com|
906 tube\.majestyc\.net|
907 %(invidious)s|
908 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
909 (?:.*?\#/)? # handle anchor (#/) redirect urls
910 (?: # the various things that can precede the ID:
ac7553d0 911 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 912 |(?: # or the v= param in all its forms
f7000f3a 913 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 914 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 915 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
916 v=
917 )
f4b05232 918 ))
cbaed4bb
S
919 |(?:
920 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
921 vid\.plus| # or vid.plus/xxxx
922 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 923 %(invidious)s
cbaed4bb 924 )/
edb53e2d 925 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 926 )
c5e8d7af 927 )? # all until now is optional -> you can pass the naked ID
201c1459 928 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 929 (?(1).+)? # if we found the ID, everything can follow
9297939e 930 (?:\#|$)""" % {
bc2ca1bb 931 'invidious': '|'.join(_INVIDIOUS_SITES),
932 }
e40c758c 933 _PLAYER_INFO_RE = (
cc2db878 934 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
935 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 936 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 937 )
2c62dc26 938 _formats = {
c2d3cb4c 939 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
940 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
941 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
942 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
943 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
944 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
945 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
946 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 947 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 948 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
949 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
950 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
951 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
952 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
953 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 954 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 955 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
956 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 957
958
959 # 3D videos
c2d3cb4c 960 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
961 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
962 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
963 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 964 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
965 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
966 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 967
96fb5605 968 # Apple HTTP Live Streaming
11f12195 969 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 970 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
971 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
972 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
973 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
974 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 975 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
976 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
977
978 # DASH mp4 video
d23028a8
S
979 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
980 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
981 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
982 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
983 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 984 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
985 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
986 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
987 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
988 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
989 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
990 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 991
f6f1fc92 992 # Dash mp4 audio
d23028a8
S
993 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
994 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
995 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
996 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
997 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
998 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
999 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
1000
1001 # Dash webm
d23028a8
S
1002 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1003 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1004 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1005 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1006 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1007 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1008 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1009 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1010 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1011 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1012 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1013 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1014 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1015 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1016 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1017 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1018 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1019 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1020 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1021 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1022 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1023 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1024
1025 # Dash webm audio
d23028a8
S
1026 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1027 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1028
0857baad 1029 # Dash webm audio with opus inside
d23028a8
S
1030 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1031 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1032 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1033
ce6b9a2d
PH
1034 # RTMP (unnamed)
1035 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1036
1037 # av01 video only formats sometimes served with "unknown" codecs
1038 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1039 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1040 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1041 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 1042 }
29f7c58a 1043 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1044
109dd3b2 1045 _AGE_GATE_REASONS = (
1046 'Sign in to confirm your age',
1047 'This video may be inappropriate for some users.',
1048 'Sorry, this content is age-restricted.')
1049
fd5c4aab
S
1050 _GEO_BYPASS = False
1051
78caa52a 1052 IE_NAME = 'youtube'
2eb88d95
PH
1053 _TESTS = [
1054 {
2d3d2997 1055 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1056 'info_dict': {
1057 'id': 'BaW_jenozKc',
1058 'ext': 'mp4',
3867038a 1059 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1060 'uploader': 'Philipp Hagemeister',
1061 'uploader_id': 'phihag',
ec85ded8 1062 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
1063 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1064 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1065 'upload_date': '20121002',
3867038a 1066 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 1067 'categories': ['Science & Technology'],
3867038a 1068 'tags': ['youtube-dl'],
556dbe7f 1069 'duration': 10,
dbdaaa23 1070 'view_count': int,
3e7c1224
PH
1071 'like_count': int,
1072 'dislike_count': int,
7c80519c 1073 'start_time': 1,
297a564b 1074 'end_time': 9,
2eb88d95 1075 }
0e853ca4 1076 },
fccd3771 1077 {
4bc3a23e
PH
1078 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1079 'note': 'Embed-only video (#1746)',
1080 'info_dict': {
1081 'id': 'yZIXLfi8CZQ',
1082 'ext': 'mp4',
1083 'upload_date': '20120608',
1084 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1085 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1086 'uploader': 'SET India',
94bfcd23 1087 'uploader_id': 'setindia',
ec85ded8 1088 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1089 'age_limit': 18,
545cc85d 1090 },
1091 'skip': 'Private video',
fccd3771 1092 },
11b56058 1093 {
8bdd16b4 1094 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1095 'note': 'Use the first video ID in the URL',
1096 'info_dict': {
1097 'id': 'BaW_jenozKc',
1098 'ext': 'mp4',
3867038a 1099 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1100 'uploader': 'Philipp Hagemeister',
1101 'uploader_id': 'phihag',
ec85ded8 1102 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 1103 'upload_date': '20121002',
3867038a 1104 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 1105 'categories': ['Science & Technology'],
3867038a 1106 'tags': ['youtube-dl'],
556dbe7f 1107 'duration': 10,
dbdaaa23 1108 'view_count': int,
11b56058
PM
1109 'like_count': int,
1110 'dislike_count': int,
34a7de29
S
1111 },
1112 'params': {
1113 'skip_download': True,
1114 },
11b56058 1115 },
dd27fd17 1116 {
2d3d2997 1117 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1118 'note': '256k DASH audio (format 141) via DASH manifest',
1119 'info_dict': {
1120 'id': 'a9LDPn-MO4I',
1121 'ext': 'm4a',
1122 'upload_date': '20121002',
1123 'uploader_id': '8KVIDEO',
ec85ded8 1124 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1125 'description': '',
1126 'uploader': '8KVIDEO',
1127 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1128 },
4bc3a23e
PH
1129 'params': {
1130 'youtube_include_dash_manifest': True,
1131 'format': '141',
4919603f 1132 },
de3c7fe0 1133 'skip': 'format 141 not served anymore',
dd27fd17 1134 },
8bdd16b4 1135 # DASH manifest with encrypted signature
1136 {
1137 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1138 'info_dict': {
1139 'id': 'IB3lcPjvWLA',
1140 'ext': 'm4a',
1141 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1142 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1143 'duration': 244,
1144 'uploader': 'AfrojackVEVO',
1145 'uploader_id': 'AfrojackVEVO',
1146 'upload_date': '20131011',
cc2db878 1147 'abr': 129.495,
8bdd16b4 1148 },
1149 'params': {
1150 'youtube_include_dash_manifest': True,
1151 'format': '141/bestaudio[ext=m4a]',
1152 },
1153 },
dd2d55f1 1154 # Normal age-gate video (embed allowed)
c522adb1 1155 {
2d3d2997 1156 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1157 'info_dict': {
1158 'id': 'HtVdAasjOgU',
1159 'ext': 'mp4',
1160 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1161 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1162 'duration': 142,
c522adb1
JMF
1163 'uploader': 'The Witcher',
1164 'uploader_id': 'WitcherGame',
ec85ded8 1165 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1166 'upload_date': '20140605',
34952f09 1167 'age_limit': 18,
c522adb1
JMF
1168 },
1169 },
8bdd16b4 1170 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1171 # YouTube Red ad is not captured for creator
1172 {
1173 'url': '__2ABJjxzNo',
1174 'info_dict': {
1175 'id': '__2ABJjxzNo',
1176 'ext': 'mp4',
1177 'duration': 266,
1178 'upload_date': '20100430',
1179 'uploader_id': 'deadmau5',
1180 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1181 'creator': 'deadmau5',
1182 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1183 'uploader': 'deadmau5',
1184 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1185 'alt_title': 'Some Chords',
8bdd16b4 1186 },
1187 'expected_warnings': [
1188 'DASH manifest missing',
1189 ]
1190 },
067aa17e 1191 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1192 {
1193 'url': 'lqQg6PlCWgI',
1194 'info_dict': {
1195 'id': 'lqQg6PlCWgI',
1196 'ext': 'mp4',
556dbe7f 1197 'duration': 6085,
90227264 1198 'upload_date': '20150827',
cbe2bd91 1199 'uploader_id': 'olympic',
ec85ded8 1200 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1201 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
11f9be09 1202 'uploader': 'Olympics',
cbe2bd91
PH
1203 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1204 },
1205 'params': {
1206 'skip_download': 'requires avconv',
e52a40ab 1207 }
cbe2bd91 1208 },
6271f1ca
PH
1209 # Non-square pixels
1210 {
1211 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1212 'info_dict': {
1213 'id': '_b-2C3KPAM0',
1214 'ext': 'mp4',
1215 'stretched_ratio': 16 / 9.,
556dbe7f 1216 'duration': 85,
6271f1ca
PH
1217 'upload_date': '20110310',
1218 'uploader_id': 'AllenMeow',
ec85ded8 1219 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1220 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1221 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1222 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1223 },
06b491eb
S
1224 },
1225 # url_encoded_fmt_stream_map is empty string
1226 {
1227 'url': 'qEJwOuvDf7I',
1228 'info_dict': {
1229 'id': 'qEJwOuvDf7I',
f57b7835 1230 'ext': 'webm',
06b491eb
S
1231 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1232 'description': '',
1233 'upload_date': '20150404',
1234 'uploader_id': 'spbelect',
1235 'uploader': 'Наблюдатели Петербурга',
1236 },
1237 'params': {
1238 'skip_download': 'requires avconv',
e323cf3f
S
1239 },
1240 'skip': 'This live event has ended.',
06b491eb 1241 },
067aa17e 1242 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1243 {
1244 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1245 'info_dict': {
1246 'id': 'FIl7x6_3R5Y',
eb6793ba 1247 'ext': 'webm',
da77d856
S
1248 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1249 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1250 'duration': 220,
da77d856
S
1251 'upload_date': '20150625',
1252 'uploader_id': 'dorappi2000',
ec85ded8 1253 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1254 'uploader': 'dorappi2000',
eb6793ba 1255 'formats': 'mincount:31',
da77d856 1256 },
eb6793ba 1257 'skip': 'not actual anymore',
2ee8f5d8 1258 },
8a1a26ce
YCH
1259 # DASH manifest with segment_list
1260 {
1261 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1262 'md5': '8ce563a1d667b599d21064e982ab9e31',
1263 'info_dict': {
1264 'id': 'CsmdDsKjzN8',
1265 'ext': 'mp4',
17ee98e1 1266 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1267 'uploader': 'Airtek',
1268 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1269 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1270 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1271 },
1272 'params': {
1273 'youtube_include_dash_manifest': True,
1274 'format': '135', # bestvideo
be49068d
S
1275 },
1276 'skip': 'This live event has ended.',
2ee8f5d8 1277 },
cf7e015f
S
1278 {
1279 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1280 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1281 'info_dict': {
545cc85d 1282 'id': 'jvGDaLqkpTg',
1283 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1284 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1285 },
1286 'playlist': [{
1287 'info_dict': {
545cc85d 1288 'id': 'jvGDaLqkpTg',
cf7e015f 1289 'ext': 'mp4',
545cc85d 1290 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1291 'description': 'md5:e03b909557865076822aa169218d6a5d',
1292 'duration': 10643,
1293 'upload_date': '20161111',
1294 'uploader': 'Team PGP',
1295 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1296 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1297 },
1298 }, {
1299 'info_dict': {
545cc85d 1300 'id': '3AKt1R1aDnw',
cf7e015f 1301 'ext': 'mp4',
545cc85d 1302 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1303 'description': 'md5:e03b909557865076822aa169218d6a5d',
1304 'duration': 10991,
1305 'upload_date': '20161111',
1306 'uploader': 'Team PGP',
1307 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1308 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1309 },
1310 }, {
1311 'info_dict': {
545cc85d 1312 'id': 'RtAMM00gpVc',
cf7e015f 1313 'ext': 'mp4',
545cc85d 1314 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1315 'description': 'md5:e03b909557865076822aa169218d6a5d',
1316 'duration': 10995,
1317 'upload_date': '20161111',
1318 'uploader': 'Team PGP',
1319 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1320 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1321 },
1322 }, {
1323 'info_dict': {
545cc85d 1324 'id': '6N2fdlP3C5U',
cf7e015f 1325 'ext': 'mp4',
545cc85d 1326 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1327 'description': 'md5:e03b909557865076822aa169218d6a5d',
1328 'duration': 10990,
1329 'upload_date': '20161111',
1330 'uploader': 'Team PGP',
1331 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1332 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1333 },
1334 }],
1335 'params': {
1336 'skip_download': True,
1337 },
cbaed4bb 1338 },
f9f49d87 1339 {
067aa17e 1340 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1341 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1342 'info_dict': {
1343 'id': 'gVfLd0zydlo',
1344 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1345 },
1346 'playlist_count': 2,
be49068d 1347 'skip': 'Not multifeed anymore',
f9f49d87 1348 },
cbaed4bb 1349 {
2d3d2997 1350 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1351 'only_matching': True,
0e49d9a6 1352 },
6d4fc66b 1353 {
2d3d2997 1354 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1355 'only_matching': True,
1356 },
0e49d9a6 1357 {
067aa17e 1358 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1359 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1360 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1361 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1362 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1363 'info_dict': {
1364 'id': 'lsguqyKfVQg',
1365 'ext': 'mp4',
1366 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1367 'alt_title': 'Dark Walk',
0e49d9a6 1368 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1369 'duration': 133,
0e49d9a6
LL
1370 'upload_date': '20151119',
1371 'uploader_id': 'IronSoulElf',
ec85ded8 1372 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1373 'uploader': 'IronSoulElf',
11f9be09 1374 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1375 'track': 'Dark Walk',
1376 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1377 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1378 },
1379 'params': {
1380 'skip_download': True,
1381 },
1382 },
61f92af1 1383 {
067aa17e 1384 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1385 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1386 'only_matching': True,
1387 },
313dfc45
LL
1388 {
1389 # Video with yt:stretch=17:0
1390 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1391 'info_dict': {
1392 'id': 'Q39EVAstoRM',
1393 'ext': 'mp4',
1394 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1395 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1396 'upload_date': '20151107',
1397 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1398 'uploader': 'CH GAMER DROID',
1399 },
1400 'params': {
1401 'skip_download': True,
1402 },
be49068d 1403 'skip': 'This video does not exist.',
313dfc45 1404 },
201c1459 1405 {
1406 # Video with incomplete 'yt:stretch=16:'
1407 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1408 'only_matching': True,
1409 },
7caf9830
S
1410 {
1411 # Video licensed under Creative Commons
1412 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1413 'info_dict': {
1414 'id': 'M4gD1WSo5mA',
1415 'ext': 'mp4',
1416 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1417 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1418 'duration': 721,
7caf9830
S
1419 'upload_date': '20150127',
1420 'uploader_id': 'BerkmanCenter',
ec85ded8 1421 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1422 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1423 'license': 'Creative Commons Attribution license (reuse allowed)',
1424 },
1425 'params': {
1426 'skip_download': True,
1427 },
1428 },
fd050249
S
1429 {
1430 # Channel-like uploader_url
1431 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1432 'info_dict': {
1433 'id': 'eQcmzGIKrzg',
1434 'ext': 'mp4',
1435 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1436 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1437 'duration': 4060,
fd050249 1438 'upload_date': '20151119',
eb6793ba 1439 'uploader': 'Bernie Sanders',
fd050249 1440 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1441 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1442 'license': 'Creative Commons Attribution license (reuse allowed)',
1443 },
1444 'params': {
1445 'skip_download': True,
1446 },
1447 },
040ac686
S
1448 {
1449 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1450 'only_matching': True,
7f29cf54
S
1451 },
1452 {
067aa17e 1453 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1454 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1455 'only_matching': True,
6496ccb4
S
1456 },
1457 {
1458 # Rental video preview
1459 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1460 'info_dict': {
1461 'id': 'uGpuVWrhIzE',
1462 'ext': 'mp4',
1463 'title': 'Piku - Trailer',
1464 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1465 'upload_date': '20150811',
1466 'uploader': 'FlixMatrix',
1467 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1468 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1469 'license': 'Standard YouTube License',
1470 },
1471 'params': {
1472 'skip_download': True,
1473 },
eb6793ba 1474 'skip': 'This video is not available.',
022a5d66 1475 },
12afdc2a
S
1476 {
1477 # YouTube Red video with episode data
1478 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1479 'info_dict': {
1480 'id': 'iqKdEhx-dD4',
1481 'ext': 'mp4',
1482 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1483 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1484 'duration': 2085,
12afdc2a
S
1485 'upload_date': '20170118',
1486 'uploader': 'Vsauce',
1487 'uploader_id': 'Vsauce',
1488 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1489 'series': 'Mind Field',
1490 'season_number': 1,
1491 'episode_number': 1,
1492 },
1493 'params': {
1494 'skip_download': True,
1495 },
1496 'expected_warnings': [
1497 'Skipping DASH manifest',
1498 ],
1499 },
c7121fa7
S
1500 {
1501 # The following content has been identified by the YouTube community
1502 # as inappropriate or offensive to some audiences.
1503 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1504 'info_dict': {
1505 'id': '6SJNVb0GnPI',
1506 'ext': 'mp4',
1507 'title': 'Race Differences in Intelligence',
1508 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1509 'duration': 965,
1510 'upload_date': '20140124',
1511 'uploader': 'New Century Foundation',
1512 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1513 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1514 },
1515 'params': {
1516 'skip_download': True,
1517 },
545cc85d 1518 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1519 },
022a5d66
S
1520 {
1521 # itag 212
1522 'url': '1t24XAntNCY',
1523 'only_matching': True,
fd5c4aab
S
1524 },
1525 {
1526 # geo restricted to JP
1527 'url': 'sJL6WA-aGkQ',
1528 'only_matching': True,
1529 },
cd5a74a2
S
1530 {
1531 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1532 'only_matching': True,
1533 },
bc2ca1bb 1534 {
1535 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1536 'only_matching': True,
1537 },
1538 {
1539 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1540 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1541 'only_matching': True,
1542 },
825cd268
RA
1543 {
1544 # DRM protected
1545 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1546 'only_matching': True,
4fe54c12
S
1547 },
1548 {
1549 # Video with unsupported adaptive stream type formats
1550 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1551 'info_dict': {
1552 'id': 'Z4Vy8R84T1U',
1553 'ext': 'mp4',
1554 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1555 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1556 'duration': 433,
1557 'upload_date': '20130923',
1558 'uploader': 'Amelia Putri Harwita',
1559 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1560 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1561 'formats': 'maxcount:10',
1562 },
1563 'params': {
1564 'skip_download': True,
1565 'youtube_include_dash_manifest': False,
1566 },
5429d6a9 1567 'skip': 'not actual anymore',
5caabd3c 1568 },
1569 {
822b9d9c 1570 # Youtube Music Auto-generated description
5caabd3c 1571 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1572 'info_dict': {
1573 'id': 'MgNrAu2pzNs',
1574 'ext': 'mp4',
1575 'title': 'Voyeur Girl',
1576 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1577 'upload_date': '20190312',
5429d6a9
S
1578 'uploader': 'Stephen - Topic',
1579 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1580 'artist': 'Stephen',
1581 'track': 'Voyeur Girl',
1582 'album': 'it\'s too much love to know my dear',
1583 'release_date': '20190313',
1584 'release_year': 2019,
1585 },
1586 'params': {
1587 'skip_download': True,
1588 },
1589 },
66b48727
RA
1590 {
1591 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1592 'only_matching': True,
1593 },
011e75e6
S
1594 {
1595 # invalid -> valid video id redirection
1596 'url': 'DJztXj2GPfl',
1597 'info_dict': {
1598 'id': 'DJztXj2GPfk',
1599 'ext': 'mp4',
1600 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1601 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1602 'upload_date': '20090125',
1603 'uploader': 'Prochorowka',
1604 'uploader_id': 'Prochorowka',
1605 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1606 'artist': 'Panjabi MC',
1607 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1608 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1609 },
1610 'params': {
1611 'skip_download': True,
1612 },
545cc85d 1613 'skip': 'Video unavailable',
ea74e00b
DP
1614 },
1615 {
1616 # empty description results in an empty string
1617 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1618 'info_dict': {
1619 'id': 'x41yOUIvK2k',
1620 'ext': 'mp4',
1621 'title': 'IMG 3456',
1622 'description': '',
1623 'upload_date': '20170613',
1624 'uploader_id': 'ElevageOrVert',
1625 'uploader': 'ElevageOrVert',
1626 },
1627 'params': {
1628 'skip_download': True,
1629 },
1630 },
a0566bbf 1631 {
29f7c58a 1632 # with '};' inside yt initial data (see [1])
1633 # see [2] for an example with '};' inside ytInitialPlayerResponse
1634 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1635 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1636 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1637 'info_dict': {
1638 'id': 'CHqg6qOn4no',
1639 'ext': 'mp4',
1640 'title': 'Part 77 Sort a list of simple types in c#',
1641 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1642 'upload_date': '20130831',
1643 'uploader_id': 'kudvenkat',
1644 'uploader': 'kudvenkat',
1645 },
1646 'params': {
1647 'skip_download': True,
1648 },
1649 },
29f7c58a 1650 {
1651 # another example of '};' in ytInitialData
1652 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1653 'only_matching': True,
1654 },
1655 {
1656 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1657 'only_matching': True,
1658 },
545cc85d 1659 {
cc2db878 1660 # https://github.com/ytdl-org/youtube-dl/pull/28094
1661 'url': 'OtqTfy26tG0',
1662 'info_dict': {
1663 'id': 'OtqTfy26tG0',
1664 'ext': 'mp4',
1665 'title': 'Burn Out',
1666 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1667 'upload_date': '20141120',
1668 'uploader': 'The Cinematic Orchestra - Topic',
1669 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1670 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1671 'artist': 'The Cinematic Orchestra',
1672 'track': 'Burn Out',
1673 'album': 'Every Day',
1674 'release_data': None,
1675 'release_year': None,
1676 },
1677 'params': {
1678 'skip_download': True,
1679 },
545cc85d 1680 },
bc2ca1bb 1681 {
1682 # controversial video, only works with bpctr when authenticated with cookies
1683 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1684 'only_matching': True,
1685 },
a1a7907b 1686 {
1687 # controversial video, requires bpctr/contentCheckOk
1688 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1689 'info_dict': {
1690 'id': 'SZJvDhaSDnc',
1691 'ext': 'mp4',
1692 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1693 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1694 'uploader': 'CBS This Morning',
11f9be09 1695 'uploader_id': 'CBSThisMorning',
a1a7907b 1696 'upload_date': '20140716',
1697 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1698 }
1699 },
f7ad7160 1700 {
1701 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1702 'url': 'cBvYw8_A0vQ',
1703 'info_dict': {
1704 'id': 'cBvYw8_A0vQ',
1705 'ext': 'mp4',
1706 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1707 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1708 'upload_date': '20201120',
1709 'uploader': 'Walk around Japan',
1710 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1711 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1712 },
1713 'params': {
1714 'skip_download': True,
1715 },
0fb983f6 1716 }, {
1717 # Has multiple audio streams
1718 'url': 'WaOKSUlf4TM',
1719 'only_matching': True
9297939e 1720 }, {
1721 # Requires Premium: has format 141 when requested using YTM url
1722 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1723 'only_matching': True
1724 }, {
120916da 1725 # multiple subtitles with same lang_code
1726 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1727 'only_matching': True,
109dd3b2 1728 }, {
1729 # Force use android client fallback
1730 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1731 'info_dict': {
1732 'id': 'YOelRv7fMxY',
11f9be09 1733 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 1734 'ext': '3gp',
1735 'upload_date': '20210624',
1736 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1737 'uploader': 'colinfurze',
11f9be09 1738 'uploader_id': 'colinfurze',
109dd3b2 1739 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
11f9be09 1740 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
109dd3b2 1741 },
1742 'params': {
1743 'format': '17', # 3gp format available on android
1744 'extractor_args': {'youtube': {'player_client': ['android']}},
1745 },
120916da 1746 },
109dd3b2 1747 {
1748 # Skip download of additional client configs (remix client config in this case)
1749 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1750 'only_matching': True,
1751 'params': {
1752 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1753 },
1754 }
2eb88d95
PH
1755 ]
1756
@classmethod
def suitable(cls, url):
    """Reject URLs that carry a non-empty `list` query parameter; otherwise defer to the base check."""
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
1766
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the base extractor and the two per-instance player caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Signature functions, keyed by (player_url, signature layout id)
    self._player_cache = {}
    # Downloaded player JS source, keyed by player id
    self._code_cache = {}
e0df6211 1771
def _extract_player_url(self, ytcfg=None, webpage=None):
    """
    Return an absolute player JS URL, or None when none can be found.

    The URL is taken from ytcfg['PLAYER_JS_URL'] first; when that is missing
    and a webpage is given, it is searched for in the webpage instead.
    """
    url = try_get(ytcfg, (lambda cfg: cfg['PLAYER_JS_URL']), str)
    if webpage and not url:
        url = self._search_regex(
            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
            webpage, 'player URL', fatal=False)
    if not url:
        return None
    # Protocol-relative URL
    if url.startswith('//'):
        return 'https:' + url
    # Already absolute
    if re.match(r'https?://', url):
        return url
    # Anything else is resolved against the YouTube origin
    return compat_urlparse.urljoin('https://www.youtube.com', url)
1786
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return the dot-joined lengths of the dot-separated parts of a signature"""
    part_lengths = [len(chunk) for chunk in example_sig.split('.')]
    return '.'.join(map(compat_str, part_lengths))
60064c53 1790
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """
    Return the `id` group of the first pattern in cls._PLAYER_INFO_RE that
    matches player_url. Raises ExtractorError when no pattern matches.
    """
    for pattern in cls._PLAYER_INFO_RE:
        match = re.search(pattern, player_url)
        if match:
            return match.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)
e40c758c 1800
109dd3b2 1801 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1802 player_id = self._extract_player_info(player_url)
1803 if player_id not in self._code_cache:
1804 self._code_cache[player_id] = self._download_webpage(
1805 player_url, video_id, fatal=fatal,
1806 note='Downloading player ' + player_id,
1807 errnote='Download of %s failed' % player_url)
1808 return player_id in self._code_cache
1809
def _extract_signature_function(self, video_id, player_url, example_sig):
    """
    Return a callable that transforms a signature string for this player.

    The result is served from the 'youtube-sigfuncs' cache when an entry
    exists for this player id and signature layout; otherwise the function is
    parsed from the player JS and its index permutation is stored in that cache.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    cache_key = 'js_%s_%s' % (
        player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(cache_key) == cache_key

    cached_indices = self._downloader.cache.load('youtube-sigfuncs', cache_key)
    if cached_indices is not None:
        return lambda s: ''.join(s[i] for i in cached_indices)

    if not self._load_player(video_id, player_url):
        return None

    sig_func = self._parse_sig_js(self._code_cache[player_id])

    # Run the function over a string whose i-th character has code point i,
    # so the output directly spells out the index permutation
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    indices = [ord(ch) for ch in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', cache_key, indices)
    return sig_func
83799698 1832
def _print_sig_code(self, func, example_sig):
    """
    Print Python source equivalent to the signature function `func`.

    `func` is run over a probe string whose i-th character has code point i;
    the resulting characters give the source index of each output position.
    Those indices are rendered as a sum of `s[...]` index/slice expressions
    and written out with self.to_screen.
    """
    def gen_sig_code(idxs):
        # Yields 's[...]' fragments covering idxs, merging runs that move by +1 or -1 into slices
        def _genslice(start, end, step):
            # Render an inclusive run start..end as a slice; default parts are left out
            starts = '' if start == 0 else str(start)
            # The exclusive bound is end + step; a negative bound is written as an open end
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # step is None while no run is in progress
        step = None
        # Quell pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    # Run continues
                    continue
                # Run ended at prev
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run starts at prev
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Emit whatever is still pending for the final index
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    # Tuple of the lengths of the dot-separated signature parts
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1871
e0df6211
PH
def _parse_sig_js(self, jscode):
    """
    Locate the signature function in the player JS and wrap it.

    The function name is the `sig` group of one of the patterns below,
    searched for in jscode. The function is then extracted with JSInterpreter
    and returned as a one-argument callable taking the signature string.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function is called with an argument list, hence the wrapping [s]
    return lambda s: initial_function([s])
1895
def _decrypt_signature(self, s, video_id, player_url):
    """
    Turn the encrypted s field into a working signature.

    Signature functions are memoised in self._player_cache per
    (player_url, signature layout). Any failure is re-raised as an
    ExtractorError carrying the formatted traceback.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        if self.get_param('youtube_print_sig_code'):
            self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as err:
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=err)
e0df6211 1917
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    ytcfg['STS'] is used when it yields a truthy integer; otherwise the
    value is searched for in the player JS.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return
    if self._load_player(video_id, player_url, fatal=fatal):
        player_code = self._code_cache[self._extract_player_info(player_url)]
        sts = int_or_none(self._search_regex(
            r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', player_code,
            'JS player signature timestamp', group='sts', fatal=fatal))
    return sts
1942
def _mark_watched(self, video_id, player_responses):
    """
    Request the playback-tracking URL found in the player responses, with
    `ver` and a random `cpn` added to its query. Warns and returns when no
    such URL is available; the request itself is non-fatal.
    """
    playback_url = traverse_obj(
        player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none, get_all=False)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

    query['ver'] = ['2']
    query['cpn'] = [cpn]
    tracking_url = compat_urlparse.urlunparse(
        url_parts._replace(query=compat_urllib_parse_urlencode(query, True)))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1968
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """
    Collect YouTube embeds from a webpage.

    Returns a list containing, in this order: HTML-unescaped embed URLs,
    HTML-unescaped lazyYT `data-youtube-id` values, and `data-video_id`
    values of the "YouTube Video Importer" Wordpress plugin.
    """
    # Embedded YouTube player
    # The embedSWF alternative used to be `embedSWF\(?:\s*`, a malformed group
    # that required a literal ':' and so never matched `embedSWF("...")` calls.
    # It now accepts `embedSWF(` as well as everything the old form accepted.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF(?:\(:?|:)\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id)
        for youtube_id in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall returns one tuple per match; the video id is the last group
    entries.extend(m[-1] for m in matches)

    return entries
2000
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by _extract_urls, or None if there is none."""
    return next(iter(YoutubeIE._extract_urls(webpage)), None)
2005
97665381
PH
@classmethod
def extract_id(cls, url):
    """
    Return group 2 of cls._VALID_URL matched against url.
    Raises ExtractorError when the URL does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
2013
def _extract_chapters_from_json(self, data, duration):
    """
    Build chapters from the chaptered player bar in `data`.

    Each chapter's start comes from `timeRangeStartMillis` scaled down by 1000
    and its title from `title.simpleText`; the list is then handed to
    _extract_chapters.
    """
    def start_of(chapter):
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    chapter_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    raw_chapters = traverse_obj(data, chapter_path, expected_type=list)

    return self._extract_chapters(
        raw_chapters,
        chapter_time=start_of,
        chapter_title=title_of,
        duration=duration)
2028
def _extract_chapters_from_engagement_panel(self, data, duration):
    """
    Build chapters from the macro-marker lists of the engagement panels.

    Returns the chapters of the first list that produces any, or an empty
    list when none does.
    """
    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
2044
2045 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
84213ea8 2046 chapters = []
7c365c21 2047 last_chapter = {'start_time': 0}
2048 for idx, chapter in enumerate(chapter_list or []):
2049 title = chapter_title(chapter)
84213ea8
S
2050 start_time = chapter_time(chapter)
2051 if start_time is None:
2052 continue
7c365c21 2053 last_chapter['end_time'] = start_time
2054 if start_time < last_chapter['start_time']:
2055 if idx == 1:
2056 chapters.pop()
2057 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2058 else:
2059 self.report_warning(f'Invalid start time for chapter "{title}"')
2060 continue
2061 last_chapter = {'start_time': start_time, 'title': title}
2062 chapters.append(last_chapter)
2063 last_chapter['end_time'] = duration
84213ea8
S
2064 return chapters
2065
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """
    Search the webpage for `regex` (first followed by the initial-data
    boundary pattern, then on its own) and parse the captured text as JSON.
    Falls back to '{}' when nothing matches; JSON parsing is non-fatal.
    """
    patterns = (
        r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
        regex)
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 2070
d92f5d5a 2071 @staticmethod
2072 def parse_time_text(time_text):
2073 """
2074 Parse the comment time text
2075 time_text is in the format 'X units ago (edited)'
2076 """
2077 time_text_split = time_text.split(' ')
2078 if len(time_text_split) >= 3:
da503b7a 2079 try:
2080 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2081 except ValueError:
2082 return None
d92f5d5a 2083
a1c5d2ca
M
2084 def _extract_comment(self, comment_renderer, parent=None):
2085 comment_id = comment_renderer.get('commentId')
2086 if not comment_id:
2087 return
fe93e2c4 2088
052e1350 2089 text = self._get_text(comment_renderer, 'contentText')
fe93e2c4 2090
49bd8c66 2091 # note: timestamp is an estimate calculated from the current time and time_text
052e1350 2092 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
fe93e2c4 2093 time_text_dt = self.parse_time_text(time_text)
2094 if isinstance(time_text_dt, datetime.datetime):
2095 timestamp = calendar.timegm(time_text_dt.timetuple())
052e1350 2096 author = self._get_text(comment_renderer, 'authorText')
a1c5d2ca
M
2097 author_id = try_get(comment_renderer,
2098 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
fe93e2c4 2099
49bd8c66 2100 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2101 lambda x: x['likeCount']), compat_str)) or 0
a1c5d2ca
M
2102 author_thumbnail = try_get(comment_renderer,
2103 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2104
2105 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
97524332 2106 is_favorited = 'creatorHeart' in (try_get(
2107 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
a1c5d2ca
M
2108 return {
2109 'id': comment_id,
2110 'text': text,
d92f5d5a 2111 'timestamp': timestamp,
a1c5d2ca
M
2112 'time_text': time_text,
2113 'like_count': votes,
97524332 2114 'is_favorited': is_favorited,
a1c5d2ca
M
2115 'author': author,
2116 'author_id': author_id,
2117 'author_thumbnail': author_thumbnail,
2118 'author_is_uploader': author_is_uploader,
2119 'parent': parent or 'root'
2120 }
2121
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, video_id, parent=None, comment_counts=None):
    """
    Generator over the comments reachable from root_continuation_data.

    Yields comment dicts (from _extract_comment) and, for the first
    continuation of a top-level call, possibly an int holding the expected
    total number of comments. Replies are fetched by recursing with
    `parent` set to the id of the comment being replied to.

    comment_counts is a shared mutable list:
    [comments so far, estimated total, current reply thread number].
    """

    def extract_header(contents):
        # Reads the expected comment count and picks the continuation of the requested sort order
        _total_comments = 0
        _continuation = None
        for content in contents:
            comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
            expected_comment_count = parse_count(self._get_text(
                comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

            if expected_comment_count:
                comment_counts[1] = expected_comment_count
                self.to_screen('Downloading ~%d comments' % expected_comment_count)
                _total_comments = comment_counts[1]
            sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
            comment_sort_index = int(sort_mode_str != 'top') # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = sort_menu_item.get('title')
            if isinstance(sort_text, compat_str):
                sort_text = sort_text.lower()
            else:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text)
            break
        return _total_comments, _continuation

    def extract_thread(contents):
        # Yields each comment in contents, followed immediately by its replies
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # NOTE(review): `dict` sits inside the getter tuple here rather than being
            # passed as the expected type - confirm this is intended
            comment_renderer = try_get(
                comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                content, (lambda x: x['commentRenderer'], dict))

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    video_id, parent=comment.get('id'), comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    # YouTube comments have a max depth of 2
    max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
    # A max depth of 1 means replies (calls with a parent) are not fetched at all
    if max_depth == 1 and parent:
        return
    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    continuation = self._extract_continuation(root_continuation_data)
    if continuation and len(continuation['continuation']) < 27:
        self.write_debug('Detected old API continuation token. Generating new API compatible token.')
        continuation_token = self._generate_comment_continuation(video_id)
        continuation = self._build_api_continuation_query(continuation_token, None)

    visitor_data = None
    # Only a top-level call (no parent) starts with the header/sort-menu response
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    comment_counts[2], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
        if not response:
            break
        # Keep the previous visitor data when the response carries none
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

        # Reset; the loop ends on the next iteration unless a new continuation is found below
        continuation = None
        if isinstance(continuation_contents, list):
            for continuation_section in continuation_contents:
                if not isinstance(continuation_section, dict):
                    continue
                continuation_items = try_get(
                    continuation_section,
                    (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                     lambda x: x['appendContinuationItemsAction']['continuationItems']),
                    list) or []
                if is_first_continuation:
                    total_comments, continuation = extract_header(continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break
                    continue
                # NOTE(review): count stays 0 both when nothing was yielded and when
                # exactly one entry was yielded (enumerate starts at 0)
                count = 0
                for count, entry in enumerate(extract_thread(continuation_items)):
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break

        # Deprecated response structure
        elif isinstance(continuation_contents, dict):
            known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
            for key, continuation_renderer in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                if not isinstance(continuation_renderer, dict):
                    continue
                if is_first_continuation:
                    header_continuation_items = [continuation_renderer.get('header') or {}]
                    total_comments, continuation = extract_header(header_continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break

                # Sometimes YouTube provides a continuation without any comments
                # In most cases we end up just downloading these with very little comments to come.
                count = 0
                for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                if count == 0:
                    if not parent:
                        self.report_warning('No comments received - assuming end of comments')
                    continuation = None
                break
a1c5d2ca 2292
2d6659b9 2293 @staticmethod
2294 def _generate_comment_continuation(video_id):
2295 """
2296 Generates initial comment section continuation token from given video id
2297 """
2298 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2299 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2300 new_continuation_intlist = list(itertools.chain.from_iterable(
2301 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2302 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2303
def _extract_comments(self, ytcfg, video_id, contents, webpage):
    """
    Entry for comment extraction

    Collects comments until the `max_comments` extractor argument is reached
    or the user interrupts, and returns them with their count.
    """
    known_entry_comment_renderers = ('itemSectionRenderer',)

    def _real_comment_extract(contents):
        if not isinstance(contents, list):
            return
        for entry in contents:
            for key, renderer in entry.items():
                if key in known_entry_comment_renderers:
                    yield from self._comment_entries(
                        renderer, video_id=video_id, ytcfg=ytcfg,
                        identity_token=self._extract_identity_token(webpage, item_id=video_id),
                        account_syncid=self._extract_account_syncid(ytcfg))
                    # Only the first known renderer of each entry is used
                    break

    collected = []
    estimated_total = 0
    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')

    try:
        for item in _real_comment_extract(contents):
            if len(collected) >= max_comments:
                break
            # An int is the estimated total, not a comment
            if isinstance(item, int):
                estimated_total = item
            else:
                collected.append(item)
    except KeyboardInterrupt:
        self.to_screen('Interrupted by user')
    self.to_screen('Downloaded %d/%d comments' % (len(collected), estimated_total))
    return {
        'comments': collected,
        'comment_count': len(collected),
    }
2337
109dd3b2 2338 @staticmethod
2339 def _generate_player_context(sts=None):
2340 context = {
2341 'html5Preference': 'HTML5_PREF_WANTS',
2342 }
2343 if sts is not None:
2344 context['signatureTimestamp'] = sts
2345 return {
2346 'playbackContext': {
2347 'contentPlaybackContext': context
a1a7907b 2348 },
2fd226f6 2349 'contentCheckOk': True,
2350 'racyCheckOk': True
109dd3b2 2351 }
2352
4e6767b5 2353 @staticmethod
c888ffb9 2354 def _get_video_info_params(video_id, client='TVHTML5'):
2355 GVI_CLIENTS = {
2356 'ANDROID': {
2357 'c': 'ANDROID',
2358 'cver': '16.20',
2359 },
2360 'TVHTML5': {
2361 'c': 'TVHTML5',
2362 'cver': '6.20180913',
11f9be09 2363 },
2364 'IOS': {
2365 'c': 'IOS',
2366 'cver': '16.20'
c888ffb9 2367 }
2368 }
2369 query = {
4e6767b5 2370 'video_id': video_id,
2371 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c888ffb9 2372 'html5': '1'
4e6767b5 2373 }
c888ffb9 2374 query.update(GVI_CLIENTS.get(client))
2375 return query
4e6767b5 2376
11f9be09 2377 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
109dd3b2 2378
11f9be09 2379 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2380 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2381 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
2382 headers = self.generate_api_headers(
2383 player_ytcfg, identity_token, syncid,
2384 default_client=self._YT_CLIENTS[client], session_index=session_index)
9297939e 2385
11f9be09 2386 yt_query = {'videoId': video_id}
2387 yt_query.update(self._generate_player_context(sts))
2388 return self._extract_response(
2389 item_id=video_id, ep='player', query=yt_query,
2390 ytcfg=player_ytcfg, headers=headers, fatal=False,
2391 default_client=self._YT_CLIENTS[client],
2392 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2393 ) or None
2394
def _extract_age_gated_player_response(self, client, video_id, ytcfg, identity_token, player_url, initial_pr):
    """Try to get a player response for an age-gated video through the embedded variant of `client`.

    @returns  The player response; or None when `client` has no
              `_<client>_embedded` entry in self._YT_CLIENTS, when the embed
              config reports an age-gate reason, or when the request yields nothing
    """
    # get_video_info endpoint seems to be completely dead
    gvi_client = None  # self._YT_CLIENTS.get(f'_{client}_agegate')
    if gvi_client:
        pr = self._parse_json(traverse_obj(
            compat_parse_qs(self._download_webpage(
                self.http_scheme() + '//www.youtube.com/get_video_info', video_id,
                'Refetching age-gated %s info webpage' % gvi_client.lower(),
                'unable to download video info webpage', fatal=False,
                query=self._get_video_info_params(video_id, client=gvi_client))),
            ('player_response', 0), expected_type=str) or '{}', video_id)
        if pr:
            return pr
        self.report_warning('Falling back to embedded-only age-gate workaround')

    if not self._YT_CLIENTS.get(f'_{client}_embedded'):
        return
    embed_webpage = None
    if client == 'web' and 'configs' not in self._configuration_arg('player_skip'):
        # The embed page only provides an optional config, so a failed download
        # must not abort the extraction; without it the code below proceeds
        # exactly as it does for the non-web clients
        embed_webpage = self._download_webpage(
            'https://www.youtube.com/embed/%s?html5=1' % video_id,
            video_id=video_id, fatal=False,
            note=f'Downloading age-gated {client} embed config')

    ytcfg_age = self.extract_ytcfg(video_id, embed_webpage) or {}
    # If we extracted the embed webpage, it'll tell us if we can view the video
    embedded_pr = self._parse_json(
        traverse_obj(ytcfg_age, ('PLAYER_VARS', 'embedded_player_response'), expected_type=str) or '{}',
        video_id=video_id)
    embedded_ps_reason = traverse_obj(embedded_pr, ('playabilityStatus', 'reason'), expected_type=str) or ''
    if embedded_ps_reason in self._AGE_GATE_REASONS:
        return
    return self._extract_player_response(
        f'_{client}_embedded', video_id,
        ytcfg_age or ytcfg, ytcfg_age if client == 'web' else {},
        identity_token, player_url, initial_pr)
545cc85d 2430
def _get_requested_clients(self, url, smuggled_data):
    """Determine which player clients to query.

    The clients come from the `player_client` extractor argument; names that
    start with an underscore or that are missing from self._YT_CLIENTS are
    dropped. If nothing remains, android and web are used. For music URLs the
    `_music` variant of every client is appended.

    @returns  The client names, in order and without duplicates
    """
    requested_clients = [
        client for client in self._configuration_arg('player_client')
        # NB: `client[:0]` is always '' and so could never match the underscore
        if not client.startswith('_') and client in self._YT_CLIENTS]
    if not requested_clients:
        requested_clients = ['android', 'web']

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the variants first so the list is not extended while being iterated
        music_clients = [
            f'{client}_music' for client in requested_clients
            if not client.endswith('_music')]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
cf7e015f 2442
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
    """Yield the player responses obtained for `clients`, in order.

    For the web client the response embedded in `webpage` is used when it is
    available; every other response is requested from the API. Whenever a
    response carries an age-gate reason, the age-gate workaround is tried as
    well and its response (if any) is yielded too.
    """
    initial_pr = self._extract_yt_initial_variable(
        webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
        video_id, 'initial player response') if webpage else None

    for client in clients:
        is_web = client == 'web'
        player_ytcfg = master_ytcfg if is_web else {}
        if is_web and initial_pr:
            pr = initial_pr
        else:
            wants_music_config = (
                client == 'web_music'
                and 'configs' not in self._configuration_arg('player_skip'))
            if wants_music_config:
                ytm_webpage = self._download_webpage(
                    'https://music.youtube.com',
                    video_id, fatal=False, note='Downloading remix client config')
                player_ytcfg = self.extract_ytcfg(video_id, ytm_webpage) or {}
            pr = self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                identity_token, player_url, initial_pr)
        if pr:
            yield pr

        reason = traverse_obj(pr, ('playabilityStatus', 'reason'))
        if reason not in self._AGE_GATE_REASONS:
            continue
        age_gated_pr = self._extract_age_gated_player_response(
            client, video_id, player_ytcfg or master_ytcfg, identity_token, player_url, initial_pr)
        if age_gated_pr:
            yield age_gated_pr

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr and 'web' not in clients:
        initial_pr['streamingData'] = None
        yield initial_pr
2476
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """Yield the format dicts described by `streaming_data`.

    The directly listed (progressive and adaptive) formats come first,
    followed by the formats of the HLS and DASH manifests unless those are
    skipped. Manifest formats whose itag was already seen are dropped.

    @param streaming_data  Iterable of the streamingData dicts of the player responses
    @param player_url      URL of the player JS; without it, formats that
                           need signature decryption are skipped
    @param is_live         DASH manifests are not used when this is truthy
    """
    itags, stream_ids = [], []
    itag_qualities = {}
    q = qualities([
        # "tiny" is the smallest video-only format. But some audio-only formats
        # were also labeled "tiny". It is not clear if such formats still exist
        'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        # The same itag may be offered once per audio track
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        if itag and quality:
            itag_qualities[itag] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragments that would subsequently be requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: it has to be assembled from the signature cipher
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(encrypted_sig, video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        if itag:
            itags.append(itag)
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': ', '.join(filter(None, (
                audio_track.get('displayName'), fmt.get('qualityLabel') or quality))),
            'fps': int_or_none(fmt.get('fps')),
            'height': int_or_none(fmt.get('height')),
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            # None rather than '' when the format has no audio track id
            'language': audio_track.get('id', '').split('.')[0] or None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
            # The 3gp format in android client has a quality of "small",
            # but is actually worse than all other formats
            if dct['ext'] == '3gp':
                dct['quality'] = q('tiny')
                dct['preference'] = -10
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        # With a single stream the total bitrate is that stream's bitrate
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    skip_manifests = self._configuration_arg('skip')
    get_dash = not is_live and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
    get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(
                    hls_manifest_url, video_id, 'mp4', fatal=False):
                itag = self._search_regex(
                    r'/itag/(\d+)', f['url'], 'itag', default=None)
                if itag in itags:
                    continue
                if itag:
                    f['format_id'] = itag
                    itags.append(itag)
                yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(
                    dash_manifest_url, video_id, fatal=False):
                itag = f['format_id']
                if itag in itags:
                    continue
                if itag:
                    itags.append(itag)
                # Reuse the quality seen for this itag among the direct formats
                if itag in itag_qualities:
                    f['quality'] = q(itag_qualities[itag])
                filesize = int_or_none(self._search_regex(
                    r'/clen/(\d+)', f.get('fragment_base_url')
                    or f['url'], 'file size', default=None))
                if filesize:
                    f['filesize'] = filesize
                yield f
2602
2603 def _real_extract(self, url):
2604 url, smuggled_data = unsmuggle_url(url, {})
2605 video_id = self._match_id(url)
2606
2607 base_url = self.http_scheme() + '//www.youtube.com/'
2608 webpage_url = base_url + 'watch?v=' + video_id
2609 webpage = self._download_webpage(
2610 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2611
2612 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2613 player_url = self._extract_player_url(master_ytcfg, webpage)
2614 identity_token = self._extract_identity_token(webpage, video_id)
2615
2616 player_responses = list(self._extract_player_responses(
2617 self._get_requested_clients(url, smuggled_data),
2618 video_id, webpage, master_ytcfg, player_url, identity_token))
2619
352d63fd 2620 get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
11f9be09 2621
2622 playability_statuses = traverse_obj(
2623 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2624
2625 trailer_video_id = get_first(
2626 playability_statuses,
2627 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2628 expected_type=str)
2629 if trailer_video_id:
2630 return self.url_result(
2631 trailer_video_id, self.ie_key(), trailer_video_id)
2632
2633 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2634 if webpage else (lambda x: None))
2635
2636 video_details = traverse_obj(
2637 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2638 microformats = traverse_obj(
2639 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2640 expected_type=dict, default=[])
2641 video_title = (
2642 get_first(video_details, 'title')
2643 or self._get_text(microformats, (..., 'title'))
2644 or search_meta(['og:title', 'twitter:title', 'title']))
2645 video_description = get_first(video_details, 'shortDescription')
2646
2647 if not smuggled_data.get('force_singlefeed', False):
2648 if not self.get_param('noplaylist'):
2649 multifeed_metadata_list = get_first(
2650 player_responses,
2651 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2652 expected_type=str)
2653 if multifeed_metadata_list:
2654 entries = []
2655 feed_ids = []
2656 for feed in multifeed_metadata_list.split(','):
2657 # Unquote should take place before split on comma (,) since textual
2658 # fields may contain comma as well (see
2659 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2660 feed_data = compat_parse_qs(
2661 compat_urllib_parse_unquote_plus(feed))
2662
2663 def feed_entry(name):
2664 return try_get(
2665 feed_data, lambda x: x[name][0], compat_str)
2666
2667 feed_id = feed_entry('id')
2668 if not feed_id:
2669 continue
2670 feed_title = feed_entry('title')
2671 title = video_title
2672 if feed_title:
2673 title += ' (%s)' % feed_title
2674 entries.append({
2675 '_type': 'url_transparent',
2676 'ie_key': 'Youtube',
2677 'url': smuggle_url(
2678 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2679 {'force_singlefeed': True}),
2680 'title': title,
2681 })
2682 feed_ids.append(feed_id)
2683 self.to_screen(
2684 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2685 % (', '.join(feed_ids), video_id))
2686 return self.playlist_result(
2687 entries, video_id, video_title, video_description)
2688 else:
2689 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2690
7ea65411 2691 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
11f9be09 2692 is_live = get_first(video_details, 'isLive')
7ea65411 2693 if is_live is None:
2694 is_live = get_first(live_broadcast_details, 'isLiveNow')
11f9be09 2695
2696 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2697 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
bf1317d2 2698
545cc85d 2699 if not formats:
11f9be09 2700 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
b7da73eb 2701 self.raise_no_formats(
545cc85d 2702 'This video is DRM protected.', expected=True)
11f9be09 2703 pemr = get_first(
2704 playability_statuses,
2705 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2706 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2707 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
545cc85d 2708 if subreason:
545cc85d 2709 if subreason == 'The uploader has not made this video available in your country.':
11f9be09 2710 countries = get_first(microformats, 'availableCountries')
545cc85d 2711 if not countries:
2712 regions_allowed = search_meta('regionsAllowed')
2713 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2714 self.raise_geo_restricted(subreason, countries, metadata_available=True)
11f9be09 2715 reason += f'. {subreason}'
545cc85d 2716 if reason:
b7da73eb 2717 self.raise_no_formats(reason, expected=True)
bf1317d2 2718
11f9be09 2719 for f in formats:
2720 # TODO: detect if throttled
2721 if '&n=' in f['url']: # possibly throttled
2722 f['source_preference'] = -10
2723 # note = f.get('format_note')
2724 # f['format_note'] = f'{note} (throttled)' if note else '(throttled)'
2725
545cc85d 2726 self._sort_formats(formats)
bf1317d2 2727
11f9be09 2728 keywords = get_first(video_details, 'keywords', expected_type=list) or []
545cc85d 2729 if not keywords and webpage:
2730 keywords = [
2731 unescapeHTML(m.group('content'))
2732 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2733 for keyword in keywords:
2734 if keyword.startswith('yt:stretch='):
201c1459 2735 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2736 if mobj:
2737 # NB: float is intentional for forcing float division
2738 w, h = (float(v) for v in mobj.groups())
2739 if w > 0 and h > 0:
2740 ratio = w / h
2741 for f in formats:
2742 if f.get('vcodec') != 'none':
2743 f['stretched_ratio'] = ratio
2744 break
6449cd80 2745
545cc85d 2746 thumbnails = []
11f9be09 2747 thumbnail_dicts = traverse_obj(
2748 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2749 expected_type=dict, default=[])
2750 for thumbnail in thumbnail_dicts:
2751 thumbnail_url = thumbnail.get('url')
2752 if not thumbnail_url:
2753 continue
2754 # Sometimes youtube gives a wrong thumbnail URL. See:
2755 # https://github.com/yt-dlp/yt-dlp/issues/233
2756 # https://github.com/ytdl-org/youtube-dl/issues/28023
2757 if 'maxresdefault' in thumbnail_url:
2758 thumbnail_url = thumbnail_url.split('?')[0]
2759 thumbnails.append({
2760 'url': thumbnail_url,
2761 'height': int_or_none(thumbnail.get('height')),
2762 'width': int_or_none(thumbnail.get('width')),
2763 })
ff2751ac 2764 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2765 if thumbnail_url:
2766 thumbnails.append({
2767 'url': thumbnail_url,
ff2751ac 2768 })
0ba692ac 2769 # The best resolution thumbnails sometimes does not appear in the webpage
2770 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 2771 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2772 hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
245524e6 2773 # TODO: Test them also? - For some videos, even these don't exist
cca80fe6 2774 guaranteed_thumbnail_names = [
2775 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2776 'mqdefault', 'mq1', 'mq2', 'mq3',
2777 'default', '1', '2', '3'
2778 ]
2779 thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2780 n_thumbnail_names = len(thumbnail_names)
2781
0ba692ac 2782 thumbnails.extend({
2783 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2784 video_id=video_id, name=name, ext=ext,
2785 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
cca80fe6 2786 '_test_url': name in hq_thumbnail_names,
2787 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 2788 for thumb in thumbnails:
cca80fe6 2789 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 2790 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 2791 self._remove_duplicate_formats(thumbnails)
545cc85d 2792
7ea65411 2793 category = get_first(microformats, 'category') or search_meta('genre')
2794 channel_id = str_or_none(
2795 get_first(video_details, 'channelId')
2796 or get_first(microformats, 'externalChannelId')
2797 or search_meta('channelId'))
2798 duration = int_or_none(
2799 get_first(video_details, 'lengthSeconds')
2800 or get_first(microformats, 'lengthSeconds')
2801 or parse_duration(search_meta('duration'))) or None
2802 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2803
2804 live_content = get_first(video_details, 'isLiveContent')
2805 is_upcoming = get_first(video_details, 'isUpcoming')
2806 if is_live is None:
2807 if is_upcoming or live_content is False:
2808 is_live = False
2809 if is_upcoming is None and (live_content or is_live):
2810 is_upcoming = False
2811 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2812 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2813 if not duration and live_endtime and live_starttime:
2814 duration = live_endtime - live_starttime
2815
545cc85d 2816 info = {
2817 'id': video_id,
2818 'title': self._live_title(video_title) if is_live else video_title,
2819 'formats': formats,
2820 'thumbnails': thumbnails,
2821 'description': video_description,
2822 'upload_date': unified_strdate(
11f9be09 2823 get_first(microformats, 'uploadDate')
545cc85d 2824 or search_meta('uploadDate')),
11f9be09 2825 'uploader': get_first(video_details, 'author'),
545cc85d 2826 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2827 'uploader_url': owner_profile_url,
2828 'channel_id': channel_id,
11f9be09 2829 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
545cc85d 2830 'duration': duration,
2831 'view_count': int_or_none(
11f9be09 2832 get_first((video_details, microformats), (..., 'viewCount'))
545cc85d 2833 or search_meta('interactionCount')),
11f9be09 2834 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
545cc85d 2835 'age_limit': 18 if (
11f9be09 2836 get_first(microformats, 'isFamilySafe') is False
545cc85d 2837 or search_meta('isFamilyFriendly') == 'false'
2838 or search_meta('og:restrictions:age') == '18+') else 0,
2839 'webpage_url': webpage_url,
2840 'categories': [category] if category else None,
2841 'tags': keywords,
11f9be09 2842 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
7ea65411 2843 'is_live': is_live,
2844 'was_live': (False if is_live or is_upcoming or live_content is False
2845 else None if is_live is None or is_upcoming is None
2846 else live_content),
2847 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2848 'release_timestamp': live_starttime,
545cc85d 2849 }
b477fc13 2850
3944e7af 2851 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2852 # Converted into dicts to remove duplicates
2853 captions = {
2854 sub.get('baseUrl'): sub
2855 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2856 translation_languages = {
2857 lang.get('languageCode'): lang.get('languageName')
2858 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
545cc85d 2859 subtitles = {}
2860 if pctr:
774d79cc 2861 def process_language(container, base_url, lang_code, sub_name, query):
120916da 2862 lang_subs = container.setdefault(lang_code, [])
545cc85d 2863 for fmt in self._SUBTITLE_FORMATS:
2864 query.update({
2865 'fmt': fmt,
2866 })
2867 lang_subs.append({
2868 'ext': fmt,
2869 'url': update_url_query(base_url, query),
774d79cc 2870 'name': sub_name,
545cc85d 2871 })
7e72694b 2872
3944e7af 2873 for base_url, caption_track in captions.items():
545cc85d 2874 if not base_url:
2875 continue
2876 if caption_track.get('kind') != 'asr':
120916da 2877 lang_code = (
2878 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2879 or caption_track.get('languageCode'))
545cc85d 2880 if not lang_code:
2881 continue
2882 process_language(
774d79cc 2883 subtitles, base_url, lang_code,
3944e7af 2884 traverse_obj(caption_track, ('name', 'simpleText')),
774d79cc 2885 {})
545cc85d 2886 continue
2887 automatic_captions = {}
3944e7af 2888 for trans_code, trans_name in translation_languages.items():
2889 if not trans_code:
545cc85d 2890 continue
2891 process_language(
3944e7af 2892 automatic_captions, base_url, trans_code,
2893 self._get_text(trans_name, max_runs=1),
2894 {'tlang': trans_code})
545cc85d 2895 info['automatic_captions'] = automatic_captions
2896 info['subtitles'] = subtitles
7e72694b 2897
545cc85d 2898 parsed_url = compat_urllib_parse_urlparse(url)
2899 for component in [parsed_url.fragment, parsed_url.query]:
2900 query = compat_parse_qs(component)
2901 for k, v in query.items():
2902 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2903 d_k += '_time'
2904 if d_k not in info and k in s_ks:
2905 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2906
2907 # Youtube Music Auto-generated description
822b9d9c 2908 if video_description:
38d70284 2909 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2910 if mobj:
822b9d9c
RA
2911 release_year = mobj.group('release_year')
2912 release_date = mobj.group('release_date')
2913 if release_date:
2914 release_date = release_date.replace('-', '')
2915 if not release_year:
545cc85d 2916 release_year = release_date[:4]
2917 info.update({
2918 'album': mobj.group('album'.strip()),
2919 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2920 'track': mobj.group('track').strip(),
2921 'release_date': release_date,
cc2db878 2922 'release_year': int_or_none(release_year),
545cc85d 2923 })
7e72694b 2924
545cc85d 2925 initial_data = None
2926 if webpage:
2927 initial_data = self._extract_yt_initial_variable(
2928 webpage, self._YT_INITIAL_DATA_RE, video_id,
2929 'yt initial data')
2930 if not initial_data:
11f9be09 2931 headers = self.generate_api_headers(
2932 master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
2933 session_index=self._extract_session_index(master_ytcfg))
2934
109dd3b2 2935 initial_data = self._extract_response(
2936 item_id=video_id, ep='next', fatal=False,
11f9be09 2937 ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
109dd3b2 2938 note='Downloading initial data API JSON')
545cc85d 2939
c60ee3a2 2940 try:
2941 # This will error if there is no livechat
2942 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2943 info['subtitles']['live_chat'] = [{
2944 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2945 'video_id': video_id,
2946 'ext': 'json',
f6745c49 2947 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 2948 }]
2949 except (KeyError, IndexError, TypeError):
2950 pass
545cc85d 2951
2952 if initial_data:
7c365c21 2953 info['chapters'] = (
2954 self._extract_chapters_from_json(initial_data, duration)
2955 or self._extract_chapters_from_engagement_panel(initial_data, duration)
2956 or None)
545cc85d 2957
2958 contents = try_get(
2959 initial_data,
2960 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2961 list) or []
2962 for content in contents:
2963 vpir = content.get('videoPrimaryInfoRenderer')
2964 if vpir:
2965 stl = vpir.get('superTitleLink')
2966 if stl:
fe93e2c4 2967 stl = self._get_text(stl)
545cc85d 2968 if try_get(
2969 vpir,
2970 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2971 info['location'] = stl
2972 else:
2973 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2974 if mobj:
2975 info.update({
2976 'series': mobj.group(1),
2977 'season_number': int(mobj.group(2)),
2978 'episode_number': int(mobj.group(3)),
2979 })
2980 for tlb in (try_get(
2981 vpir,
2982 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2983 list) or []):
2984 tbr = tlb.get('toggleButtonRenderer') or {}
2985 for getter, regex in [(
2986 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2987 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2988 lambda x: x['accessibility'],
2989 lambda x: x['accessibilityData']['accessibilityData'],
2990 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2991 label = (try_get(tbr, getter, dict) or {}).get('label')
2992 if label:
2993 mobj = re.match(regex, label)
2994 if mobj:
2995 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2996 break
2997 sbr_tooltip = try_get(
2998 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2999 if sbr_tooltip:
3000 like_count, dislike_count = sbr_tooltip.split(' / ')
3001 info.update({
3002 'like_count': str_to_int(like_count),
3003 'dislike_count': str_to_int(dislike_count),
3004 })
3005 vsir = content.get('videoSecondaryInfoRenderer')
3006 if vsir:
052e1350 3007 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
545cc85d 3008 rows = try_get(
3009 vsir,
3010 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3011 list) or []
3012 multiple_songs = False
3013 for row in rows:
3014 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3015 multiple_songs = True
3016 break
3017 for row in rows:
3018 mrr = row.get('metadataRowRenderer') or {}
3019 mrr_title = mrr.get('title')
3020 if not mrr_title:
3021 continue
052e1350 3022 mrr_title = self._get_text(mrr, 'title')
3023 mrr_contents_text = self._get_text(mrr, ('contents', 0))
545cc85d 3024 if mrr_title == 'License':
3025 info['license'] = mrr_contents_text
3026 elif not multiple_songs:
3027 if mrr_title == 'Album':
3028 info['album'] = mrr_contents_text
3029 elif mrr_title == 'Artist':
3030 info['artist'] = mrr_contents_text
3031 elif mrr_title == 'Song':
3032 info['track'] = mrr_contents_text
3033
3034 fallbacks = {
3035 'channel': 'uploader',
3036 'channel_id': 'uploader_id',
3037 'channel_url': 'uploader_url',
3038 }
3039 for to, frm in fallbacks.items():
3040 if not info.get(to):
3041 info[to] = info.get(frm)
3042
3043 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3044 v = info.get(s_k)
3045 if v:
3046 info[d_k] = v
b84071c0 3047
11f9be09 3048 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3049 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
c224251a 3050 is_membersonly = None
b28f8d24 3051 is_premium = None
c224251a
M
3052 if initial_data and is_private is not None:
3053 is_membersonly = False
b28f8d24 3054 is_premium = False
47193e02 3055 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3056 badge_labels = set()
3057 for content in contents:
3058 if not isinstance(content, dict):
3059 continue
3060 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3061 for badge_label in badge_labels:
3062 if badge_label.lower() == 'members only':
3063 is_membersonly = True
3064 elif badge_label.lower() == 'premium':
3065 is_premium = True
3066 elif badge_label.lower() == 'unlisted':
3067 is_unlisted = True
c224251a 3068
c224251a
M
3069 info['availability'] = self._availability(
3070 is_private=is_private,
b28f8d24 3071 needs_premium=is_premium,
c224251a
M
3072 needs_subscription=is_membersonly,
3073 needs_auth=info['age_limit'] >= 18,
3074 is_unlisted=None if is_private is None else is_unlisted)
3075
06167fbb 3076 # get xsrf for annotations or comments
a06916d9 3077 get_annotations = self.get_param('writeannotations', False)
3078 get_comments = self.get_param('getcomments', False)
06167fbb 3079 if get_annotations or get_comments:
29f7c58a 3080 xsrf_token = None
11f9be09 3081 if master_ytcfg:
3082 xsrf_token = try_get(master_ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
29f7c58a 3083 if not xsrf_token:
3084 xsrf_token = self._search_regex(
3085 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 3086 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 3087
3088 # annotations
06167fbb 3089 if get_annotations:
11f9be09 3090 invideo_url = get_first(
3091 player_responses,
3092 ('annotations', 0, 'playerAnnotationsUrlsRenderer', 'invideoUrl'),
3093 expected_type=str)
64b6a4e9 3094 if xsrf_token and invideo_url:
29f7c58a 3095 xsrf_field_name = None
11f9be09 3096 if master_ytcfg:
3097 xsrf_field_name = try_get(master_ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
29f7c58a 3098 if not xsrf_field_name:
3099 xsrf_field_name = self._search_regex(
3100 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 3101 webpage, 'xsrf field name',
29f7c58a 3102 group='xsrf_field_name', default='session_token')
8a784c74 3103 info['annotations'] = self._download_webpage(
64b6a4e9
RA
3104 self._proto_relative_url(invideo_url),
3105 video_id, note='Downloading annotations',
3106 errnote='Unable to download video annotations', fatal=False,
3107 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 3108
277d6ff5 3109 if get_comments:
11f9be09 3110 info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
4ea3be0a 3111
11f9be09 3112 self.mark_watched(video_id, player_responses)
d77ab8e2 3113
545cc85d 3114 return info
c5e8d7af 3115
5f6a1245 3116
8bdd16b4 3117class YoutubeTabIE(YoutubeBaseInfoExtractor):
3118 IE_DESC = 'YouTube.com tab'
70d5c17b 3119 _VALID_URL = r'''(?x)
3120 https?://
3121 (?:\w+\.)?
3122 (?:
3123 youtube(?:kids)?\.com|
3124 invidio\.us
3125 )/
3126 (?:
fe03a6cd 3127 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 3128 (?P<not_channel>
9ba5705a 3129 feed/|hashtag/|
70d5c17b 3130 (?:playlist|watch)\?.*?\blist=
3131 )|
29f7c58a 3132 (?!(?:%s)\b) # Direct URLs
70d5c17b 3133 )
3134 (?P<id>[^/?\#&]+)
3135 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 3136 IE_NAME = 'youtube:tab'
3137
81127aa5 3138 _TESTS = [{
da692b79 3139 'note': 'playlists, multipage',
8bdd16b4 3140 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3141 'playlist_mincount': 94,
3142 'info_dict': {
3143 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3144 'title': 'Игорь Клейнер - Playlists',
3145 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3146 'uploader': 'Игорь Клейнер',
3147 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 3148 },
3149 }, {
da692b79 3150 'note': 'playlists, multipage, different order',
8bdd16b4 3151 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3152 'playlist_mincount': 94,
3153 'info_dict': {
3154 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3155 'title': 'Игорь Клейнер - Playlists',
3156 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3157 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3158 'uploader': 'Игорь Клейнер',
8bdd16b4 3159 },
201c1459 3160 }, {
da692b79 3161 'note': 'playlists, series',
201c1459 3162 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3163 'playlist_mincount': 5,
3164 'info_dict': {
3165 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3166 'title': '3Blue1Brown - Playlists',
3167 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 3168 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3169 'uploader': '3Blue1Brown',
201c1459 3170 },
8bdd16b4 3171 }, {
da692b79 3172 'note': 'playlists, singlepage',
8bdd16b4 3173 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3174 'playlist_mincount': 4,
3175 'info_dict': {
3176 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3177 'title': 'ThirstForScience - Playlists',
3178 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 3179 'uploader': 'ThirstForScience',
3180 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 3181 }
3182 }, {
3183 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3184 'only_matching': True,
3185 }, {
da692b79 3186 'note': 'basic, single video playlist',
0e30a7b9 3187 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3188 'info_dict': {
0e30a7b9 3189 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3190 'uploader': 'Sergey M.',
3191 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3192 'title': 'youtube-dl public playlist',
81127aa5 3193 },
0e30a7b9 3194 'playlist_count': 1,
9291475f 3195 }, {
da692b79 3196 'note': 'empty playlist',
0e30a7b9 3197 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3198 'info_dict': {
0e30a7b9 3199 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3200 'uploader': 'Sergey M.',
3201 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3202 'title': 'youtube-dl empty playlist',
9291475f
PH
3203 },
3204 'playlist_count': 0,
3205 }, {
da692b79 3206 'note': 'Home tab',
8bdd16b4 3207 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3208 'info_dict': {
8bdd16b4 3209 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3210 'title': 'lex will - Home',
3211 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3212 'uploader': 'lex will',
3213 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3214 },
8bdd16b4 3215 'playlist_mincount': 2,
9291475f 3216 }, {
da692b79 3217 'note': 'Videos tab',
8bdd16b4 3218 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3219 'info_dict': {
8bdd16b4 3220 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3221 'title': 'lex will - Videos',
3222 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3223 'uploader': 'lex will',
3224 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3225 },
8bdd16b4 3226 'playlist_mincount': 975,
9291475f 3227 }, {
da692b79 3228 'note': 'Videos tab, sorted by popular',
8bdd16b4 3229 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3230 'info_dict': {
8bdd16b4 3231 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3232 'title': 'lex will - Videos',
3233 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3234 'uploader': 'lex will',
3235 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3236 },
8bdd16b4 3237 'playlist_mincount': 199,
9291475f 3238 }, {
da692b79 3239 'note': 'Playlists tab',
8bdd16b4 3240 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3241 'info_dict': {
8bdd16b4 3242 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3243 'title': 'lex will - Playlists',
3244 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3245 'uploader': 'lex will',
3246 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3247 },
8bdd16b4 3248 'playlist_mincount': 17,
ac7553d0 3249 }, {
da692b79 3250 'note': 'Community tab',
8bdd16b4 3251 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3252 'info_dict': {
8bdd16b4 3253 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3254 'title': 'lex will - Community',
3255 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3256 'uploader': 'lex will',
3257 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3258 },
3259 'playlist_mincount': 18,
87dadd45 3260 }, {
da692b79 3261 'note': 'Channels tab',
8bdd16b4 3262 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3263 'info_dict': {
8bdd16b4 3264 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3265 'title': 'lex will - Channels',
3266 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3267 'uploader': 'lex will',
3268 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3269 },
deaec5af 3270 'playlist_mincount': 12,
cd684175 3271 }, {
3272 'note': 'Search tab',
3273 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3274 'playlist_mincount': 40,
3275 'info_dict': {
3276 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3277 'title': '3Blue1Brown - Search - linear algebra',
3278 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3279 'uploader': '3Blue1Brown',
3280 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3281 },
6b08cdf6 3282 }, {
a0566bbf 3283 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3284 'only_matching': True,
3285 }, {
a0566bbf 3286 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3287 'only_matching': True,
3288 }, {
a0566bbf 3289 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3290 'only_matching': True,
3291 }, {
3292 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3293 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3294 'info_dict': {
3295 'title': '29C3: Not my department',
3296 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3297 'uploader': 'Christiaan008',
3298 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3299 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3300 },
3301 'playlist_count': 96,
3302 }, {
3303 'note': 'Large playlist',
3304 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3305 'info_dict': {
8bdd16b4 3306 'title': 'Uploads from Cauchemar',
3307 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3308 'uploader': 'Cauchemar',
3309 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3310 },
8bdd16b4 3311 'playlist_mincount': 1123,
3312 }, {
da692b79 3313 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3314 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3315 'only_matching': True,
4b7df0d3
JMF
3316 }, {
3317 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3318 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3319 'info_dict': {
acf757f4
PH
3320 'title': 'Uploads from Interstellar Movie',
3321 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3322 'uploader': 'Interstellar Movie',
8bdd16b4 3323 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3324 },
481cc733 3325 'playlist_mincount': 21,
358de58c 3326 }, {
3327 'note': 'Playlist with "show unavailable videos" button',
3328 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3329 'info_dict': {
3330 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3331 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3332 'uploader': 'Phim Siêu Nhân Nhật Bản',
3333 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3334 },
da692b79 3335 'playlist_mincount': 200,
5d342002 3336 }, {
da692b79 3337 'note': 'Playlist with unavailable videos in page 7',
5d342002 3338 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3339 'info_dict': {
3340 'title': 'Uploads from BlankTV',
3341 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3342 'uploader': 'BlankTV',
3343 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3344 },
da692b79 3345 'playlist_mincount': 1000,
8bdd16b4 3346 }, {
da692b79 3347 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3348 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3349 'info_dict': {
3350 'title': 'Data Analysis with Dr Mike Pound',
3351 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3352 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3353 'uploader': 'Computerphile',
deaec5af 3354 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3355 },
3356 'playlist_mincount': 11,
3357 }, {
a0566bbf 3358 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3359 'only_matching': True,
dacb3a86 3360 }, {
da692b79 3361 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3362 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3363 'info_dict': {
3364 'id': 'FqZTN594JQw',
3365 'ext': 'webm',
3366 'title': "Smiley's People 01 detective, Adventure Series, Action",
3367 'uploader': 'STREEM',
3368 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3369 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3370 'upload_date': '20150526',
3371 'license': 'Standard YouTube License',
3372 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3373 'categories': ['People & Blogs'],
3374 'tags': list,
dbdaaa23 3375 'view_count': int,
dacb3a86
S
3376 'like_count': int,
3377 'dislike_count': int,
3378 },
3379 'params': {
3380 'skip_download': True,
3381 },
13a75688 3382 'skip': 'This video is not available.',
dacb3a86 3383 'add_ie': [YoutubeIE.ie_key()],
481cc733 3384 }, {
8bdd16b4 3385 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3386 'only_matching': True,
66b48727 3387 }, {
8bdd16b4 3388 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3389 'only_matching': True,
a0566bbf 3390 }, {
3391 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3392 'info_dict': {
11f9be09 3393 'id': 'FMtPN8yp5LU', # This will keep changing
a0566bbf 3394 'ext': 'mp4',
deaec5af 3395 'title': compat_str,
a0566bbf 3396 'uploader': 'Sky News',
3397 'uploader_id': 'skynews',
3398 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3399 'upload_date': r're:\d{8}',
3400 'description': compat_str,
a0566bbf 3401 'categories': ['News & Politics'],
3402 'tags': list,
3403 'like_count': int,
3404 'dislike_count': int,
3405 },
3406 'params': {
3407 'skip_download': True,
3408 },
da692b79 3409 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3410 }, {
3411 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3412 'info_dict': {
3413 'id': 'a48o2S1cPoo',
3414 'ext': 'mp4',
3415 'title': 'The Young Turks - Live Main Show',
3416 'uploader': 'The Young Turks',
3417 'uploader_id': 'TheYoungTurks',
3418 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3419 'upload_date': '20150715',
3420 'license': 'Standard YouTube License',
3421 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3422 'categories': ['News & Politics'],
3423 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3424 'like_count': int,
3425 'dislike_count': int,
3426 },
3427 'params': {
3428 'skip_download': True,
3429 },
3430 'only_matching': True,
3431 }, {
3432 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3433 'only_matching': True,
3434 }, {
3435 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3436 'only_matching': True,
09f1580e 3437 }, {
3438 'note': 'A channel that is not live. Should raise error',
3439 'url': 'https://www.youtube.com/user/numberphile/live',
3440 'only_matching': True,
3d3dddc9 3441 }, {
3442 'url': 'https://www.youtube.com/feed/trending',
3443 'only_matching': True,
3444 }, {
3d3dddc9 3445 'url': 'https://www.youtube.com/feed/library',
3446 'only_matching': True,
3447 }, {
3d3dddc9 3448 'url': 'https://www.youtube.com/feed/history',
3449 'only_matching': True,
3450 }, {
3d3dddc9 3451 'url': 'https://www.youtube.com/feed/subscriptions',
3452 'only_matching': True,
3453 }, {
3d3dddc9 3454 'url': 'https://www.youtube.com/feed/watch_later',
3455 'only_matching': True,
3456 }, {
da692b79 3457 'note': 'Recommended - redirects to home page',
3d3dddc9 3458 'url': 'https://www.youtube.com/feed/recommended',
3459 'only_matching': True,
29f7c58a 3460 }, {
da692b79 3461 'note': 'inline playlist with not always working continuations',
29f7c58a 3462 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3463 'only_matching': True,
3464 }, {
3465 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3466 'only_matching': True,
3467 }, {
3468 'url': 'https://www.youtube.com/course',
3469 'only_matching': True,
3470 }, {
3471 'url': 'https://www.youtube.com/zsecurity',
3472 'only_matching': True,
3473 }, {
3474 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3475 'only_matching': True,
3476 }, {
3477 'url': 'https://www.youtube.com/TheYoungTurks/live',
3478 'only_matching': True,
39ed931e 3479 }, {
3480 'url': 'https://www.youtube.com/hashtag/cctv9',
3481 'info_dict': {
3482 'id': 'cctv9',
3483 'title': '#cctv9',
3484 },
3485 'playlist_mincount': 350,
201c1459 3486 }, {
3487 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3488 'only_matching': True,
9297939e 3489 }, {
da692b79 3490 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3491 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3492 'only_matching': True
fe03a6cd 3493 }, {
3494 'note': '/browse/ should redirect to /channel/',
3495 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3496 'only_matching': True
3497 }, {
3498 'note': 'VLPL, should redirect to playlist?list=PL...',
3499 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3500 'info_dict': {
3501 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3502 'uploader': 'NoCopyrightSounds',
3503 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3504 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3505 'title': 'NCS Releases',
3506 },
3507 'playlist_mincount': 166,
18db7548 3508 }, {
3509 'note': 'Topic, should redirect to playlist?list=UU...',
3510 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3511 'info_dict': {
3512 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3513 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3514 'title': 'Uploads from Royalty Free Music - Topic',
3515 'uploader': 'Royalty Free Music - Topic',
3516 },
3517 'expected_warnings': [
3518 'A channel/user page was given',
3519 'The URL does not have a videos tab',
3520 ],
3521 'playlist_mincount': 101,
3522 }, {
3523 'note': 'Topic without a UU playlist',
3524 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3525 'info_dict': {
3526 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3527 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3528 },
3529 'expected_warnings': [
3530 'A channel/user page was given',
3531 'The URL does not have a videos tab',
3532 'Falling back to channel URL',
3533 ],
3534 'playlist_mincount': 9,
abcdd12b 3535 }, {
3536 'note': 'Youtube music Album',
3537 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3538 'info_dict': {
3539 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3540 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3541 },
3542 'playlist_count': 50,
47193e02 3543 }, {
3544 'note': 'unlisted single video playlist',
3545 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3546 'info_dict': {
3547 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3548 'uploader': 'colethedj',
3549 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3550 'title': 'yt-dlp unlisted playlist test',
3551 'availability': 'unlisted'
3552 },
3553 'playlist_count': 1,
29f7c58a 3554 }]
3555
3556 @classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL that YoutubeIE declares suitable is declined here; every other
    URL is judged by the inherited ``suitable`` implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 3560
3561 def _extract_channel_id(self, webpage):
3562 channel_id = self._html_search_meta(
3563 'channelId', webpage, 'channel id', default=None)
3564 if channel_id:
3565 return channel_id
3566 channel_url = self._html_search_meta(
3567 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3568 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3569 'twitter:app:url:googleplay'), webpage, 'channel url')
3570 return self._search_regex(
3571 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3572 channel_url, 'channel id')
15f6397c 3573
8bdd16b4 3574 @staticmethod
cd7c66cf 3575 def _extract_basic_item_renderer(item):
3576 # Modified from _extract_grid_item_renderer
201c1459 3577 known_basic_renderers = (
3578 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3579 )
3580 for key, renderer in item.items():
201c1459 3581 if not isinstance(renderer, dict):
cd7c66cf 3582 continue
201c1459 3583 elif key in known_basic_renderers:
3584 return renderer
3585 elif key.startswith('grid') and key.endswith('Renderer'):
3586 return renderer
8bdd16b4 3587
def _grid_entries(self, grid_renderer):
    """Yield one result per recognisable item of ``grid_renderer['items']``.

    The renderer of each item is classified, in order of precedence, as a
    playlist (``playlistId``), a video (``videoId``) or a channel
    (``channelId``).  A renderer with none of these falls back to its
    navigation endpoint URL, which is handed to the first of YoutubeTabIE,
    YoutubePlaylistIE and YoutubeIE that declares the URL suitable.
    Non-dict items and items that match nothing are skipped.
    """
    for grid_item in grid_renderer['items']:
        renderer = (
            self._extract_basic_item_renderer(grid_item)
            if isinstance(grid_item, dict) else None)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')
        playlist_id = renderer.get('playlistId')
        video_id = renderer.get('videoId')
        channel_id = renderer.get('channelId')

        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            yield self._extract_video(renderer)
        elif channel_id:
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support
            endpoint_url = urljoin('https://www.youtube.com/', try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str))
            if not endpoint_url:
                continue
            # first extractor (in this fixed order) that accepts the URL
            handler = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE)
                 if ie.suitable(endpoint_url)),
                None)
            if handler is not None:
                yield self.url_result(
                    endpoint_url, ie=handler.ie_key(),
                    video_id=handler._match_id(endpoint_url), video_title=title)
8bdd16b4 3627
3d3dddc9 3628 def _shelf_entries_from_content(self, shelf_renderer):
3629 content = shelf_renderer.get('content')
3630 if not isinstance(content, dict):
8bdd16b4 3631 return
cd7c66cf 3632 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3633 if renderer:
3634 # TODO: add support for nested playlists so each shelf is processed
3635 # as separate playlist
3636 # TODO: this includes only first N items
3637 for entry in self._grid_entries(renderer):
3638 yield entry
3639 renderer = content.get('horizontalListRenderer')
3640 if renderer:
3641 # TODO
3642 pass
8bdd16b4 3643
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf: a link to the shelf page, then its content.

    When the shelf has an endpoint URL, a single URL result titled after the
    shelf is yielded for it.  If *skip_channels* is true and that URL
    contains ``/channels?``, the shelf is dropped entirely.  Afterwards the
    entries embedded in the shelf's content are yielded as well.
    """
    endpoint = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint)
    if shelf_url:
        # Skip links to other channels.  Note that checking
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # The shelf may not carry a shelf URL, so also extract from its content
    for content_entry in self._shelf_entries_from_content(shelf_renderer):
        yield content_entry
c5e8d7af 3660
8bdd16b4 3661 def _playlist_entries(self, video_list_renderer):
3662 for content in video_list_renderer['contents']:
3663 if not isinstance(content, dict):
3664 continue
3665 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3666 if not isinstance(renderer, dict):
3667 continue
3668 video_id = renderer.get('videoId')
3669 if not video_id:
3670 continue
3671 yield self._extract_video(renderer)
07aeced6 3672
def _rich_entries(self, rich_grid_renderer):
    """Yield the video nested under ``content.videoRenderer``, if any.

    Nothing is yielded when that renderer is missing, is not a dict, or has
    no truthy ``videoId``.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
    if video_renderer and video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3680
8bdd16b4 3681 def _video_entry(self, video_renderer):
3682 video_id = video_renderer.get('videoId')
3683 if video_id:
3684 return self._extract_video(video_renderer)
dacb3a86 3685
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos and playlists referenced by a community post.

    In order: the attached video (if it extracts to something truthy), the
    attached playlist, and then every link in the post text that YoutubeIE
    accepts, except links to the attached video itself.  Nothing is yielded
    when the thread has no ``backstagePostRenderer``.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not (link and YoutubeIE.suitable(link)):
            continue
        linked_id = YoutubeIE._match_id(link)
        # skip links that point at the attached video
        if video_id == linked_id:
            continue
        yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)
dacb3a86 3721
8bdd16b4 3722 def _post_thread_continuation_entries(self, post_thread_continuation):
3723 contents = post_thread_continuation.get('contents')
3724 if not isinstance(contents, list):
3725 return
3726 for content in contents:
3727 renderer = content.get('backstagePostThreadRenderer')
3728 if not isinstance(renderer, dict):
3729 continue
3730 for entry in self._post_thread_entries(renderer):
3731 yield entry
07aeced6 3732
39ed931e 3733 r''' # unused
3734 def _rich_grid_entries(self, contents):
3735 for content in contents:
3736 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3737 if video_renderer:
3738 entry = self._video_entry(video_renderer)
3739 if entry:
3740 yield entry
3741 '''
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield every entry of *tab*, then keep following continuations.

    The tab's ``sectionListRenderer`` (or ``richGridRenderer``) is walked
    first.  Afterwards continuation pages are requested one at a time via
    ``_extract_response`` until no continuation is found, a request returns
    nothing, or a page contains no renderer this method knows how to handle.
    ``visitorData`` from each response is carried over into the headers of
    the next request.
    """
    # Single-slot list standing in for `nonlocal` (unavailable on Python 2):
    # extract_entries() records the continuation it discovers in slot 0.
    continuation_cell = [None]

    section_handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
    }

    def extract_entries(parent_renderer):  # has to be called again on continuations for feeds to work
        for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
            if not isinstance(content, dict):
                continue
            section = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not section:
                rich_item = content.get('richItemRenderer')
                if rich_item:
                    for entry in self._rich_entries(rich_item):
                        yield entry
                    continuation_cell[0] = self._extract_continuation(parent_renderer)
                continue
            for section_item in try_get(section, lambda x: x['contents'], list) or []:
                if not isinstance(section_item, dict):
                    continue
                # only the first known renderer of each section item is used
                for key, renderer in section_item.items():
                    if key not in section_handlers:
                        continue
                    for entry in section_handlers[key](renderer):
                        if entry:
                            yield entry
                    continuation_cell[0] = self._extract_continuation(renderer)
                    break

            if not continuation_cell[0]:
                continuation_cell[0] = self._extract_continuation(section)

        if not continuation_cell[0]:
            continuation_cell[0] = self._extract_continuation(parent_renderer)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_cell[0]
    visitor_data = None

    # handlers for responses that carry a `continuationContents` mapping
    continuation_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # handlers for appended continuation items: (handler, key under which
    # the raw item list is wrapped before being passed to the handler)
    item_handlers = {
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in continuation_handlers:
                continue
            continuation_renderer = value
            # reset so that only a continuation found by this handler counts
            continuation_cell[0] = None
            for entry in continuation_handlers[key](continuation_renderer):
                yield entry
            continuation = continuation_cell[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # the type of the first item decides how the whole list is handled
        first_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key in first_item:
            if key not in item_handlers:
                continue
            handler, list_key = item_handlers[key]
            video_items_renderer = {list_key: continuation_items}
            continuation_cell[0] = None
            for entry in handler(video_items_renderer):
                yield entry
            continuation = continuation_cell[0] or self._extract_continuation(video_items_renderer)
            break
        if not video_items_renderer:
            break
9558dcec 3857
@staticmethod
def _extract_selected_tab(tabs):
    """
    Return the renderer of the first tab that is flagged as selected.
    Each tab is looked up under 'tabRenderer' or 'expandableTabRenderer'.
    Raises ExtractorError when no tab has 'selected' set to True.
    """
    renderers = (
        dict_get(candidate, ('tabRenderer', 'expandableTabRenderer')) or {}
        for candidate in tabs)
    for tab_renderer in renderers:
        if tab_renderer.get('selected') is True:
            return tab_renderer
    raise ExtractorError('Unable to find selected tab')
b82f815f 3866
@classmethod
def _extract_uploader(cls, data):
    """
    Build the uploader fields from the secondary sidebar info renderer.
    Returns a dict that may contain 'uploader', 'uploader_id' and
    'uploader_url'; fields whose value is None are left out.
    """
    sidebar = cls._extract_sidebar_info_renderer(
        data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    def browse_endpoint_field(name):
        return try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint'][name], compat_str)

    found = (
        ('uploader', owner.get('text')),
        ('uploader_id', browse_endpoint_field('browseId')),
        ('uploader_url', urljoin(
            'https://www.youtube.com/', browse_endpoint_field('canonicalBaseUrl'))),
    )
    return {key: value for key, value in found if value is not None}
8bdd16b4 3881
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """
    Build a playlist result for the selected tab of a tabbed page.
    Metadata is read from the channel metadata renderer when present,
    otherwise from the playlist metadata renderer; the entries come from
    self._entries() for the selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags, raw_thumbnails = [], []
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        raw_thumbnails = try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
        if not raw_thumbnails:
            # No avatar: fall back to the thumbnail in the primary sidebar info
            raw_thumbnails = try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list) or []

    thumbnails = []
    for thumb in raw_thumbnails:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag = try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag or playlist_id
    # Append the tab name(s) to the title
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror the (possibly updated) uploader fields
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    ytcfg = self.extract_ytcfg(item_id, webpage)
    identity_token = self._extract_identity_token(webpage, item_id)
    account_syncid = self._extract_account_syncid(ytcfg, data)
    entries = self._entries(
        selected_tab, playlist_id, identity_token, account_syncid, ytcfg)
    return self.playlist_result(entries, **metadata)
73c4ac2c 3956
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """
    Yield the videos of a mix playlist, requesting further pages through the
    'next' endpoint until no new videos are returned or the first video
    shows up again.
    @param playlist: playlist renderer of the first page
    @param playlist_id: id of the mix playlist
    @param data: initial data of the page, used for the account sync id
    @param webpage: webpage, used for ytcfg and the identity token
    """
    first_id = last_id = None
    ytcfg = self.extract_ytcfg(playlist_id, webpage)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video of the previous page (if it is present)
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The endpoint may be missing; fall back to the defaults below instead of
        # failing on None.get()
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # Nothing further can be extracted without a playlist renderer
            return
cd7c66cf 3992
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """
    Handle a playlist found on a watch page.
    Mix playlists are extracted here; anything whose endpoint URL differs
    from the current URL is delegated to the regular tab-based playlist URL.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)

    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_url)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, webpage),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 4010
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    primary_info = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(primary_info)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        primary_info,
        lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'],
        list) or []
    for dropdown_entry in dropdown_entries:
        if not try_get(dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        selected_label = self._get_text(dropdown_entry, ('privacyDropdownItemRenderer', 'label'))
        if selected_label:
            labels.add(selected_label.lower())
            break

    is_private = is_unlisted = None
    for current_label in labels:
        if current_label == 'public':
            is_unlisted = is_private = False
        elif current_label == 'private':
            is_private = True
        elif current_label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)
4042
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy value stored under the key info_renderer among
    the playlist sidebar items of data, or None when there is none.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    matches = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next((found for found in matches if found), None)
4051
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.
    Returns None when the primary sidebar info renderer is missing.
    """
    primary_info = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not primary_info:
        return

    # Look for the menu entry whose text is 'show unavailable videos'
    browse_endpoint = {}
    menu_items = try_get(
        primary_info, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for entry in menu_items:
        if not isinstance(entry, dict):
            continue
        nav_item = entry.get('menuNavigationItemRenderer')
        caption = try_get(nav_item, lambda x: x['text']['simpleText'], compat_str)
        if caption and caption.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break
    browse_id = browse_endpoint.get('browseId')
    params = browse_endpoint.get('params')

    ytcfg = self.extract_ytcfg(item_id, webpage)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=try_get(
            self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    # Defaults are used when the button did not provide its own values
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4090
def _extract_webpage(self, url, item_id):
    """
    Download the webpage and extract its initial data.
    The download is repeated (up to the 'extractor_retries' param, default 3)
    while the data has neither 'contents' nor 'currentVideoEndpoint'.
    Returns a (webpage, data) tuple; raises ExtractorError once the retries
    are used up.
    """
    retries = self.get_param('extractor_retries', 3)
    count = -1
    last_error = 'Incomplete yt initial data received'
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if count:
            self.report_warning('%s. Retrying ...' % last_error)
        webpage = self._download_webpage(
            url, item_id,
            'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
        data = self.extract_yt_initial_data(item_id, webpage)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        if count >= retries:
            raise ExtractorError(last_error)
    return webpage, data
4112
@staticmethod
def _smuggle_data(entries, data):
    """Yield every entry, smuggling data into its 'url' when data is truthy."""
    for item in entries:
        if not data:
            yield item
            continue
        item['url'] = smuggle_url(item['url'], data)
        yield item
4119
def _real_extract(self, url):
    """
    Unsmuggle the URL, flag music URLs, run the actual extraction and pass
    the smuggled data on to the URLs of the resulting entries.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True
    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
4128
# Splits a URL into the part matched by _VALID_URL ("pre"), a "/tab" component
# (only attempted when the channel_type group matched) and the remainder ("post")
_url_re = re.compile(
    r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4130
def __real_extract(self, url, smuggled_data):
    """
    Extract a tab, playlist or single video from a (normalised) YouTube URL.
    The URL is rewritten where needed (music channels, channel home pages,
    watch URLs without video id) before the webpage is downloaded; the result
    is then taken from the tabs, the watch-page playlist or the current video.
    Raises ExtractorError when the page cannot be recognized.
    """
    playlist_url_tmpl = 'https://www.youtube.com/playlist?list=%s'

    item_id = self._match_id(url)
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(candidate_url):
        # Groups that did not take part in the match become '' instead of None
        groups = self._url_re.match(candidate_url).groupdict()
        return dict(
            (name, '' if value is None else value)
            for name, value in groups.items())

    mobj = get_mobj(url)
    pre, post = mobj['pre'], mobj['post']
    # Youtube returns incomplete data if tabname is not lower case
    tab = mobj['tab'].lower()
    is_channel = not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':
            # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre, tab, post = playlist_url_tmpl % item_id, '', ''
            is_channel = False
        elif id_prefix == 'MP':
            # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
            music_webpage = self._download_webpage(
                'https://music.youtube.com/channel/%s' % item_id, item_id)
            item_id = self._search_regex(
                r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                music_webpage, 'playlist id')
            pre, tab, post = playlist_url_tmpl % item_id, '', ''
            is_channel = False
        elif mobj['channel_type'] == 'browse':
            # Youtube music /browse/ should be changed to /channel/
            pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = playlist_url_tmpl % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            requested_tab = mobj['tab']
            if requested_tab == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if requested_tab == '/videos' and tab_name.lower() != requested_tab[1:]:
                if mobj['not_channel'] or item_id[:2] != 'UC':
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (requested_tab[1:], tab_name))
                else:
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (requested_tab[1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            if alert_type == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)

    # The data may have been replaced above, so the tabs are looked up again
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    if mobj['tab'] != '/live':  # live tab is expected to redirect to video
        self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
    return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 4245
c5e8d7af 4246
# Matches bare playlist ids and "...?list=<id>" URLs that YoutubeTabIE does not
# handle, and hands them over to YoutubeTabIE as a regular /playlist URL.
class YoutubePlaylistIE(InfoExtractor):
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE handles or that carry a video id ('v')."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        if has_video_id:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rebuild the URL as a /playlist URL and delegate it to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query when there is one, otherwise use the matched id
        query = parse_qs(url) or {'list': playlist_id}
        url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4331
4332
# youtu.be short links that also carry a playlist id; they are rewritten to the
# equivalent /watch URL and delegated to YoutubeTabIE.
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Turn the short link into a /watch URL with both video and playlist ids."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4371
4372
# "ytuser:<name>" keyword; resolved to the /user/<name> page via YoutubeTabIE.
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to the user page of the matched user id."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4386
b05654f0 4387
# ":ytfav" style keywords; resolved to the "LL" playlist via YoutubeTabIE.
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to the playlist with the id 'LL'."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
4405
4406
# "ytsearch" keyword searches, paged through the 'search' endpoint.
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """
        Yield up to n videos for the search query, following continuations
        until a page has no results or no continuation.
        """
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS
        yielded = 0
        continuation = {}
        page_num = 0
        while True:
            page_num += 1
            request.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                return
            # The first page and the continuation pages keep the sections in different places
            sections = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for section in sections:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [section]})

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for item in items or ():
                    video = item.get('videoRenderer') if isinstance(item, dict) else None
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    yielded += 1
                    if yielded == n:
                        return

            if not continuation:
                return

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        results = self._entries(query, n)
        return self.playlist_result(results, query, query)
75dff0ee 4474
c9ae7b95 4475
# Same as YoutubeSearchIE, but with its own keyword and search params.
class YoutubeSearchDateIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4481
c9ae7b95 4482
# /results?search_query=... URLs; the query and the optional 'sp' parameter are
# taken from the URL and run through the search extraction of YoutubeSearchIE.
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return _VALID_URL unchanged."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the search terms and 'sp' params from the URL and run the search."""
        url_query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        search_terms = url_query.get('search_query') or url_query.get('q')
        query = search_terms[0]
        self._SEARCH_PARAMS = url_query.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4509
4510
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        # The extractor name is derived from the feed name of the subclass
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        # Delegate to the /feed/<name> page
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4527
4528
# ":ytwatchlater" keyword; resolved to the "WL" playlist via YoutubeTabIE.
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to the playlist with the id 'WL'."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4541
4542
25f14e9f
S
# The 'recommended' feed; also matches the bare youtube.com home page URL.
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4558
1ed5b5c9 4559
# The 'subscriptions' feed, reachable through the ":ytsubs" keywords.
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4571
1ed5b5c9 4572
# The 'history' feed, reachable through the ":ythis" / ":ythistory" keywords.
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4581
4582
15870e90
PH
# Catches watch/attribution_link URLs that end right after their first query
# parameter (or have none) and raises a hint about quoting the URL.
class YoutubeTruncatedURLIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an (expected) ExtractorError explaining the likely mistake."""
        # The example commands name this program (yt-dlp)
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4630
4631
# Catches watch URLs whose video id has only 1 to 10 characters and reports
# them as truncated.
class YoutubeTruncatedIDIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an (expected) ExtractorError naming the incomplete id."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)