]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
Add field `live_status`
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
2d6659b9 5import base64
d92f5d5a 6import calendar
109dd3b2 7import copy
fe93e2c4 8import datetime
a5c56234 9import hashlib
0ca96d48 10import itertools
c5e8d7af 11import json
c4417ddb 12import os.path
d77ab8e2 13import random
c5e8d7af 14import re
8a784c74 15import time
e0df6211 16import traceback
c5e8d7af 17
b05654f0 18from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 19from ..compat import (
edf3e38e 20 compat_chr,
29f7c58a 21 compat_HTTPError,
c5e8d7af 22 compat_parse_qs,
545cc85d 23 compat_str,
7fd002c0 24 compat_urllib_parse_unquote_plus,
15707c7e 25 compat_urllib_parse_urlencode,
7c80519c 26 compat_urllib_parse_urlparse,
7c61bd36 27 compat_urlparse,
4bb4a188 28)
545cc85d 29from ..jsinterp import JSInterpreter
4bb4a188 30from ..utils import (
2d6659b9 31 bytes_to_intlist,
c5e8d7af 32 clean_html,
d92f5d5a 33 datetime_from_str,
11f9be09 34 dict_get,
358de58c 35 error_to_compat_str,
c5e8d7af 36 ExtractorError,
2d30521a 37 float_or_none,
11f9be09 38 format_field,
dd27fd17 39 int_or_none,
2d6659b9 40 intlist_to_bytes,
94278f72 41 mimetype2ext,
11f9be09 42 orderedSet,
6310acf5 43 parse_codecs,
49bd8c66 44 parse_count,
7c80519c 45 parse_duration,
dca3ff4a 46 qualities,
3995d37d 47 remove_start,
cf7e015f 48 smuggle_url,
dbdaaa23 49 str_or_none,
c93d53f5 50 str_to_int,
7c365c21 51 traverse_obj,
556dbe7f 52 try_get,
c5e8d7af
PH
53 unescapeHTML,
54 unified_strdate,
cf7e015f 55 unsmuggle_url,
8bdd16b4 56 update_url_query,
21c340b8 57 url_or_none,
6e6bc8da 58 urlencode_postdata,
fe93e2c4 59 urljoin,
7c365c21 60 variadic,
c5e8d7af
PH
61)
62
5f6a1245 63
def parse_qs(url):
    """Return the query-string parameters of *url* as a dict of name -> list of values."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
66
67
de7f3446 68class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b
JMF
69 """Provide base functions for Youtube extractors"""
    # Google account endpoints for the username/password sign-in flow
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # {0} is a str.format() placeholder for the TL value
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Regex alternation of path names
    # NOTE(review): presumably first path components that must not be treated
    # as channel/user names - confirm against the extractors that use it
    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Playlist ID: a known prefix followed by at least 10 ID characters,
    # or one of the fixed IDs RDMM, WL, LL, LM
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 87
b2e8bc1b 88 def _login(self):
83317f69 89 """
90 Attempt to log in to YouTube.
91 True is returned if successful or skipped.
92 False is returned if login failed.
93
94 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
95 """
9d5d4d64 96
97 def warn(message):
98 self.report_warning(message)
99
100 # username+password login is broken
101 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
102 self.raise_login_required(
103 'Login details are needed to download this content', method='cookies')
68217024 104 username, password = self._get_login_info()
9d5d4d64 105 if username:
106 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
107 return
9d5d4d64 108
2d6659b9 109 # Everything below this is broken!
110 r'''
b2e8bc1b
JMF
111 # No authentication to be performed
112 if username is None:
a06916d9 113 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
69ea8ca4 114 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
a06916d9 115 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
545cc85d 116 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
83317f69 117 return True
b2e8bc1b 118
7cc3570e
PH
119 login_page = self._download_webpage(
120 self._LOGIN_URL, None,
69ea8ca4
PH
121 note='Downloading login page',
122 errnote='unable to fetch login page', fatal=False)
7cc3570e
PH
123 if login_page is False:
124 return
b2e8bc1b 125
1212e997 126 login_form = self._hidden_inputs(login_page)
c5e8d7af 127
e00eb564
S
128 def req(url, f_req, note, errnote):
129 data = login_form.copy()
130 data.update({
131 'pstMsg': 1,
132 'checkConnection': 'youtube',
133 'checkedDomains': 'youtube',
134 'hl': 'en',
135 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
3995d37d 136 'f.req': json.dumps(f_req),
e00eb564
S
137 'flowName': 'GlifWebSignIn',
138 'flowEntry': 'ServiceLogin',
baf67a60
S
139 # TODO: reverse actual botguard identifier generation algo
140 'bgRequest': '["identifier",""]',
041bc3ad 141 })
e00eb564
S
142 return self._download_json(
143 url, None, note=note, errnote=errnote,
144 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
145 fatal=False,
146 data=urlencode_postdata(data), headers={
147 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
148 'Google-Accounts-XSRF': 1,
149 })
150
3995d37d
S
151 lookup_req = [
152 username,
153 None, [], None, 'US', None, None, 2, False, True,
154 [
155 None, None,
156 [2, 1, None, 1,
157 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
158 None, [], 4],
159 1, [None, None, []], None, None, None, True
160 ],
161 username,
162 ]
163
e00eb564 164 lookup_results = req(
3995d37d 165 self._LOOKUP_URL, lookup_req,
e00eb564
S
166 'Looking up account info', 'Unable to look up account info')
167
168 if lookup_results is False:
169 return False
041bc3ad 170
3995d37d
S
171 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
172 if not user_hash:
173 warn('Unable to extract user hash')
174 return False
175
176 challenge_req = [
177 user_hash,
178 None, 1, None, [1, None, None, None, [password, None, True]],
179 [
180 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
181 1, [None, None, []], None, None, None, True
182 ]]
83317f69 183
3995d37d
S
184 challenge_results = req(
185 self._CHALLENGE_URL, challenge_req,
186 'Logging in', 'Unable to log in')
83317f69 187
3995d37d 188 if challenge_results is False:
e00eb564 189 return
83317f69 190
3995d37d
S
191 login_res = try_get(challenge_results, lambda x: x[0][5], list)
192 if login_res:
193 login_msg = try_get(login_res, lambda x: x[5], compat_str)
194 warn(
195 'Unable to login: %s' % 'Invalid password'
196 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
197 return False
198
199 res = try_get(challenge_results, lambda x: x[0][-1], list)
200 if not res:
201 warn('Unable to extract result entry')
202 return False
203
9a6628aa
S
204 login_challenge = try_get(res, lambda x: x[0][0], list)
205 if login_challenge:
206 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
207 if challenge_str == 'TWO_STEP_VERIFICATION':
3995d37d
S
208 # SEND_SUCCESS - TFA code has been successfully sent to phone
209 # QUOTA_EXCEEDED - reached the limit of TFA codes
9a6628aa 210 status = try_get(login_challenge, lambda x: x[5], compat_str)
3995d37d
S
211 if status == 'QUOTA_EXCEEDED':
212 warn('Exceeded the limit of TFA codes, try later')
213 return False
214
215 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
216 if not tl:
217 warn('Unable to extract TL')
218 return False
219
220 tfa_code = self._get_tfa_info('2-step verification code')
221
222 if not tfa_code:
223 warn(
224 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
225 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
226 return False
227
228 tfa_code = remove_start(tfa_code, 'G-')
229
230 tfa_req = [
231 user_hash, None, 2, None,
232 [
233 9, None, None, None, None, None, None, None,
234 [None, tfa_code, True, 2]
235 ]]
236
237 tfa_results = req(
238 self._TFA_URL.format(tl), tfa_req,
239 'Submitting TFA code', 'Unable to submit TFA code')
240
241 if tfa_results is False:
242 return False
243
244 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
245 if tfa_res:
246 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
247 warn(
248 'Unable to finish TFA: %s' % 'Invalid TFA code'
249 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
250 return False
251
252 check_cookie_url = try_get(
253 tfa_results, lambda x: x[0][-1][2], compat_str)
9a6628aa
S
254 else:
255 CHALLENGES = {
256 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
257 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
258 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
259 }
260 challenge = CHALLENGES.get(
261 challenge_str,
262 '%s returned error %s.' % (self.IE_NAME, challenge_str))
263 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
264 return False
3995d37d
S
265 else:
266 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
267
268 if not check_cookie_url:
269 warn('Unable to extract CheckCookie URL')
270 return False
e00eb564
S
271
272 check_cookie_results = self._download_webpage(
3995d37d
S
273 check_cookie_url, None, 'Checking cookie', fatal=False)
274
275 if check_cookie_results is False:
276 return False
e00eb564 277
3995d37d
S
278 if 'https://myaccount.google.com/' not in check_cookie_results:
279 warn('Unable to log in')
b2e8bc1b 280 return False
e00eb564 281
b2e8bc1b 282 return True
2d6659b9 283 '''
b2e8bc1b 284
cce889b9 285 def _initialize_consent(self):
286 cookies = self._get_cookies('https://www.youtube.com/')
287 if cookies.get('__Secure-3PSID'):
288 return
289 consent_id = None
290 consent = cookies.get('CONSENT')
291 if consent:
292 if 'YES' in consent.value:
293 return
294 consent_id = self._search_regex(
295 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
296 if not consent_id:
297 consent_id = random.randint(100, 999)
298 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 299
b2e8bc1b 300 def _real_initialize(self):
cce889b9 301 self._initialize_consent()
b2e8bc1b
JMF
302 if self._downloader is None:
303 return
b2e8bc1b
JMF
304 if not self._login():
305 return
c5e8d7af 306
    # Matches `ytInitialData = {...};` or `window["ytInitialData"] = {...};`
    # group 1 is the (non-greedily matched) object literal
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    # Matches `ytInitialPlayerResponse = {...};` - group 1 is the object literal
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Text expected right after the object literal; appended to _YT_INITIAL_DATA_RE
    # in extract_yt_initial_data to anchor the end of the non-greedy match
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 310
109dd3b2 311 _YT_DEFAULT_YTCFGS = {
312 'WEB': {
313 'INNERTUBE_API_VERSION': 'v1',
314 'INNERTUBE_CLIENT_NAME': 'WEB',
315 'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
316 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
317 'INNERTUBE_CONTEXT': {
318 'client': {
319 'clientName': 'WEB',
320 'clientVersion': '2.20210622.10.00',
321 'hl': 'en',
322 }
323 },
324 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
325 },
326 'WEB_REMIX': {
327 'INNERTUBE_API_VERSION': 'v1',
328 'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
329 'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
330 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
331 'INNERTUBE_CONTEXT': {
332 'client': {
333 'clientName': 'WEB_REMIX',
334 'clientVersion': '1.20210621.00.00',
335 'hl': 'en',
336 }
337 },
338 'INNERTUBE_CONTEXT_CLIENT_NAME': 67
339 },
340 'WEB_EMBEDDED_PLAYER': {
341 'INNERTUBE_API_VERSION': 'v1',
342 'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
343 'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
344 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
345 'INNERTUBE_CONTEXT': {
346 'client': {
347 'clientName': 'WEB_EMBEDDED_PLAYER',
348 'clientVersion': '1.20210620.0.1',
349 'hl': 'en',
350 }
351 },
352 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
353 },
354 'ANDROID': {
355 'INNERTUBE_API_VERSION': 'v1',
356 'INNERTUBE_CLIENT_NAME': 'ANDROID',
357 'INNERTUBE_CLIENT_VERSION': '16.20',
358 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
359 'INNERTUBE_CONTEXT': {
360 'client': {
361 'clientName': 'ANDROID',
362 'clientVersion': '16.20',
363 'hl': 'en',
364 }
365 },
fe93e2c4 366 'INNERTUBE_CONTEXT_CLIENT_NAME': 3
109dd3b2 367 },
368 'ANDROID_EMBEDDED_PLAYER': {
369 'INNERTUBE_API_VERSION': 'v1',
370 'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
371 'INNERTUBE_CLIENT_VERSION': '16.20',
372 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
373 'INNERTUBE_CONTEXT': {
374 'client': {
375 'clientName': 'ANDROID_EMBEDDED_PLAYER',
376 'clientVersion': '16.20',
377 'hl': 'en',
378 }
379 },
fe93e2c4 380 'INNERTUBE_CONTEXT_CLIENT_NAME': 55
109dd3b2 381 },
382 'ANDROID_MUSIC': {
383 'INNERTUBE_API_VERSION': 'v1',
384 'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
385 'INNERTUBE_CLIENT_VERSION': '4.32',
386 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
387 'INNERTUBE_CONTEXT': {
388 'client': {
389 'clientName': 'ANDROID_MUSIC',
390 'clientVersion': '4.32',
391 'hl': 'en',
392 }
393 },
fe93e2c4 394 'INNERTUBE_CONTEXT_CLIENT_NAME': 21
11f9be09 395 },
396 'IOS': {
397 'INNERTUBE_API_VERSION': 'v1',
398 'INNERTUBE_CLIENT_NAME': 'IOS',
399 'INNERTUBE_CLIENT_VERSION': '16.20',
400 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
401 'INNERTUBE_CONTEXT': {
402 'client': {
403 'clientName': 'IOS',
404 'clientVersion': '16.20',
405 'hl': 'en',
406 }
407 },
408 'INNERTUBE_CONTEXT_CLIENT_NAME': 5
409
410 },
411 'IOS_MUSIC': {
412 'INNERTUBE_API_VERSION': 'v1',
413 'INNERTUBE_CLIENT_NAME': 'IOS_MUSIC',
414 'INNERTUBE_CLIENT_VERSION': '4.32',
415 'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
416 'INNERTUBE_CONTEXT': {
417 'client': {
418 'clientName': 'IOS_MUSIC',
419 'clientVersion': '4.32',
420 'hl': 'en',
421 }
422 },
423 'INNERTUBE_CONTEXT_CLIENT_NAME': 26
424 },
425 'IOS_MESSAGES_EXTENSION': {
426 'INNERTUBE_API_VERSION': 'v1',
427 'INNERTUBE_CLIENT_NAME': 'IOS_MESSAGES_EXTENSION',
428 'INNERTUBE_CLIENT_VERSION': '16.20',
429 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
430 'INNERTUBE_CONTEXT': {
431 'client': {
432 'clientName': 'IOS_MESSAGES_EXTENSION',
433 'clientVersion': '16.20',
434 'hl': 'en',
435 }
436 },
437 'INNERTUBE_CONTEXT_CLIENT_NAME': 66
109dd3b2 438 }
439 }
440
    # API hostname per INNERTUBE client. _get_innertube_host looks up the
    # requested client first and then the 'WEB' entry.
    _YT_DEFAULT_INNERTUBE_HOSTS = {
        'DIRECT': 'youtubei.googleapis.com',
        'WEB': 'www.youtube.com',
        'WEB_REMIX': 'music.youtube.com',
        'ANDROID_MUSIC': 'music.youtube.com'
    }
447
    # Maps client aliases to INNERTUBE client names.
    # Clients starting with _ cannot be explicitly requested by the user
    _YT_CLIENTS = {
        'web': 'WEB',
        'web_music': 'WEB_REMIX',
        '_web_embedded': 'WEB_EMBEDDED_PLAYER',
        '_web_agegate': 'TVHTML5',
        'android': 'ANDROID',
        'android_music': 'ANDROID_MUSIC',
        '_android_embedded': 'ANDROID_EMBEDDED_PLAYER',
        '_android_agegate': 'ANDROID',
        'ios': 'IOS',
        'ios_music': 'IOS_MUSIC',
        '_ios_embedded': 'IOS_MESSAGES_EXTENSION',
        '_ios_agegate': 'IOS'
    }
463
109dd3b2 464 def _get_default_ytcfg(self, client='WEB'):
465 if client in self._YT_DEFAULT_YTCFGS:
466 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
467 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
468 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
469
470 def _get_innertube_host(self, client='WEB'):
471 return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, (client, 'WEB'))
472
473 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
474 # try_get but with fallback to default ytcfg client values when present
475 _func = lambda y: try_get(y, getter, expected_type)
476 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
477
478 def _extract_client_name(self, ytcfg, default_client='WEB'):
479 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str, default_client)
480
314ee305 481 @staticmethod
11f9be09 482 def _extract_session_index(*data):
483 for ytcfg in data:
484 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
485 if session_index is not None:
486 return session_index
314ee305 487
109dd3b2 488 def _extract_client_version(self, ytcfg, default_client='WEB'):
489 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str, default_client)
490
491 def _extract_api_key(self, ytcfg=None, default_client='WEB'):
492 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
493
494 def _extract_context(self, ytcfg=None, default_client='WEB'):
495 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
496 context = _get_context(ytcfg)
497 if context:
498 return context
499
500 context = _get_context(self._get_default_ytcfg(default_client))
501 if not ytcfg:
502 return context
503
504 # Recreate the client context (required)
505 context['client'].update({
506 'clientVersion': self._extract_client_version(ytcfg, default_client),
507 'clientName': self._extract_client_name(ytcfg, default_client),
508 })
509 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
510 if visitor_data:
511 context['client']['visitorData'] = visitor_data
512 return context
513
514 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
1974e99f 515 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
516 # See: https://github.com/yt-dlp/yt-dlp/issues/393
517 yt_cookies = self._get_cookies('https://www.youtube.com')
518 sapisid_cookie = dict_get(
519 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
a5c56234
M
520 if sapisid_cookie is None:
521 return
522 time_now = round(time.time())
1974e99f 523 # SAPISID cookie is required if not already present
524 if not yt_cookies.get('SAPISID'):
525 self._set_cookie(
526 '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
527 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
528 sapisidhash = hashlib.sha1(
109dd3b2 529 f'{time_now} {sapisid_cookie.value} {origin}'.encode('utf-8')).hexdigest()
1974e99f 530 return f'SAPISIDHASH {time_now}_{sapisidhash}'
a5c56234
M
531
532 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 533 note='Downloading API JSON', errnote='Unable to download API page',
109dd3b2 534 context=None, api_key=None, api_hostname=None, default_client='WEB'):
f4f751af 535
109dd3b2 536 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
8bdd16b4 537 data.update(query)
11f9be09 538 real_headers = self.generate_api_headers(default_client=default_client)
f4f751af 539 real_headers.update({'content-type': 'application/json'})
540 if headers:
541 real_headers.update(headers)
545cc85d 542 return self._download_json(
109dd3b2 543 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
a5c56234 544 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 545 data=json.dumps(data).encode('utf8'), headers=real_headers,
546 query={'key': api_key or self._extract_api_key()})
547
11f9be09 548 def extract_yt_initial_data(self, video_id, webpage):
8bdd16b4 549 return self._parse_json(
550 self._search_regex(
29f7c58a 551 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
a0566bbf 552 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
8bdd16b4 553 video_id)
0c148415 554
a1c5d2ca 555 def _extract_identity_token(self, webpage, item_id):
11f9be09 556 if not webpage:
557 return None
558 ytcfg = self.extract_ytcfg(item_id, webpage)
a1c5d2ca
M
559 if ytcfg:
560 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
561 if token:
562 return token
563 return self._search_regex(
564 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
565 'identity token', default=None)
566
567 @staticmethod
fe93e2c4 568 def _extract_account_syncid(*args):
8ea3f7b9 569 """
570 Extract syncId required to download private playlists of secondary channels
fe93e2c4 571 @params response and/or ytcfg
8ea3f7b9 572 """
fe93e2c4 573 for data in args:
574 # ytcfg includes channel_syncid if on secondary channel
575 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
576 if delegated_sid:
577 return delegated_sid
578 sync_ids = (try_get(
579 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
580 lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
581 if len(sync_ids) >= 2 and sync_ids[1]:
582 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
583 # and just "user_syncid||" for primary channel. We only want the channel_syncid
584 return sync_ids[0]
a1c5d2ca 585
11f9be09 586 def extract_ytcfg(self, video_id, webpage):
8c54a305 587 if not webpage:
588 return {}
29f7c58a 589 return self._parse_json(
590 self._search_regex(
591 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
f4f751af 592 default='{}'), video_id, fatal=False) or {}
593
11f9be09 594 def generate_api_headers(
595 self, ytcfg=None, identity_token=None, account_syncid=None,
596 visitor_data=None, api_hostname=None, default_client='WEB', session_index=None):
597 origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
f4f751af 598 headers = {
109dd3b2 599 'X-YouTube-Client-Name': compat_str(
11f9be09 600 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
601 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
109dd3b2 602 'Origin': origin
f4f751af 603 }
2d6659b9 604 if not visitor_data and ytcfg:
605 visitor_data = try_get(
11f9be09 606 self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
f4f751af 607 if identity_token:
109dd3b2 608 headers['X-Youtube-Identity-Token'] = identity_token
f4f751af 609 if account_syncid:
610 headers['X-Goog-PageId'] = account_syncid
314ee305 611 if session_index is None and ytcfg:
612 session_index = self._extract_session_index(ytcfg)
613 if account_syncid or session_index is not None:
614 headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
f4f751af 615 if visitor_data:
109dd3b2 616 headers['X-Goog-Visitor-Id'] = visitor_data
617 auth = self._generate_sapisidhash_header(origin)
f4f751af 618 if auth is not None:
619 headers['Authorization'] = auth
109dd3b2 620 headers['X-Origin'] = origin
f4f751af 621 return headers
29f7c58a 622
2d6659b9 623 @staticmethod
624 def _build_api_continuation_query(continuation, ctp=None):
625 query = {
626 'continuation': continuation
627 }
628 # TODO: Inconsistency with clickTrackingParams.
629 # Currently we have a fixed ctp contained within context (from ytcfg)
630 # and a ctp in root query for continuation.
631 if ctp:
632 query['clickTracking'] = {'clickTrackingParams': ctp}
633 return query
634
2d6659b9 635 @classmethod
636 def _extract_next_continuation_data(cls, renderer):
637 next_continuation = try_get(
638 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
639 lambda x: x['continuation']['reloadContinuationData']), dict)
640 if not next_continuation:
641 return
642 continuation = next_continuation.get('continuation')
643 if not continuation:
644 return
645 ctp = next_continuation.get('clickTrackingParams')
fe93e2c4 646 return cls._build_api_continuation_query(continuation, ctp)
2d6659b9 647
648 @classmethod
649 def _extract_continuation_ep_data(cls, continuation_ep: dict):
650 if isinstance(continuation_ep, dict):
651 continuation = try_get(
652 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
653 if not continuation:
654 return
655 ctp = continuation_ep.get('clickTrackingParams')
fe93e2c4 656 return cls._build_api_continuation_query(continuation, ctp)
2d6659b9 657
658 @classmethod
659 def _extract_continuation(cls, renderer):
660 next_continuation = cls._extract_next_continuation_data(renderer)
661 if next_continuation:
662 return next_continuation
fe93e2c4 663
2d6659b9 664 contents = []
665 for key in ('contents', 'items'):
666 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
fe93e2c4 667
2d6659b9 668 for content in contents:
669 if not isinstance(content, dict):
670 continue
671 continuation_ep = try_get(
672 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
673 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
674 dict)
675 continuation = cls._extract_continuation_ep_data(continuation_ep)
676 if continuation:
677 return continuation
678
fe93e2c4 679 @classmethod
680 def _extract_alerts(cls, data):
109dd3b2 681 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
682 if not isinstance(alert_dict, dict):
683 continue
684 for alert in alert_dict.values():
685 alert_type = alert.get('type')
686 if not alert_type:
687 continue
fe93e2c4 688 message = cls._get_text(alert.get('text'))
109dd3b2 689 if message:
690 yield alert_type, message
691
692 def _report_alerts(self, alerts, expected=True):
693 errors = []
694 warnings = []
695 for alert_type, alert_message in alerts:
696 if alert_type.lower() == 'error':
697 errors.append([alert_type, alert_message])
698 else:
699 warnings.append([alert_type, alert_message])
700
701 for alert_type, alert_message in (warnings + errors[:-1]):
702 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
703 if errors:
704 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
705
706 def _extract_and_report_alerts(self, data, *args, **kwargs):
707 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
708
47193e02 709 def _extract_badges(self, renderer: dict):
710 badges = set()
711 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
712 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
713 if label:
714 badges.add(label.lower())
715 return badges
716
717 @staticmethod
fe93e2c4 718 def _get_text(data, getter=None, max_runs=None):
719 for get in variadic(getter):
720 d = try_get(data, get) if get is not None else data
721 text = try_get(d, lambda x: x['simpleText'], compat_str)
722 if text:
723 return text
724 runs = try_get(d, lambda x: x['runs'], list) or []
725 if not runs and isinstance(d, list):
726 runs = d
727
728 def get_runs(runs):
729 for run in runs[:min(len(runs), max_runs or len(runs))]:
730 yield try_get(run, lambda x: x['text'], compat_str) or ''
731
732 text = ''.join(get_runs(runs))
733 if text:
734 return text
47193e02 735
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='WEB'):
        """
        Call the API endpoint *ep* via _call_api, with retries.

        The request is repeated (up to the 'extractor_retries' param, default 3,
        additional times) when it fails with HTTP 500/503/404 or when
        dict_get(response, check_get_keys) is falsy.

        @param check_get_keys   keys checked with dict_get on the response;
                                no completeness check is done if empty/None
        @param fatal            if True, errors are raised; otherwise they are
                                reported as warnings and None is returned
        The remaining parameters are passed on to _call_api (context and
        API key are derived from ytcfg / default_client).

        @returns the response, or None on (non-fatal) failure
        """
        response = None
        last_error = None
        # Starts at -1 so that the first attempt is number 0 and retries count from 1
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                # Always fatal here: errors are handled (and possibly retried) below
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if count < retries:
                        continue
                # Not retryable, or out of retries
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
791
9297939e 792 @staticmethod
793 def is_music_url(url):
794 return re.match(r'https?://music\.youtube\.com/', url) is not None
795
30a074c2 796 def _extract_video(self, renderer):
797 video_id = renderer.get('videoId')
fe93e2c4 798 title = self._get_text(renderer.get('title'))
799 description = self._get_text(renderer.get('descriptionSnippet'))
800 duration = parse_duration(self._get_text(renderer.get('lengthText')))
801 view_count_text = self._get_text(renderer.get('viewCountText')) or ''
30a074c2 802 view_count = str_to_int(self._search_regex(
803 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
804 'view count', default=None))
fe93e2c4 805
806 uploader = self._get_text(renderer, (lambda x: x['ownerText'], lambda x: x['shortBylineText']))
807
30a074c2 808 return {
39ed931e 809 '_type': 'url',
30a074c2 810 'ie_key': YoutubeIE.ie_key(),
811 'id': video_id,
812 'url': video_id,
813 'title': title,
814 'description': description,
815 'duration': duration,
816 'view_count': view_count,
817 'uploader': uploader,
818 }
819
0c148415 820
360e1ca5 821class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 822 IE_DESC = 'YouTube.com'
bc2ca1bb 823 _INVIDIOUS_SITES = (
824 # invidious-redirect websites
825 r'(?:www\.)?redirect\.invidious\.io',
826 r'(?:(?:www|dev)\.)?invidio\.us',
827 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
828 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 829 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 830 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 831 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
bc2ca1bb 832 # youtube-dl invidious instances list
833 r'(?:(?:www|no)\.)?invidiou\.sh',
834 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
835 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 836 r'(?:www\.)?invidious\.mastodon\.host',
837 r'(?:www\.)?invidious\.zapashcanon\.fr',
ed807c18 838 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
201c1459 839 r'(?:www\.)?invidious\.tinfoil-hat\.net',
840 r'(?:www\.)?invidious\.himiko\.cloud',
841 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 842 r'(?:www\.)?invidious\.tube',
843 r'(?:www\.)?invidiou\.site',
844 r'(?:www\.)?invidious\.site',
845 r'(?:www\.)?invidious\.xyz',
846 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 847 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 848 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 849 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 850 r'(?:www\.)?tube\.poal\.co',
851 r'(?:www\.)?tube\.connect\.cafe',
852 r'(?:www\.)?vid\.wxzm\.sx',
853 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 854 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 855 r'(?:www\.)?yewtu\.be',
856 r'(?:www\.)?yt\.elukerio\.org',
857 r'(?:www\.)?yt\.lelux\.fi',
858 r'(?:www\.)?invidious\.ggc-project\.de',
859 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 860 r'(?:www\.)?ytprivate\.com',
861 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 862 r'(?:www\.)?invidious\.toot\.koeln',
863 r'(?:www\.)?invidious\.fdn\.fr',
864 r'(?:www\.)?watch\.nettohikari\.com',
ed807c18 865 r'(?:www\.)?invidious\.namazso\.eu',
866 r'(?:www\.)?invidious\.silkky\.cloud',
867 r'(?:www\.)?invidious\.exonip\.de',
868 r'(?:www\.)?invidious\.riverside\.rocks',
869 r'(?:www\.)?invidious\.blamefran\.net',
870 r'(?:www\.)?invidious\.moomoo\.de',
871 r'(?:www\.)?ytb\.trom\.tf',
872 r'(?:www\.)?yt\.cyberhost\.uk',
bc2ca1bb 873 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
874 r'(?:www\.)?qklhadlycap4cnod\.onion',
875 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
876 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
877 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
878 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
879 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
880 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
ed807c18 881 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
882 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
883 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
884 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
bc2ca1bb 885 )
cb7dfeea 886 _VALID_URL = r"""(?x)^
c5e8d7af 887 (
edb53e2d 888 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 889 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
890 (?:www\.)?deturl\.com/www\.youtube\.com|
891 (?:www\.)?pwnyoutube\.com|
892 (?:www\.)?hooktube\.com|
893 (?:www\.)?yourepeat\.com|
894 tube\.majestyc\.net|
895 %(invidious)s|
896 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
897 (?:.*?\#/)? # handle anchor (#/) redirect urls
898 (?: # the various things that can precede the ID:
ac7553d0 899 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 900 |(?: # or the v= param in all its forms
f7000f3a 901 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 902 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 903 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
904 v=
905 )
f4b05232 906 ))
cbaed4bb
S
907 |(?:
908 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
909 vid\.plus| # or vid.plus/xxxx
910 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 911 %(invidious)s
cbaed4bb 912 )/
edb53e2d 913 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 914 )
c5e8d7af 915 )? # all until now is optional -> you can pass the naked ID
201c1459 916 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 917 (?(1).+)? # if we found the ID, everything can follow
9297939e 918 (?:\#|$)""" % {
bc2ca1bb 919 'invidious': '|'.join(_INVIDIOUS_SITES),
920 }
e40c758c 921 _PLAYER_INFO_RE = (
cc2db878 922 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
923 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 924 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 925 )
2c62dc26 926 _formats = {
c2d3cb4c 927 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
928 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
929 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
930 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
931 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
932 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
933 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
934 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 935 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 936 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
937 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
938 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
939 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
940 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
941 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 942 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 943 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
944 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 945
946
947 # 3D videos
c2d3cb4c 948 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
949 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
950 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
951 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 952 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
953 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
954 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 955
96fb5605 956 # Apple HTTP Live Streaming
11f12195 957 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 958 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
959 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
960 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
961 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
962 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 963 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
964 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
965
966 # DASH mp4 video
d23028a8
S
967 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
968 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
969 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
970 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
971 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 972 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
973 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
974 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
975 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
976 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
977 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
978 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 979
f6f1fc92 980 # DASH mp4 audio
d23028a8
S
981 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
982 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
983 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
984 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
985 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
986 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
987 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
988
989 # DASH webm
d23028a8
S
990 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
991 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
992 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
993 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
994 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
995 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
996 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
997 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
998 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
999 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1000 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1001 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1002 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1003 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1004 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1005 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1006 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1007 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1008 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1009 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1010 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1011 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1012
1013 # DASH webm audio
d23028a8
S
1014 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1015 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1016
0857baad 1017 # DASH webm audio with Opus inside
d23028a8
S
1018 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1019 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1020 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1021
ce6b9a2d
PH
1022 # RTMP (unnamed)
1023 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1024
1025 # av01 video only formats sometimes served with "unknown" codecs
1026 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1027 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1028 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1029 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 1030 }
29f7c58a 1031 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1032
109dd3b2 1033 _AGE_GATE_REASONS = (
1034 'Sign in to confirm your age',
1035 'This video may be inappropriate for some users.',
1036 'Sorry, this content is age-restricted.')
1037
fd5c4aab
S
1038 _GEO_BYPASS = False
1039
78caa52a 1040 IE_NAME = 'youtube'
2eb88d95
PH
1041 _TESTS = [
1042 {
2d3d2997 1043 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1044 'info_dict': {
1045 'id': 'BaW_jenozKc',
1046 'ext': 'mp4',
3867038a 1047 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1048 'uploader': 'Philipp Hagemeister',
1049 'uploader_id': 'phihag',
ec85ded8 1050 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
1051 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1052 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1053 'upload_date': '20121002',
3867038a 1054 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 1055 'categories': ['Science & Technology'],
3867038a 1056 'tags': ['youtube-dl'],
556dbe7f 1057 'duration': 10,
dbdaaa23 1058 'view_count': int,
3e7c1224
PH
1059 'like_count': int,
1060 'dislike_count': int,
7c80519c 1061 'start_time': 1,
297a564b 1062 'end_time': 9,
2eb88d95 1063 }
0e853ca4 1064 },
fccd3771 1065 {
4bc3a23e
PH
1066 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1067 'note': 'Embed-only video (#1746)',
1068 'info_dict': {
1069 'id': 'yZIXLfi8CZQ',
1070 'ext': 'mp4',
1071 'upload_date': '20120608',
1072 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1073 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1074 'uploader': 'SET India',
94bfcd23 1075 'uploader_id': 'setindia',
ec85ded8 1076 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1077 'age_limit': 18,
545cc85d 1078 },
1079 'skip': 'Private video',
fccd3771 1080 },
11b56058 1081 {
8bdd16b4 1082 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1083 'note': 'Use the first video ID in the URL',
1084 'info_dict': {
1085 'id': 'BaW_jenozKc',
1086 'ext': 'mp4',
3867038a 1087 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1088 'uploader': 'Philipp Hagemeister',
1089 'uploader_id': 'phihag',
ec85ded8 1090 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 1091 'upload_date': '20121002',
3867038a 1092 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 1093 'categories': ['Science & Technology'],
3867038a 1094 'tags': ['youtube-dl'],
556dbe7f 1095 'duration': 10,
dbdaaa23 1096 'view_count': int,
11b56058
PM
1097 'like_count': int,
1098 'dislike_count': int,
34a7de29
S
1099 },
1100 'params': {
1101 'skip_download': True,
1102 },
11b56058 1103 },
dd27fd17 1104 {
2d3d2997 1105 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1106 'note': '256k DASH audio (format 141) via DASH manifest',
1107 'info_dict': {
1108 'id': 'a9LDPn-MO4I',
1109 'ext': 'm4a',
1110 'upload_date': '20121002',
1111 'uploader_id': '8KVIDEO',
ec85ded8 1112 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1113 'description': '',
1114 'uploader': '8KVIDEO',
1115 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1116 },
4bc3a23e
PH
1117 'params': {
1118 'youtube_include_dash_manifest': True,
1119 'format': '141',
4919603f 1120 },
de3c7fe0 1121 'skip': 'format 141 not served anymore',
dd27fd17 1122 },
8bdd16b4 1123 # DASH manifest with encrypted signature
1124 {
1125 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1126 'info_dict': {
1127 'id': 'IB3lcPjvWLA',
1128 'ext': 'm4a',
1129 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1130 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1131 'duration': 244,
1132 'uploader': 'AfrojackVEVO',
1133 'uploader_id': 'AfrojackVEVO',
1134 'upload_date': '20131011',
cc2db878 1135 'abr': 129.495,
8bdd16b4 1136 },
1137 'params': {
1138 'youtube_include_dash_manifest': True,
1139 'format': '141/bestaudio[ext=m4a]',
1140 },
1141 },
dd2d55f1 1142 # Normal age-gate video (embed allowed)
c522adb1 1143 {
2d3d2997 1144 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1145 'info_dict': {
1146 'id': 'HtVdAasjOgU',
1147 'ext': 'mp4',
1148 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1149 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1150 'duration': 142,
c522adb1
JMF
1151 'uploader': 'The Witcher',
1152 'uploader_id': 'WitcherGame',
ec85ded8 1153 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1154 'upload_date': '20140605',
34952f09 1155 'age_limit': 18,
c522adb1
JMF
1156 },
1157 },
8bdd16b4 1158 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1159 # YouTube Red ad is not captured for creator
1160 {
1161 'url': '__2ABJjxzNo',
1162 'info_dict': {
1163 'id': '__2ABJjxzNo',
1164 'ext': 'mp4',
1165 'duration': 266,
1166 'upload_date': '20100430',
1167 'uploader_id': 'deadmau5',
1168 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1169 'creator': 'deadmau5',
1170 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1171 'uploader': 'deadmau5',
1172 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1173 'alt_title': 'Some Chords',
8bdd16b4 1174 },
1175 'expected_warnings': [
1176 'DASH manifest missing',
1177 ]
1178 },
067aa17e 1179 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1180 {
1181 'url': 'lqQg6PlCWgI',
1182 'info_dict': {
1183 'id': 'lqQg6PlCWgI',
1184 'ext': 'mp4',
556dbe7f 1185 'duration': 6085,
90227264 1186 'upload_date': '20150827',
cbe2bd91 1187 'uploader_id': 'olympic',
ec85ded8 1188 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1189 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
11f9be09 1190 'uploader': 'Olympics',
cbe2bd91
PH
1191 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1192 },
1193 'params': {
1194 'skip_download': 'requires avconv',
e52a40ab 1195 }
cbe2bd91 1196 },
6271f1ca
PH
1197 # Non-square pixels
1198 {
1199 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1200 'info_dict': {
1201 'id': '_b-2C3KPAM0',
1202 'ext': 'mp4',
1203 'stretched_ratio': 16 / 9.,
556dbe7f 1204 'duration': 85,
6271f1ca
PH
1205 'upload_date': '20110310',
1206 'uploader_id': 'AllenMeow',
ec85ded8 1207 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1208 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1209 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1210 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1211 },
06b491eb
S
1212 },
1213 # url_encoded_fmt_stream_map is empty string
1214 {
1215 'url': 'qEJwOuvDf7I',
1216 'info_dict': {
1217 'id': 'qEJwOuvDf7I',
f57b7835 1218 'ext': 'webm',
06b491eb
S
1219 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1220 'description': '',
1221 'upload_date': '20150404',
1222 'uploader_id': 'spbelect',
1223 'uploader': 'Наблюдатели Петербурга',
1224 },
1225 'params': {
1226 'skip_download': 'requires avconv',
e323cf3f
S
1227 },
1228 'skip': 'This live event has ended.',
06b491eb 1229 },
067aa17e 1230 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1231 {
1232 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1233 'info_dict': {
1234 'id': 'FIl7x6_3R5Y',
eb6793ba 1235 'ext': 'webm',
da77d856
S
1236 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1237 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1238 'duration': 220,
da77d856
S
1239 'upload_date': '20150625',
1240 'uploader_id': 'dorappi2000',
ec85ded8 1241 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1242 'uploader': 'dorappi2000',
eb6793ba 1243 'formats': 'mincount:31',
da77d856 1244 },
eb6793ba 1245 'skip': 'not actual anymore',
2ee8f5d8 1246 },
8a1a26ce
YCH
1247 # DASH manifest with segment_list
1248 {
1249 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1250 'md5': '8ce563a1d667b599d21064e982ab9e31',
1251 'info_dict': {
1252 'id': 'CsmdDsKjzN8',
1253 'ext': 'mp4',
17ee98e1 1254 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1255 'uploader': 'Airtek',
1256 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1257 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1258 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1259 },
1260 'params': {
1261 'youtube_include_dash_manifest': True,
1262 'format': '135', # bestvideo
be49068d
S
1263 },
1264 'skip': 'This live event has ended.',
2ee8f5d8 1265 },
cf7e015f
S
1266 {
1267 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1268 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1269 'info_dict': {
545cc85d 1270 'id': 'jvGDaLqkpTg',
1271 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1272 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1273 },
1274 'playlist': [{
1275 'info_dict': {
545cc85d 1276 'id': 'jvGDaLqkpTg',
cf7e015f 1277 'ext': 'mp4',
545cc85d 1278 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1279 'description': 'md5:e03b909557865076822aa169218d6a5d',
1280 'duration': 10643,
1281 'upload_date': '20161111',
1282 'uploader': 'Team PGP',
1283 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1284 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1285 },
1286 }, {
1287 'info_dict': {
545cc85d 1288 'id': '3AKt1R1aDnw',
cf7e015f 1289 'ext': 'mp4',
545cc85d 1290 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1291 'description': 'md5:e03b909557865076822aa169218d6a5d',
1292 'duration': 10991,
1293 'upload_date': '20161111',
1294 'uploader': 'Team PGP',
1295 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1296 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1297 },
1298 }, {
1299 'info_dict': {
545cc85d 1300 'id': 'RtAMM00gpVc',
cf7e015f 1301 'ext': 'mp4',
545cc85d 1302 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1303 'description': 'md5:e03b909557865076822aa169218d6a5d',
1304 'duration': 10995,
1305 'upload_date': '20161111',
1306 'uploader': 'Team PGP',
1307 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1308 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1309 },
1310 }, {
1311 'info_dict': {
545cc85d 1312 'id': '6N2fdlP3C5U',
cf7e015f 1313 'ext': 'mp4',
545cc85d 1314 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1315 'description': 'md5:e03b909557865076822aa169218d6a5d',
1316 'duration': 10990,
1317 'upload_date': '20161111',
1318 'uploader': 'Team PGP',
1319 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1320 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1321 },
1322 }],
1323 'params': {
1324 'skip_download': True,
1325 },
cbaed4bb 1326 },
f9f49d87 1327 {
067aa17e 1328 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1329 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1330 'info_dict': {
1331 'id': 'gVfLd0zydlo',
1332 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1333 },
1334 'playlist_count': 2,
be49068d 1335 'skip': 'Not multifeed anymore',
f9f49d87 1336 },
cbaed4bb 1337 {
2d3d2997 1338 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1339 'only_matching': True,
0e49d9a6 1340 },
6d4fc66b 1341 {
2d3d2997 1342 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1343 'only_matching': True,
1344 },
0e49d9a6 1345 {
067aa17e 1346 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1347 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1348 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1349 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1350 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1351 'info_dict': {
1352 'id': 'lsguqyKfVQg',
1353 'ext': 'mp4',
1354 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1355 'alt_title': 'Dark Walk',
0e49d9a6 1356 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1357 'duration': 133,
0e49d9a6
LL
1358 'upload_date': '20151119',
1359 'uploader_id': 'IronSoulElf',
ec85ded8 1360 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1361 'uploader': 'IronSoulElf',
11f9be09 1362 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1363 'track': 'Dark Walk',
1364 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1365 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1366 },
1367 'params': {
1368 'skip_download': True,
1369 },
1370 },
61f92af1 1371 {
067aa17e 1372 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1373 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1374 'only_matching': True,
1375 },
313dfc45
LL
1376 {
1377 # Video with yt:stretch=17:0
1378 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1379 'info_dict': {
1380 'id': 'Q39EVAstoRM',
1381 'ext': 'mp4',
1382 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1383 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1384 'upload_date': '20151107',
1385 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1386 'uploader': 'CH GAMER DROID',
1387 },
1388 'params': {
1389 'skip_download': True,
1390 },
be49068d 1391 'skip': 'This video does not exist.',
313dfc45 1392 },
201c1459 1393 {
1394 # Video with incomplete 'yt:stretch=16:'
1395 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1396 'only_matching': True,
1397 },
7caf9830
S
1398 {
1399 # Video licensed under Creative Commons
1400 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1401 'info_dict': {
1402 'id': 'M4gD1WSo5mA',
1403 'ext': 'mp4',
1404 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1405 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1406 'duration': 721,
7caf9830
S
1407 'upload_date': '20150127',
1408 'uploader_id': 'BerkmanCenter',
ec85ded8 1409 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1410 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1411 'license': 'Creative Commons Attribution license (reuse allowed)',
1412 },
1413 'params': {
1414 'skip_download': True,
1415 },
1416 },
fd050249
S
1417 {
1418 # Channel-like uploader_url
1419 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1420 'info_dict': {
1421 'id': 'eQcmzGIKrzg',
1422 'ext': 'mp4',
1423 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1424 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1425 'duration': 4060,
fd050249 1426 'upload_date': '20151119',
eb6793ba 1427 'uploader': 'Bernie Sanders',
fd050249 1428 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1429 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1430 'license': 'Creative Commons Attribution license (reuse allowed)',
1431 },
1432 'params': {
1433 'skip_download': True,
1434 },
1435 },
040ac686
S
1436 {
1437 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1438 'only_matching': True,
7f29cf54
S
1439 },
1440 {
067aa17e 1441 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1442 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1443 'only_matching': True,
6496ccb4
S
1444 },
1445 {
1446 # Rental video preview
1447 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1448 'info_dict': {
1449 'id': 'uGpuVWrhIzE',
1450 'ext': 'mp4',
1451 'title': 'Piku - Trailer',
1452 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1453 'upload_date': '20150811',
1454 'uploader': 'FlixMatrix',
1455 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1456 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1457 'license': 'Standard YouTube License',
1458 },
1459 'params': {
1460 'skip_download': True,
1461 },
eb6793ba 1462 'skip': 'This video is not available.',
022a5d66 1463 },
12afdc2a
S
1464 {
1465 # YouTube Red video with episode data
1466 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1467 'info_dict': {
1468 'id': 'iqKdEhx-dD4',
1469 'ext': 'mp4',
1470 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1471 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1472 'duration': 2085,
12afdc2a
S
1473 'upload_date': '20170118',
1474 'uploader': 'Vsauce',
1475 'uploader_id': 'Vsauce',
1476 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1477 'series': 'Mind Field',
1478 'season_number': 1,
1479 'episode_number': 1,
1480 },
1481 'params': {
1482 'skip_download': True,
1483 },
1484 'expected_warnings': [
1485 'Skipping DASH manifest',
1486 ],
1487 },
c7121fa7
S
1488 {
1489 # The following content has been identified by the YouTube community
1490 # as inappropriate or offensive to some audiences.
1491 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1492 'info_dict': {
1493 'id': '6SJNVb0GnPI',
1494 'ext': 'mp4',
1495 'title': 'Race Differences in Intelligence',
1496 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1497 'duration': 965,
1498 'upload_date': '20140124',
1499 'uploader': 'New Century Foundation',
1500 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1501 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1502 },
1503 'params': {
1504 'skip_download': True,
1505 },
545cc85d 1506 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1507 },
022a5d66
S
1508 {
1509 # itag 212
1510 'url': '1t24XAntNCY',
1511 'only_matching': True,
fd5c4aab
S
1512 },
1513 {
1514 # geo restricted to JP
1515 'url': 'sJL6WA-aGkQ',
1516 'only_matching': True,
1517 },
cd5a74a2
S
1518 {
1519 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1520 'only_matching': True,
1521 },
bc2ca1bb 1522 {
1523 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1524 'only_matching': True,
1525 },
1526 {
1527 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1528 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1529 'only_matching': True,
1530 },
825cd268
RA
1531 {
1532 # DRM protected
1533 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1534 'only_matching': True,
4fe54c12
S
1535 },
1536 {
1537 # Video with unsupported adaptive stream type formats
1538 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1539 'info_dict': {
1540 'id': 'Z4Vy8R84T1U',
1541 'ext': 'mp4',
1542 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1543 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1544 'duration': 433,
1545 'upload_date': '20130923',
1546 'uploader': 'Amelia Putri Harwita',
1547 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1548 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1549 'formats': 'maxcount:10',
1550 },
1551 'params': {
1552 'skip_download': True,
1553 'youtube_include_dash_manifest': False,
1554 },
5429d6a9 1555 'skip': 'not actual anymore',
5caabd3c 1556 },
1557 {
822b9d9c 1558 # Youtube Music Auto-generated description
5caabd3c 1559 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1560 'info_dict': {
1561 'id': 'MgNrAu2pzNs',
1562 'ext': 'mp4',
1563 'title': 'Voyeur Girl',
1564 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1565 'upload_date': '20190312',
5429d6a9
S
1566 'uploader': 'Stephen - Topic',
1567 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1568 'artist': 'Stephen',
1569 'track': 'Voyeur Girl',
1570 'album': 'it\'s too much love to know my dear',
1571 'release_date': '20190313',
1572 'release_year': 2019,
1573 },
1574 'params': {
1575 'skip_download': True,
1576 },
1577 },
66b48727
RA
1578 {
1579 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1580 'only_matching': True,
1581 },
011e75e6
S
1582 {
1583 # invalid -> valid video id redirection
1584 'url': 'DJztXj2GPfl',
1585 'info_dict': {
1586 'id': 'DJztXj2GPfk',
1587 'ext': 'mp4',
1588 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1589 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1590 'upload_date': '20090125',
1591 'uploader': 'Prochorowka',
1592 'uploader_id': 'Prochorowka',
1593 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1594 'artist': 'Panjabi MC',
1595 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1596 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1597 },
1598 'params': {
1599 'skip_download': True,
1600 },
545cc85d 1601 'skip': 'Video unavailable',
ea74e00b
DP
1602 },
1603 {
1604 # empty description results in an empty string
1605 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1606 'info_dict': {
1607 'id': 'x41yOUIvK2k',
1608 'ext': 'mp4',
1609 'title': 'IMG 3456',
1610 'description': '',
1611 'upload_date': '20170613',
1612 'uploader_id': 'ElevageOrVert',
1613 'uploader': 'ElevageOrVert',
1614 },
1615 'params': {
1616 'skip_download': True,
1617 },
1618 },
a0566bbf 1619 {
29f7c58a 1620 # with '};' inside yt initial data (see [1])
1621 # see [2] for an example with '};' inside ytInitialPlayerResponse
1622 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1623 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1624 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1625 'info_dict': {
1626 'id': 'CHqg6qOn4no',
1627 'ext': 'mp4',
1628 'title': 'Part 77 Sort a list of simple types in c#',
1629 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1630 'upload_date': '20130831',
1631 'uploader_id': 'kudvenkat',
1632 'uploader': 'kudvenkat',
1633 },
1634 'params': {
1635 'skip_download': True,
1636 },
1637 },
29f7c58a 1638 {
1639 # another example of '};' in ytInitialData
1640 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1641 'only_matching': True,
1642 },
1643 {
1644 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1645 'only_matching': True,
1646 },
545cc85d 1647 {
cc2db878 1648 # https://github.com/ytdl-org/youtube-dl/pull/28094
1649 'url': 'OtqTfy26tG0',
1650 'info_dict': {
1651 'id': 'OtqTfy26tG0',
1652 'ext': 'mp4',
1653 'title': 'Burn Out',
1654 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1655 'upload_date': '20141120',
1656 'uploader': 'The Cinematic Orchestra - Topic',
1657 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1658 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1659 'artist': 'The Cinematic Orchestra',
1660 'track': 'Burn Out',
1661 'album': 'Every Day',
1662 'release_data': None,
1663 'release_year': None,
1664 },
1665 'params': {
1666 'skip_download': True,
1667 },
545cc85d 1668 },
bc2ca1bb 1669 {
1670 # controversial video, only works with bpctr when authenticated with cookies
1671 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1672 'only_matching': True,
1673 },
a1a7907b 1674 {
1675 # controversial video, requires bpctr/contentCheckOk
1676 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1677 'info_dict': {
1678 'id': 'SZJvDhaSDnc',
1679 'ext': 'mp4',
1680 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1681 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1682 'uploader': 'CBS This Morning',
11f9be09 1683 'uploader_id': 'CBSThisMorning',
a1a7907b 1684 'upload_date': '20140716',
1685 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1686 }
1687 },
f7ad7160 1688 {
1689 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1690 'url': 'cBvYw8_A0vQ',
1691 'info_dict': {
1692 'id': 'cBvYw8_A0vQ',
1693 'ext': 'mp4',
1694 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1695 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1696 'upload_date': '20201120',
1697 'uploader': 'Walk around Japan',
1698 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1699 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1700 },
1701 'params': {
1702 'skip_download': True,
1703 },
0fb983f6 1704 }, {
1705 # Has multiple audio streams
1706 'url': 'WaOKSUlf4TM',
1707 'only_matching': True
9297939e 1708 }, {
1709 # Requires Premium: has format 141 when requested using YTM url
1710 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1711 'only_matching': True
1712 }, {
120916da 1713 # multiple subtitles with same lang_code
1714 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1715 'only_matching': True,
109dd3b2 1716 }, {
1717 # Force use android client fallback
1718 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1719 'info_dict': {
1720 'id': 'YOelRv7fMxY',
11f9be09 1721 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 1722 'ext': '3gp',
1723 'upload_date': '20210624',
1724 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1725 'uploader': 'colinfurze',
11f9be09 1726 'uploader_id': 'colinfurze',
109dd3b2 1727 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
11f9be09 1728 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
109dd3b2 1729 },
1730 'params': {
1731 'format': '17', # 3gp format available on android
1732 'extractor_args': {'youtube': {'player_client': ['android']}},
1733 },
120916da 1734 },
109dd3b2 1735 {
1736 # Skip download of additional client configs (remix client config in this case)
1737 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1738 'only_matching': True,
1739 'params': {
1740 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1741 },
1742 }
2eb88d95
PH
1743 ]
1744
@classmethod
def suitable(cls, url):
    """Return False for URLs carrying a non-empty ``list`` query parameter.

    Any other URL is judged by the parent class's ``suitable``.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_param = parse_qs(url).get('list', [None])[0]
    return False if list_param else super(YoutubeIE, cls).suitable(url)
1754
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create the two empty caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _player_cache: (player_url, signature shape) -> signature function
    # _code_cache: player id -> downloaded player code
    self._code_cache, self._player_cache = {}, {}
e0df6211 1759
def _extract_player_url(self, ytcfg=None, webpage=None):
    """Return an absolute player JS URL, or None when none can be found.

    The URL is read from ``ytcfg['PLAYER_JS_URL']`` first; failing that it
    is searched for in ``webpage``.  Protocol-relative and site-relative
    URLs are made absolute.
    """
    url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
    if not url and webpage:
        url = self._search_regex(
            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
            webpage, 'player URL', fatal=False)
    if not url:
        return None
    if url.startswith('//'):
        # Protocol-relative URL
        return 'https:' + url
    if re.match(r'https?://', url):
        return url
    # Anything else is treated as relative to the main site
    return compat_urlparse.urljoin('https://www.youtube.com', url)
1774
60064c53
PH
def _signature_cache_id(self, example_sig):
    """ Return the dot-joined lengths of the dot-separated parts of a signature """
    part_lengths = [compat_str(len(piece)) for piece in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1778
e40c758c
S
1779 @classmethod
1780 def _extract_player_info(cls, player_url):
1781 for player_re in cls._PLAYER_INFO_RE:
1782 id_m = re.search(player_re, player_url)
1783 if id_m:
1784 break
1785 else:
c081b35c 1786 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 1787 return id_m.group('id')
e40c758c 1788
109dd3b2 1789 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1790 player_id = self._extract_player_info(player_url)
1791 if player_id not in self._code_cache:
1792 self._code_cache[player_id] = self._download_webpage(
1793 player_url, video_id, fatal=fatal,
1794 note='Downloading player ' + player_id,
1795 errnote='Download of %s failed' % player_url)
1796 return player_id in self._code_cache
1797
e40c758c 1798 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 1799 player_id = self._extract_player_info(player_url)
e0df6211 1800
c4417ddb 1801 # Read from filesystem cache
545cc85d 1802 func_id = 'js_%s_%s' % (
1803 player_id, self._signature_cache_id(example_sig))
c4417ddb 1804 assert os.path.basename(func_id) == func_id
a0e07d31 1805
69ea8ca4 1806 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1807 if cache_spec is not None:
78caa52a 1808 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1809
109dd3b2 1810 if self._load_player(video_id, player_url):
1811 code = self._code_cache[player_id]
1812 res = self._parse_sig_js(code)
e0df6211 1813
109dd3b2 1814 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1815 cache_res = res(test_string)
1816 cache_spec = [ord(c) for c in cache_res]
83799698 1817
109dd3b2 1818 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1819 return res
83799698 1820
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the signature function ``func``.

    ``func`` is run on a probe string of distinct characters; the resulting
    index permutation is rendered as a sum of ``s[...]`` index and slice
    expressions and written with ``self.to_screen``.
    """
    def gen_sig_code(idxs):
        # Yield 's[i]' / 's[a:b:c]' snippets covering the index list idxs,
        # merging runs that step by +1 or -1 into a single slice.
        def _genslice(start, end, step):
            # Render the inclusive run start..end as a slice expression
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                # Inside a run: extend it, or close it when the step changes
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run begins at prev
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Flush whatever is pending after the last pair
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    # Probe with distinct characters so the output encodes source indices
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    # NOTE(review): whitespace inside the second literal may have been
    # collapsed by the page this text was taken from - confirm against the repository.
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1859
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a callable applying the signature function found in jscode.

    The function name is located with the regex list below (``sig`` group),
    then extracted from the code with JSInterpreter.  The returned callable
    takes the signature string and passes it as the single argument.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         # NOTE(review): the next two patterns are identical; one looks redundant
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function receives its arguments as a list
    return lambda s: initial_function([s])
1883
545cc85d 1884 def _decrypt_signature(self, s, video_id, player_url):
257a2501 1885 """Turn the encrypted s field into a working signature"""
6b37f0be 1886
c8bf86d5 1887 if player_url is None:
69ea8ca4 1888 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1889
c8bf86d5 1890 try:
62af3a0e 1891 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1892 if player_id not in self._player_cache:
1893 func = self._extract_signature_function(
60064c53 1894 video_id, player_url, s
c8bf86d5
PH
1895 )
1896 self._player_cache[player_id] = func
1897 func = self._player_cache[player_id]
a06916d9 1898 if self.get_param('youtube_print_sig_code'):
60064c53 1899 self._print_sig_code(func, s)
c8bf86d5
PH
1900 return func(s)
1901 except Exception as e:
1902 tb = traceback.format_exc()
1903 raise ExtractorError(
78caa52a 1904 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1905
109dd3b2 1906 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1907 """
1908 Extract signatureTimestamp (sts)
1909 Required to tell API what sig/player version is in use.
1910 """
1911 sts = None
1912 if isinstance(ytcfg, dict):
1913 sts = int_or_none(ytcfg.get('STS'))
1914
1915 if not sts:
1916 # Attempt to extract from player
1917 if player_url is None:
1918 error_msg = 'Cannot extract signature timestamp without player_url.'
1919 if fatal:
1920 raise ExtractorError(error_msg)
1921 self.report_warning(error_msg)
1922 return
1923 if self._load_player(video_id, player_url, fatal=fatal):
1924 player_id = self._extract_player_info(player_url)
1925 code = self._code_cache[player_id]
1926 sts = int_or_none(self._search_regex(
1927 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
1928 'JS player signature timestamp', group='sts', fatal=fatal))
1929 return sts
1930
def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URL found in player_responses.

    The URL's query gets ``ver`` and a random ``cpn`` added.  A warning is
    emitted when no tracking URL is present; the request is non-fatal.
    """
    playback_url = traverse_obj(
        player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none, get_all=False)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return
    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]
    encoded_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(url_parts._replace(query=encoded_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1956
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Collect embedded YouTube references found in webpage markup.

    Returns a list holding, in order: embed URLs from player tags and
    scripts, ids from lazyYT elements, and ids from the Wordpress
    "YouTube Video Importer" plugin markup.
    """
    # Embedded YouTube player
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    entries = [
        unescapeHTML(found.group('url'))
        for found in re.finditer(embed_re, webpage)]

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    entries.extend(list(map(unescapeHTML, lazy_ids)))

    # Wordpress "YouTube Video Importer" plugin
    plugin_matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    for groups in plugin_matches:
        # The video id is the last captured group
        entries.append(groups[-1])

    return entries
1988
@staticmethod
def _extract_url(webpage):
    """Return the first entry of ``_extract_urls(webpage)``, or None if it is empty."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1993
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return group 2 of ``_VALID_URL`` matched (verbose mode) against url.

    Raises ExtractorError when the URL does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is None:
        raise ExtractorError('Invalid URL: %s' % url)
    return match.group(2)
2001
def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar found in data.

    Start times are read from ``timeRangeStartMillis`` (scaled by 1000)
    and titles from ``title.simpleText``; see ``_extract_chapters``.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def start_of(chapter):
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        chapter_list, chapter_time=start_of, chapter_title=title_of, duration=duration)
2016
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build chapters from the macro-marker lists of the engagement panels.

    Returns the first non-empty chapter list produced from any panel, or
    an empty list when none yields chapters.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter.get('timeDescription')))

    def chapter_title(chapter):
        return self._get_text(chapter.get('title'))

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
2032
2033 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
84213ea8 2034 chapters = []
7c365c21 2035 last_chapter = {'start_time': 0}
2036 for idx, chapter in enumerate(chapter_list or []):
2037 title = chapter_title(chapter)
84213ea8
S
2038 start_time = chapter_time(chapter)
2039 if start_time is None:
2040 continue
7c365c21 2041 last_chapter['end_time'] = start_time
2042 if start_time < last_chapter['start_time']:
2043 if idx == 1:
2044 chapters.pop()
2045 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2046 else:
2047 self.report_warning(f'Invalid start time for chapter "{title}"')
2048 continue
2049 last_chapter = {'start_time': start_time, 'title': title}
2050 chapters.append(last_chapter)
2051 last_chapter['end_time'] = duration
84213ea8
S
2052 return chapters
2053
545cc85d 2054 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2055 return self._parse_json(self._search_regex(
2056 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2057 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 2058
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns the result of datetime_from_str for 'now-<X><units>', or None
    when the text has fewer than three space-separated parts or cannot be
    parsed.
    """
    parts = time_text.split(' ')
    if len(parts) < 3:
        return None
    amount, unit = parts[0], parts[1]
    try:
        return datetime_from_str('now-%s%s' % (amount, unit), precision='auto')
    except ValueError:
        return None
d92f5d5a 2071
a1c5d2ca
M
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no ``commentId``.  ``timestamp`` is
    None when the published-time text cannot be turned into a datetime.
    ``parent`` is the id of the parent comment; 'root' is used when unset.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer.get('contentText'))

    # note: timestamp is an estimate calculated from the current time and time_text
    # It must be bound up front: parse_time_text returns None for text it
    # cannot handle, and the dict below reads timestamp unconditionally.
    timestamp = None
    time_text = self._get_text(comment_renderer.get('publishedTimeText')) or ''
    time_text_dt = self.parse_time_text(time_text)
    if isinstance(time_text_dt, datetime.datetime):
        timestamp = calendar.timegm(time_text_dt.timetuple())
    author = self._get_text(comment_renderer.get('authorText'))
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                   lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    # Favorited means the action buttons include a 'creatorHeart' entry
    is_favorited = 'creatorHeart' in (try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
2109
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, video_id, parent=None, comment_counts=None):
    """Generator over the comments reachable from root_continuation_data.

    Yields comment dicts (from ``_extract_comment``) and, when a header
    reports one, an int with the estimated total number of comments.
    Replies are produced by recursing with ``parent`` set to the id of the
    comment being replied to.

    comment_counts is a shared mutable list:
    [comments so far, estimated total, current reply thread number].
    """

    def extract_header(contents):
        # Read the expected comment count and the continuation of the
        # requested sort mode from the comments header.
        _total_comments = 0
        _continuation = None
        for content in contents:
            comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
            expected_comment_count = parse_count(self._get_text(
                comments_header_renderer, (lambda x: x['countText'], lambda x: x['commentsCount']), max_runs=1))

            if expected_comment_count:
                comment_counts[1] = expected_comment_count
                self.to_screen('Downloading ~%d comments' % expected_comment_count)
                _total_comments = comment_counts[1]
            sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
            comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = sort_menu_item.get('title')
            if isinstance(sort_text, compat_str):
                sort_text = sort_text.lower()
            else:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text)
            break
        return _total_comments, _continuation

    def extract_thread(contents):
        # Yield every comment in contents, each followed by its replies.
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # dict is the expected type of the result.  It used to sit inside
            # the getter tuple, where it ran as a fallback getter (dict(src)).
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    video_id, parent=comment.get('id'), comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    # YouTube comments have a max depth of 2
    max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
    if max_depth == 1 and parent:
        return
    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    continuation = self._extract_continuation(root_continuation_data)
    if continuation and len(continuation['continuation']) < 27:
        self.write_debug('Detected old API continuation token. Generating new API compatible token.')
        continuation_token = self._generate_comment_continuation(video_id)
        continuation = self._build_api_continuation_query(continuation_token, None)

    visitor_data = None
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
        # NOTE(review): leading whitespace inside the note literals below may
        # have been collapsed by the page this text was taken from - confirm.
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    comment_counts[2], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
        if not response:
            break
        # Reuse the visitor data of the latest response for the next request
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

        continuation = None
        if isinstance(continuation_contents, list):
            for continuation_section in continuation_contents:
                if not isinstance(continuation_section, dict):
                    continue
                continuation_items = try_get(
                    continuation_section,
                    (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                     lambda x: x['appendContinuationItemsAction']['continuationItems']),
                    list) or []
                if is_first_continuation:
                    total_comments, continuation = extract_header(continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break
                    continue
                # NOTE(review): count is the index of the last entry, so it is
                # also 0 when exactly one entry was yielded.
                count = 0
                for count, entry in enumerate(extract_thread(continuation_items)):
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break

        # Deprecated response structure
        elif isinstance(continuation_contents, dict):
            known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
            for key, continuation_renderer in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                if not isinstance(continuation_renderer, dict):
                    continue
                if is_first_continuation:
                    header_continuation_items = [continuation_renderer.get('header') or {}]
                    total_comments, continuation = extract_header(header_continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break

                # Sometimes YouTube provides a continuation without any comments
                # In most cases we end up just downloading these with very little comments to come.
                count = 0
                for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                if count == 0:
                    if not parent:
                        self.report_warning('No comments received - assuming end of comments')
                    continuation = None
                break
a1c5d2ca 2280
2d6659b9 2281 @staticmethod
2282 def _generate_comment_continuation(video_id):
2283 """
2284 Generates initial comment section continuation token from given video id
2285 """
2286 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2287 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2288 new_continuation_intlist = list(itertools.chain.from_iterable(
2289 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2290 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2291
def _extract_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Collects comments from the first known comment renderer of each entry
    in contents, up to the ``max_comments`` configuration argument, and
    returns a dict with ``comments`` and ``comment_count``.  A keyboard
    interrupt ends collection early and keeps what was gathered.
    """
    known_entry_comment_renderers = ('itemSectionRenderer',)

    def _real_comment_extract(contents):
        if not isinstance(contents, list):
            return
        for entry in contents:
            for key, renderer in entry.items():
                if key in known_entry_comment_renderers:
                    yield from self._comment_entries(
                        renderer, video_id=video_id, ytcfg=ytcfg,
                        identity_token=self._extract_identity_token(webpage, item_id=video_id),
                        account_syncid=self._extract_account_syncid(ytcfg))
                    break

    comments = []
    estimated_total = 0
    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')

    try:
        for item in _real_comment_extract(contents):
            if len(comments) >= max_comments:
                break
            if isinstance(item, int):
                # Integers carry the estimated total, not a comment
                estimated_total = item
            else:
                comments.append(item)
    except KeyboardInterrupt:
        self.to_screen('Interrupted by user')
    self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
    return {
        'comments': comments,
        'comment_count': len(comments),
    }
2325
109dd3b2 2326 @staticmethod
2327 def _generate_player_context(sts=None):
2328 context = {
2329 'html5Preference': 'HTML5_PREF_WANTS',
2330 }
2331 if sts is not None:
2332 context['signatureTimestamp'] = sts
2333 return {
2334 'playbackContext': {
2335 'contentPlaybackContext': context
a1a7907b 2336 },
2337 'contentCheckOk': True
109dd3b2 2338 }
2339
4e6767b5 2340 @staticmethod
c888ffb9 2341 def _get_video_info_params(video_id, client='TVHTML5'):
2342 GVI_CLIENTS = {
2343 'ANDROID': {
2344 'c': 'ANDROID',
2345 'cver': '16.20',
2346 },
2347 'TVHTML5': {
2348 'c': 'TVHTML5',
2349 'cver': '6.20180913',
11f9be09 2350 },
2351 'IOS': {
2352 'c': 'IOS',
2353 'cver': '16.20'
c888ffb9 2354 }
2355 }
2356 query = {
4e6767b5 2357 'video_id': video_id,
2358 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c888ffb9 2359 'html5': '1'
4e6767b5 2360 }
c888ffb9 2361 query.update(GVI_CLIENTS.get(client))
2362 return query
4e6767b5 2363
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
    """
    Request the 'player' API endpoint on behalf of the given client

    @param client  Key into self._YT_CLIENTS
    @returns       The API response, or None if it is missing/empty
                   (the request is made with fatal=False)
    """
    api_client = self._YT_CLIENTS[client]
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    headers = self.generate_api_headers(
        player_ytcfg, identity_token, syncid,
        default_client=api_client, session_index=session_index)

    yt_query = dict({'videoId': video_id}, **self._generate_player_context(sts))
    # Internal client names look like "_web_embedded"; make them presentable
    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=False,
        default_client=api_client,
        note='Downloading %s player API JSON' % client_label)
    return response or None
2381
def _extract_age_gated_player_response(self, client, video_id, ytcfg, identity_token, player_url, initial_pr):
    """
    Try to obtain a player response for an age-gated video

    First the get_video_info endpoint is queried using the age-gate variant
    of `client`. If that gives nothing, the embedded variant of the client
    is tried, unless the embed page reports that the video is age-gated
    there too.

    @returns  A player response, or None if no workaround is available
    """
    gvi_client = self._YT_CLIENTS.get(f'_{client}_agegate')
    if not gvi_client:
        return

    video_info_webpage = self._download_webpage(
        self.http_scheme() + '//www.youtube.com/get_video_info', video_id,
        'Refetching age-gated %s info webpage' % gvi_client.lower(),
        'unable to download video info webpage', fatal=False,
        query=self._get_video_info_params(video_id, client=gvi_client))
    raw_player_response = traverse_obj(
        compat_parse_qs(video_info_webpage), ('player_response', 0), expected_type=str)
    pr = self._parse_json(raw_player_response or '{}', video_id)
    if pr:
        return pr

    self.report_warning('Falling back to embedded-only age-gate workaround')
    fetch_embed_config = client == 'web' and 'configs' not in self._configuration_arg('player_skip')
    if fetch_embed_config:
        embed_webpage = self._download_webpage(
            'https://www.youtube.com/embed/%s?html5=1' % video_id,
            video_id=video_id, note=f'Downloading age-gated {client} embed config')
    else:
        embed_webpage = None

    embed_ytcfg = self.extract_ytcfg(video_id, embed_webpage) or {}
    # If we extracted the embed webpage, it'll tell us if we can view the video
    raw_embedded_pr = traverse_obj(
        embed_ytcfg, ('PLAYER_VARS', 'embedded_player_response'), expected_type=str)
    embedded_pr = self._parse_json(raw_embedded_pr or '{}', video_id=video_id)
    embedded_ps_reason = traverse_obj(
        embedded_pr, ('playabilityStatus', 'reason'), expected_type=str) or ''
    if embedded_ps_reason in self._AGE_GATE_REASONS:
        return

    player_ytcfg = embed_ytcfg if client == 'web' else {}
    return self._extract_player_response(
        f'_{client}_embedded', video_id,
        embed_ytcfg or ytcfg, player_ytcfg,
        identity_token, player_url, initial_pr)
545cc85d 2416
def _get_requested_clients(self, url, smuggled_data):
    """
    Determine which player clients should be queried

    The clients come from the 'player_client' extractor argument. Names
    starting with an underscore are internal variants and cannot be
    requested; unknown names are ignored. If nothing valid is requested,
    'android' and 'web' are used. For music URLs the "_music" variant of
    each requested client is appended.

    @returns  The client names, de-duplicated by orderedSet
    """
    requested_clients = [
        client for client in self._configuration_arg('player_client')
        # NB: client[:0] is always '', so it cannot be used to test the prefix
        if not client.startswith('_') and client in self._YT_CLIENTS]
    if not requested_clients:
        requested_clients = ['android', 'web']

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first so that the list is not extended while
        # it is being iterated
        music_clients = [
            f'{client}_music' for client in requested_clients if not client.endswith('_music')]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
cf7e015f 2428
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
    """
    Generator yielding the player responses of all the given clients

    @param clients  Iterable of client names. It is also tested with
                    `'web' not in clients` after the loop
    @param webpage  The watch page, or a falsy value if unavailable

    Once any response is found to be age-gated, the normal request is
    skipped for all the remaining clients and only the age-gate workaround
    is attempted for them.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    age_gated = False
    for client in clients:
        # Only the web client can reuse the config found in the watch page
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if age_gated:
            pr = None
        elif client == 'web' and initial_pr:
            # The watch page already contains the web player response
            pr = initial_pr
        else:
            if client == 'web_music' and 'configs' not in self._configuration_arg('player_skip'):
                ytm_webpage = self._download_webpage(
                    'https://music.youtube.com',
                    video_id, fatal=False, note='Downloading remix client config')
                player_ytcfg = self.extract_ytcfg(video_id, ytm_webpage) or {}
            pr = self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr)
        if pr:
            yield pr
        # The age-gated response (if any) is yielded above as well, in
        # addition to the result of the workaround
        if age_gated or traverse_obj(pr, ('playabilityStatus', 'reason')) in self._AGE_GATE_REASONS:
            age_gated = True
            pr = self._extract_age_gated_player_response(
                client, video_id, player_ytcfg or master_ytcfg, identity_token, player_url, initial_pr)
            if pr:
                yield pr
    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr and 'web' not in clients:
        # NB: This modifies initial_pr in place
        initial_pr['streamingData'] = None
        yield initial_pr
2466
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """
    Generator yielding the format dicts of a video

    The formats listed directly in the streaming data ('formats' and
    'adaptiveFormats') are yielded first, followed by those from the HLS
    and DASH manifests. Manifest formats whose itag was already seen are
    skipped.

    @param streaming_data  List of the "streamingData" dicts of the player responses
    @param player_url      URL of the player JS. Formats having an encrypted
                           signature are skipped when this is not available
    @param is_live         DASH manifests are not extracted when this is truthy
    """
    itags, stream_ids = [], []
    itag_qualities = {}
    q = qualities([
        # "tiny" is the smallest video-only format. But some audio-only formats
        # were also labeled "tiny". It is not clear if such formats still exist
        'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        # The same itag can occur once per audio track
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        if quality == 'tiny' or not quality:
            # "audioQuality" may be present with a null value
            quality = (fmt.get('audioQuality') or '').lower() or quality
        if itag and quality:
            itag_qualities[itag] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragments that would subsequently be requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(encrypted_sig, video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        if itag:
            itags.append(itag)
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': ', '.join(filter(None, (
                audio_track.get('displayName'), fmt.get('qualityLabel') or quality))),
            'fps': int_or_none(fmt.get('fps')),
            'height': int_or_none(fmt.get('height')),
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            # Normalized the same way as "height"
            'width': int_or_none(fmt.get('width')),
            'language': audio_track.get('id', '').split('.')[0],
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
            # The 3gp format in android client has a quality of "small",
            # but is actually worse than all other formats
            if dct['ext'] == '3gp':
                dct['quality'] = q('tiny')
                dct['preference'] = -10
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        # For single-stream formats, the total bitrate is that of the stream
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    skip_manifests = self._configuration_arg('skip')
    get_dash = not is_live and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
    get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(
                    hls_manifest_url, video_id, 'mp4', fatal=False):
                itag = self._search_regex(
                    r'/itag/(\d+)', f['url'], 'itag', default=None)
                if itag in itags:
                    continue
                if itag:
                    f['format_id'] = itag
                    itags.append(itag)
                yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(
                    dash_manifest_url, video_id, fatal=False):
                itag = f['format_id']
                if itag in itags:
                    continue
                if itag:
                    itags.append(itag)
                # Reuse the quality label seen for this itag in the format list
                if itag in itag_qualities:
                    f['quality'] = q(itag_qualities[itag])
                filesize = int_or_none(self._search_regex(
                    r'/clen/(\d+)', f.get('fragment_base_url')
                    or f['url'], 'file size', default=None))
                if filesize:
                    f['filesize'] = filesize
                yield f
2592
def _real_extract(self, url):
    """
    Extract the information of a single video

    Downloads the watch page and the player responses of the requested
    clients, and builds the info dict (formats, thumbnails, subtitles,
    metadata, availability, etc) out of them.

    @returns  The info dict of the video; or a url/playlist result when the
              video is only a trailer placeholder or a multifeed video
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id
    # NB: Since this is non-fatal, webpage may be falsy below
    webpage = self._download_webpage(
        webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
    player_url = self._extract_player_url(master_ytcfg, webpage)
    identity_token = self._extract_identity_token(webpage, video_id)

    player_responses = list(self._extract_player_responses(
        self._get_requested_clients(url, smuggled_data),
        video_id, webpage, master_ytcfg, player_url, identity_token))

    # Get the first match of `keys` among all the items of `obj`
    get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])
    video_title = (
        get_first(video_details, 'title')
        or self._get_text(microformats, (..., 'title'))
        or search_meta(['og:title', 'twitter:title', 'title']))
    video_description = get_first(video_details, 'shortDescription')

    if not smuggled_data.get('force_singlefeed', False):
        if not self.get_param('noplaylist'):
            multifeed_metadata_list = get_first(
                player_responses,
                ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
                expected_type=str)
            if multifeed_metadata_list:
                entries = []
                feed_ids = []
                for feed in multifeed_metadata_list.split(','):
                    # Unquote should take place before split on comma (,) since textual
                    # fields may contain comma as well (see
                    # https://github.com/ytdl-org/youtube-dl/issues/8536)
                    feed_data = compat_parse_qs(
                        compat_urllib_parse_unquote_plus(feed))

                    def feed_entry(name):
                        return try_get(
                            feed_data, lambda x: x[name][0], compat_str)

                    feed_id = feed_entry('id')
                    if not feed_id:
                        continue
                    feed_title = feed_entry('title')
                    title = video_title
                    if feed_title:
                        title += ' (%s)' % feed_title
                    entries.append({
                        '_type': 'url_transparent',
                        'ie_key': 'Youtube',
                        # Smuggle force_singlefeed so that the feed is not
                        # expanded into a playlist again
                        'url': smuggle_url(
                            '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                            {'force_singlefeed': True}),
                        'title': title,
                    })
                    feed_ids.append(feed_id)
                self.to_screen(
                    'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                    % (', '.join(feed_ids), video_id))
                return self.playlist_result(
                    entries, video_id, video_title, video_description)
        else:
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = get_first(video_details, 'channelId') \
        or get_first(microformats, 'externalChannelId') \
        or search_meta('channelId')
    duration = int_or_none(
        get_first(video_details, 'lengthSeconds')
        or get_first(microformats, 'lengthSeconds')) \
        or parse_duration(search_meta('duration'))
    is_live = get_first(video_details, 'isLive')
    is_upcoming = get_first(video_details, 'isUpcoming')
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.raise_no_formats(
                'This video is DRM protected.', expected=True)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # reason can be None when only the subreason is available
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    for f in formats:
        # TODO: detect if throttled
        if '&n=' in f['url']:  # possibly throttled
            f['source_preference'] = -10
            # note = f.get('format_note')
            # f['format_note'] = f'{note} (throttled)' if note else '(throttled)'

    self._sort_formats(formats)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break

    thumbnails = []
    thumbnail_dicts = traverse_obj(
        (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
        expected_type=dict, default=[])
    for thumbnail in thumbnail_dicts:
        thumbnail_url = thumbnail.get('url')
        if not thumbnail_url:
            continue
        # Sometimes youtube gives a wrong thumbnail URL. See:
        # https://github.com/yt-dlp/yt-dlp/issues/233
        # https://github.com/ytdl-org/youtube-dl/issues/28023
        if 'maxresdefault' in thumbnail_url:
            thumbnail_url = thumbnail_url.split('?')[0]
        thumbnails.append({
            'url': thumbnail_url,
            'height': int_or_none(thumbnail.get('height')),
            'width': int_or_none(thumbnail.get('width')),
        })
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    # The best resolution thumbnails sometimes do not appear in the webpage
    # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
    guaranteed_thumbnail_names = [
        'hqdefault', 'hq1', 'hq2', 'hq3', '0',
        'mqdefault', 'mq1', 'mq2', 'mq3',
        'default', '1', '2', '3'
    ]
    thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
    n_thumbnail_names = len(thumbnail_names)

    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
        '_test_url': name in hq_thumbnail_names,
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Earlier names in thumbnail_names get a higher preference, and webp is
        # preferred over jpg of the same name. Unrecognized URLs are ranked last
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)

    info = {
        'id': video_id,
        'title': self._live_title(video_title) if is_live else video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        'description': video_description,
        'upload_date': unified_strdate(
            get_first(microformats, 'uploadDate')
            or search_meta('uploadDate')),
        'uploader': get_first(video_details, 'author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'is_live': is_live,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'was_live': get_first(video_details, 'isLiveContent'),
    }

    pctr = get_first(player_responses, ('captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    subtitles = {}
    if pctr:
        def process_language(container, base_url, lang_code, sub_name, query):
            # Add one entry per subtitle format for the language to container
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': update_url_query(base_url, query),
                    'name': sub_name,
                })

        for caption_track in (pctr.get('captionTracks') or []):
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            if caption_track.get('kind') != 'asr':
                lang_code = (
                    remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
                    or caption_track.get('languageCode'))
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code,
                    try_get(caption_track, lambda x: x['name']['simpleText']),
                    {})
                continue
            # The "asr" track is the base for all the translated automatic captions
            automatic_captions = {}
            for translation_language in (pctr.get('translationLanguages') or []):
                translation_language_code = translation_language.get('languageCode')
                if not translation_language_code:
                    continue
                process_language(
                    automatic_captions, base_url, translation_language_code,
                    self._get_text(translation_language.get('languageName'), max_runs=1),
                    {'tlang': translation_language_code})
            info['automatic_captions'] = automatic_captions
    info['subtitles'] = subtitles

    # Start/end time given in the URL. The fragment takes priority over the query
    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(query[k][0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # NB: strip the matched text, not the name of the group
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_DATA_RE, video_id,
            'yt initial data')
    if not initial_data:
        headers = self.generate_api_headers(
            master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
            session_index=self._extract_session_index(master_ytcfg))

        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
            note='Downloading initial data API JSON')

    try:
        # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
        info['subtitles']['live_chat'] = [{
            'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]
    except (KeyError, IndexError, TypeError):
        pass

    # This is captured by the comment extraction lambda at the end of the
    # function, and so must be bound even when there is no initial data
    contents = []
    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or None)

        contents = try_get(
            initial_data,
            lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
            list) or []
        for content in contents:
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = self._get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    # Try each known shape of the accessibility label
                    # until one gives a like/dislike count
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    like_count, dislike_count = sbr_tooltip.split(' / ')
                    info.update({
                        'like_count': str_to_int(like_count),
                        'dislike_count': str_to_int(dislike_count),
                    })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                info['channel'] = self._get_text(try_get(
                    vsir,
                    lambda x: x['owner']['videoOwnerRenderer']['title'],
                    dict))
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                # When any row has a divider line, the album/artist/song
                # rows are not used
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = self._get_text(mrr['title'])
                    mrr_contents_text = self._get_text(mrr['contents'][0])
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

    # Fill the missing channel fields from the corresponding uploader fields
    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }
    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = get_first(video_details, 'isPrivate', expected_type=bool)
    is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    # get xsrf for annotations or comments
    get_annotations = self.get_param('writeannotations', False)
    get_comments = self.get_param('getcomments', False)
    if get_annotations or get_comments:
        xsrf_token = None
        if master_ytcfg:
            xsrf_token = try_get(master_ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
        if not xsrf_token:
            xsrf_token = self._search_regex(
                r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
                webpage, 'xsrf token', group='xsrf_token', fatal=False)

    # annotations
    if get_annotations:
        invideo_url = get_first(
            player_responses,
            ('annotations', 0, 'playerAnnotationsUrlsRenderer', 'invideoUrl'),
            expected_type=str)
        if xsrf_token and invideo_url:
            xsrf_field_name = None
            if master_ytcfg:
                xsrf_field_name = try_get(master_ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
            if not xsrf_field_name:
                xsrf_field_name = self._search_regex(
                    r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
                    webpage, 'xsrf field name',
                    group='xsrf_field_name', default='session_token')
            info['annotations'] = self._download_webpage(
                self._proto_relative_url(invideo_url),
                video_id, note='Downloading annotations',
                errnote='Unable to download video annotations', fatal=False,
                data=urlencode_postdata({xsrf_field_name: xsrf_token}))

    if get_comments:
        # Comments are not extracted here; only the callable is stored
        info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
c5e8d7af 3082
5f6a1245 3083
8bdd16b4 3084class YoutubeTabIE(YoutubeBaseInfoExtractor):
3085 IE_DESC = 'YouTube.com tab'
70d5c17b 3086 _VALID_URL = r'''(?x)
3087 https?://
3088 (?:\w+\.)?
3089 (?:
3090 youtube(?:kids)?\.com|
3091 invidio\.us
3092 )/
3093 (?:
fe03a6cd 3094 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 3095 (?P<not_channel>
9ba5705a 3096 feed/|hashtag/|
70d5c17b 3097 (?:playlist|watch)\?.*?\blist=
3098 )|
29f7c58a 3099 (?!(?:%s)\b) # Direct URLs
70d5c17b 3100 )
3101 (?P<id>[^/?\#&]+)
3102 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 3103 IE_NAME = 'youtube:tab'
3104
81127aa5 3105 _TESTS = [{
da692b79 3106 'note': 'playlists, multipage',
8bdd16b4 3107 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3108 'playlist_mincount': 94,
3109 'info_dict': {
3110 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3111 'title': 'Игорь Клейнер - Playlists',
3112 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3113 'uploader': 'Игорь Клейнер',
3114 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 3115 },
3116 }, {
da692b79 3117 'note': 'playlists, multipage, different order',
8bdd16b4 3118 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3119 'playlist_mincount': 94,
3120 'info_dict': {
3121 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3122 'title': 'Игорь Клейнер - Playlists',
3123 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3124 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3125 'uploader': 'Игорь Клейнер',
8bdd16b4 3126 },
201c1459 3127 }, {
da692b79 3128 'note': 'playlists, series',
201c1459 3129 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3130 'playlist_mincount': 5,
3131 'info_dict': {
3132 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3133 'title': '3Blue1Brown - Playlists',
3134 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 3135 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3136 'uploader': '3Blue1Brown',
201c1459 3137 },
8bdd16b4 3138 }, {
da692b79 3139 'note': 'playlists, singlepage',
8bdd16b4 3140 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3141 'playlist_mincount': 4,
3142 'info_dict': {
3143 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3144 'title': 'ThirstForScience - Playlists',
3145 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 3146 'uploader': 'ThirstForScience',
3147 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 3148 }
3149 }, {
3150 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3151 'only_matching': True,
3152 }, {
da692b79 3153 'note': 'basic, single video playlist',
0e30a7b9 3154 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3155 'info_dict': {
0e30a7b9 3156 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3157 'uploader': 'Sergey M.',
3158 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3159 'title': 'youtube-dl public playlist',
81127aa5 3160 },
0e30a7b9 3161 'playlist_count': 1,
9291475f 3162 }, {
da692b79 3163 'note': 'empty playlist',
0e30a7b9 3164 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3165 'info_dict': {
0e30a7b9 3166 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3167 'uploader': 'Sergey M.',
3168 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3169 'title': 'youtube-dl empty playlist',
9291475f
PH
3170 },
3171 'playlist_count': 0,
3172 }, {
da692b79 3173 'note': 'Home tab',
8bdd16b4 3174 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3175 'info_dict': {
8bdd16b4 3176 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3177 'title': 'lex will - Home',
3178 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3179 'uploader': 'lex will',
3180 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3181 },
8bdd16b4 3182 'playlist_mincount': 2,
9291475f 3183 }, {
da692b79 3184 'note': 'Videos tab',
8bdd16b4 3185 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3186 'info_dict': {
8bdd16b4 3187 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3188 'title': 'lex will - Videos',
3189 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3190 'uploader': 'lex will',
3191 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3192 },
8bdd16b4 3193 'playlist_mincount': 975,
9291475f 3194 }, {
da692b79 3195 'note': 'Videos tab, sorted by popular',
8bdd16b4 3196 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3197 'info_dict': {
8bdd16b4 3198 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3199 'title': 'lex will - Videos',
3200 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3201 'uploader': 'lex will',
3202 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3203 },
8bdd16b4 3204 'playlist_mincount': 199,
9291475f 3205 }, {
da692b79 3206 'note': 'Playlists tab',
8bdd16b4 3207 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3208 'info_dict': {
8bdd16b4 3209 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3210 'title': 'lex will - Playlists',
3211 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3212 'uploader': 'lex will',
3213 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3214 },
8bdd16b4 3215 'playlist_mincount': 17,
ac7553d0 3216 }, {
da692b79 3217 'note': 'Community tab',
8bdd16b4 3218 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3219 'info_dict': {
8bdd16b4 3220 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3221 'title': 'lex will - Community',
3222 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3223 'uploader': 'lex will',
3224 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3225 },
3226 'playlist_mincount': 18,
87dadd45 3227 }, {
da692b79 3228 'note': 'Channels tab',
8bdd16b4 3229 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3230 'info_dict': {
8bdd16b4 3231 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3232 'title': 'lex will - Channels',
3233 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3234 'uploader': 'lex will',
3235 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3236 },
deaec5af 3237 'playlist_mincount': 12,
cd684175 3238 }, {
3239 'note': 'Search tab',
3240 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3241 'playlist_mincount': 40,
3242 'info_dict': {
3243 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3244 'title': '3Blue1Brown - Search - linear algebra',
3245 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3246 'uploader': '3Blue1Brown',
3247 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3248 },
6b08cdf6 3249 }, {
a0566bbf 3250 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3251 'only_matching': True,
3252 }, {
a0566bbf 3253 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3254 'only_matching': True,
3255 }, {
a0566bbf 3256 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3257 'only_matching': True,
3258 }, {
3259 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3260 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3261 'info_dict': {
3262 'title': '29C3: Not my department',
3263 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3264 'uploader': 'Christiaan008',
3265 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3266 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3267 },
3268 'playlist_count': 96,
3269 }, {
3270 'note': 'Large playlist',
3271 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3272 'info_dict': {
8bdd16b4 3273 'title': 'Uploads from Cauchemar',
3274 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3275 'uploader': 'Cauchemar',
3276 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3277 },
8bdd16b4 3278 'playlist_mincount': 1123,
3279 }, {
da692b79 3280 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3281 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3282 'only_matching': True,
4b7df0d3
JMF
3283 }, {
3284 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3285 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3286 'info_dict': {
acf757f4
PH
3287 'title': 'Uploads from Interstellar Movie',
3288 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3289 'uploader': 'Interstellar Movie',
8bdd16b4 3290 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3291 },
481cc733 3292 'playlist_mincount': 21,
358de58c 3293 }, {
3294 'note': 'Playlist with "show unavailable videos" button',
3295 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3296 'info_dict': {
3297 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3298 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3299 'uploader': 'Phim Siêu Nhân Nhật Bản',
3300 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3301 },
da692b79 3302 'playlist_mincount': 200,
5d342002 3303 }, {
da692b79 3304 'note': 'Playlist with unavailable videos in page 7',
5d342002 3305 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3306 'info_dict': {
3307 'title': 'Uploads from BlankTV',
3308 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3309 'uploader': 'BlankTV',
3310 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3311 },
da692b79 3312 'playlist_mincount': 1000,
8bdd16b4 3313 }, {
da692b79 3314 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3315 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3316 'info_dict': {
3317 'title': 'Data Analysis with Dr Mike Pound',
3318 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3319 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3320 'uploader': 'Computerphile',
deaec5af 3321 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3322 },
3323 'playlist_mincount': 11,
3324 }, {
a0566bbf 3325 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3326 'only_matching': True,
dacb3a86 3327 }, {
da692b79 3328 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3329 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3330 'info_dict': {
3331 'id': 'FqZTN594JQw',
3332 'ext': 'webm',
3333 'title': "Smiley's People 01 detective, Adventure Series, Action",
3334 'uploader': 'STREEM',
3335 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3336 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3337 'upload_date': '20150526',
3338 'license': 'Standard YouTube License',
3339 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3340 'categories': ['People & Blogs'],
3341 'tags': list,
dbdaaa23 3342 'view_count': int,
dacb3a86
S
3343 'like_count': int,
3344 'dislike_count': int,
3345 },
3346 'params': {
3347 'skip_download': True,
3348 },
13a75688 3349 'skip': 'This video is not available.',
dacb3a86 3350 'add_ie': [YoutubeIE.ie_key()],
481cc733 3351 }, {
8bdd16b4 3352 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3353 'only_matching': True,
66b48727 3354 }, {
8bdd16b4 3355 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3356 'only_matching': True,
a0566bbf 3357 }, {
3358 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3359 'info_dict': {
11f9be09 3360 'id': 'FMtPN8yp5LU', # This will keep changing
a0566bbf 3361 'ext': 'mp4',
deaec5af 3362 'title': compat_str,
a0566bbf 3363 'uploader': 'Sky News',
3364 'uploader_id': 'skynews',
3365 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3366 'upload_date': r're:\d{8}',
3367 'description': compat_str,
a0566bbf 3368 'categories': ['News & Politics'],
3369 'tags': list,
3370 'like_count': int,
3371 'dislike_count': int,
3372 },
3373 'params': {
3374 'skip_download': True,
3375 },
da692b79 3376 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3377 }, {
3378 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3379 'info_dict': {
3380 'id': 'a48o2S1cPoo',
3381 'ext': 'mp4',
3382 'title': 'The Young Turks - Live Main Show',
3383 'uploader': 'The Young Turks',
3384 'uploader_id': 'TheYoungTurks',
3385 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3386 'upload_date': '20150715',
3387 'license': 'Standard YouTube License',
3388 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3389 'categories': ['News & Politics'],
3390 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3391 'like_count': int,
3392 'dislike_count': int,
3393 },
3394 'params': {
3395 'skip_download': True,
3396 },
3397 'only_matching': True,
3398 }, {
3399 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3400 'only_matching': True,
3401 }, {
3402 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3403 'only_matching': True,
09f1580e 3404 }, {
3405 'note': 'A channel that is not live. Should raise error',
3406 'url': 'https://www.youtube.com/user/numberphile/live',
3407 'only_matching': True,
3d3dddc9 3408 }, {
3409 'url': 'https://www.youtube.com/feed/trending',
3410 'only_matching': True,
3411 }, {
3d3dddc9 3412 'url': 'https://www.youtube.com/feed/library',
3413 'only_matching': True,
3414 }, {
3d3dddc9 3415 'url': 'https://www.youtube.com/feed/history',
3416 'only_matching': True,
3417 }, {
3d3dddc9 3418 'url': 'https://www.youtube.com/feed/subscriptions',
3419 'only_matching': True,
3420 }, {
3d3dddc9 3421 'url': 'https://www.youtube.com/feed/watch_later',
3422 'only_matching': True,
3423 }, {
da692b79 3424 'note': 'Recommended - redirects to home page',
3d3dddc9 3425 'url': 'https://www.youtube.com/feed/recommended',
3426 'only_matching': True,
29f7c58a 3427 }, {
da692b79 3428 'note': 'inline playlist with not always working continuations',
29f7c58a 3429 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3430 'only_matching': True,
3431 }, {
3432 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3433 'only_matching': True,
3434 }, {
3435 'url': 'https://www.youtube.com/course',
3436 'only_matching': True,
3437 }, {
3438 'url': 'https://www.youtube.com/zsecurity',
3439 'only_matching': True,
3440 }, {
3441 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3442 'only_matching': True,
3443 }, {
3444 'url': 'https://www.youtube.com/TheYoungTurks/live',
3445 'only_matching': True,
39ed931e 3446 }, {
3447 'url': 'https://www.youtube.com/hashtag/cctv9',
3448 'info_dict': {
3449 'id': 'cctv9',
3450 'title': '#cctv9',
3451 },
3452 'playlist_mincount': 350,
201c1459 3453 }, {
3454 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3455 'only_matching': True,
9297939e 3456 }, {
da692b79 3457 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3458 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3459 'only_matching': True
fe03a6cd 3460 }, {
3461 'note': '/browse/ should redirect to /channel/',
3462 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3463 'only_matching': True
3464 }, {
3465 'note': 'VLPL, should redirect to playlist?list=PL...',
3466 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3467 'info_dict': {
3468 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3469 'uploader': 'NoCopyrightSounds',
3470 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3471 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3472 'title': 'NCS Releases',
3473 },
3474 'playlist_mincount': 166,
18db7548 3475 }, {
3476 'note': 'Topic, should redirect to playlist?list=UU...',
3477 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3478 'info_dict': {
3479 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3480 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3481 'title': 'Uploads from Royalty Free Music - Topic',
3482 'uploader': 'Royalty Free Music - Topic',
3483 },
3484 'expected_warnings': [
3485 'A channel/user page was given',
3486 'The URL does not have a videos tab',
3487 ],
3488 'playlist_mincount': 101,
3489 }, {
3490 'note': 'Topic without a UU playlist',
3491 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3492 'info_dict': {
3493 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3494 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3495 },
3496 'expected_warnings': [
3497 'A channel/user page was given',
3498 'The URL does not have a videos tab',
3499 'Falling back to channel URL',
3500 ],
3501 'playlist_mincount': 9,
abcdd12b 3502 }, {
3503 'note': 'Youtube music Album',
3504 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3505 'info_dict': {
3506 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3507 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3508 },
3509 'playlist_count': 50,
47193e02 3510 }, {
3511 'note': 'unlisted single video playlist',
3512 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3513 'info_dict': {
3514 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3515 'uploader': 'colethedj',
3516 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3517 'title': 'yt-dlp unlisted playlist test',
3518 'availability': 'unlisted'
3519 },
3520 'playlist_count': 1,
29f7c58a 3521 }]
3522
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL that YoutubeIE accepts is declined here; every other URL is
    decided by the inherited ``suitable`` implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 3527
3528 def _extract_channel_id(self, webpage):
3529 channel_id = self._html_search_meta(
3530 'channelId', webpage, 'channel id', default=None)
3531 if channel_id:
3532 return channel_id
3533 channel_url = self._html_search_meta(
3534 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3535 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3536 'twitter:app:url:googleplay'), webpage, 'channel url')
3537 return self._search_regex(
3538 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3539 channel_url, 'channel id')
15f6397c 3540
8bdd16b4 3541 @staticmethod
cd7c66cf 3542 def _extract_basic_item_renderer(item):
3543 # Modified from _extract_grid_item_renderer
201c1459 3544 known_basic_renderers = (
3545 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3546 )
3547 for key, renderer in item.items():
201c1459 3548 if not isinstance(renderer, dict):
cd7c66cf 3549 continue
201c1459 3550 elif key in known_basic_renderers:
3551 return renderer
3552 elif key.startswith('grid') and key.endswith('Renderer'):
3553 return renderer
8bdd16b4 3554
def _grid_entries(self, grid_renderer):
    """Yield one result per usable item in ``grid_renderer['items']``.

    Each item is reduced to its basic renderer and then classified, in this
    order: playlist (``playlistId``), video (``videoId``), channel
    (``channelId``), and finally any navigation endpoint URL that one of
    YoutubeTabIE / YoutubePlaylistIE / YoutubeIE accepts. Items that match
    none of these are dropped.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        item_renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(item_renderer, dict):
            continue
        item_title = self._get_text(item_renderer.get('title'))

        # playlist
        playlist_id = item_renderer.get('playlistId')
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=item_title)
            continue

        # video
        if item_renderer.get('videoId'):
            yield self._extract_video(item_renderer)
            continue

        # channel
        channel_id = item_renderer.get('channelId')
        if channel_id:
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=item_title)
            continue

        # generic endpoint URL support
        endpoint_path = try_get(
            item_renderer,
            lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str)
        endpoint_url = urljoin('https://www.youtube.com/', endpoint_path)
        if not endpoint_url:
            continue
        # Only the first extractor that accepts the URL produces a result
        for extractor in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
            if extractor.suitable(endpoint_url):
                yield self.url_result(
                    endpoint_url, ie=extractor.ie_key(),
                    video_id=extractor._match_id(endpoint_url), video_title=item_title)
                break
8bdd16b4 3594
3d3dddc9 3595 def _shelf_entries_from_content(self, shelf_renderer):
3596 content = shelf_renderer.get('content')
3597 if not isinstance(content, dict):
8bdd16b4 3598 return
cd7c66cf 3599 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3600 if renderer:
3601 # TODO: add support for nested playlists so each shelf is processed
3602 # as separate playlist
3603 # TODO: this includes only first N items
3604 for entry in self._grid_entries(renderer):
3605 yield entry
3606 renderer = content.get('horizontalListRenderer')
3607 if renderer:
3608 # TODO
3609 pass
8bdd16b4 3610
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf renderer.

    When the shelf carries an endpoint URL, a single URL result pointing at
    it is yielded first. If *skip_channels* is set and that URL contains
    ``/channels?``, the shelf is skipped entirely. Entries found in the
    shelf's own content are yielded afterwards.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = self._get_text(shelf_renderer, lambda x: x['title'])
        yield self.url_result(shelf_url, video_title=shelf_title)
    # The shelf may not carry a URL at all, so its content is extracted too
    for content_entry in self._shelf_entries_from_content(shelf_renderer):
        yield content_entry
c5e8d7af 3627
8bdd16b4 3628 def _playlist_entries(self, video_list_renderer):
3629 for content in video_list_renderer['contents']:
3630 if not isinstance(content, dict):
3631 continue
3632 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3633 if not isinstance(renderer, dict):
3634 continue
3635 video_id = renderer.get('videoId')
3636 if not video_id:
3637 continue
3638 yield self._extract_video(renderer)
07aeced6 3639
def _rich_entries(self, rich_grid_renderer):
    """Yield the video entry wrapped in a rich item renderer, if any.

    Nothing is yielded when ``content.videoRenderer`` is missing, is not a
    dict, or has no ``videoId``.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3647
8bdd16b4 3648 def _video_entry(self, video_renderer):
3649 video_id = video_renderer.get('videoId')
3650 if video_id:
3651 return self._extract_video(video_renderer)
dacb3a86 3652
def _post_thread_entries(self, post_thread_renderer):
    """Yield entries referenced by a backstage (community) post.

    In order: the attached video, the attached playlist, and then every
    link in the post text that YoutubeIE accepts. A text link pointing at
    the same video as the attachment is not yielded again.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    attached_video_id = attached_video.get('videoId')
    if attached_video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_video_id = YoutubeIE._match_id(link)
        # The attachment above already covered this video
        if linked_video_id != attached_video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
dacb3a86 3688
8bdd16b4 3689 def _post_thread_continuation_entries(self, post_thread_continuation):
3690 contents = post_thread_continuation.get('contents')
3691 if not isinstance(contents, list):
3692 return
3693 for content in contents:
3694 renderer = content.get('backstagePostThreadRenderer')
3695 if not isinstance(renderer, dict):
3696 continue
3697 for entry in self._post_thread_entries(renderer):
3698 yield entry
07aeced6 3699
39ed931e 3700 r''' # unused
3701 def _rich_grid_entries(self, contents):
3702 for content in contents:
3703 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3704 if video_renderer:
3705 entry = self._video_entry(video_renderer)
3706 if entry:
3707 yield entry
3708 '''
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield every entry of a tab, following continuations page by page.

    The entries contained in ``tab['content']`` are yielded first. After
    that, continuation pages are requested through ``_extract_response``
    for as long as a continuation is found and a response comes back.

    *item_id* is only used to label each page request ('<id> page <n>').
    *identity_token*, *account_syncid* and *ytcfg* are passed on to
    ``generate_api_headers`` (and *ytcfg* to ``_extract_response``).
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        # Walks parent_renderer['contents'], yielding entries and recording
        # the continuation it finds in the enclosing continuation_list[0].
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                # Not an item section: only rich items are handled here
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                # Renderer key -> callable returning an iterable of entries
                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                # Only the first known renderer of each item is processed
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            # Fall back to a continuation attached to the item section ...
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        # ... and finally to one attached to the parent renderer itself
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list used as a mutable cell shared with extract_entries
    continuation_list = [None]  # Python 2 does not support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    visitor_data = None

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep the previous visitor data when the response carries none
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        # First response shape: entries live under 'continuationContents'
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Rebinding gives extract_entries (which reads this name from the
            # enclosing scope) a fresh, empty cell for this page
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response shape: items appended through
        # 'appendContinuationItemsAction'. Each value is
        # (handler, key under which the handler expects the item list).
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'gridChannelRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The handler is chosen from the keys of the first item only
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            # Wrap the item list so it looks like the renderer the handler expects
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Neither response shape matched: stop paging
        break
9558dcec 3824
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the tab flagged as selected.

    Each tab is looked up under ``tabRenderer`` / ``expandableTabRenderer``;
    the first renderer whose ``selected`` value is exactly True wins.
    Raises ExtractorError when no tab is selected.
    """
    for tab in tabs:
        tab_renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    raise ExtractorError('Unable to find selected tab')
b82f815f 3833
@classmethod
def _extract_uploader(cls, data):
    """Return uploader metadata taken from the playlist sidebar.

    The result may contain ``uploader``, ``uploader_id`` and
    ``uploader_url``; keys whose value would be None are left out. An empty
    dict is returned when the sidebar names no video owner.
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    owner_name = owner.get('text')
    owner_id = try_get(
        owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
    owner_path = try_get(
        owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
    candidates = (
        ('uploader', owner_name),
        ('uploader_id', owner_id),
        ('uploader_url', urljoin('https://www.youtube.com/', owner_path)),
    )
    return dict((key, value) for key, value in candidates if value is not None)
8bdd16b4 3848
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build a playlist result from a tabbed (channel/playlist) page.

    Metadata is read from the channel metadata renderer when present,
    otherwise from the playlist metadata renderer; the entries come from
    the currently selected tab.
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    thumbnails_list = tags = []

    selected_tab = self._extract_selected_tab(tabs)
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if not renderer:
        # Not a channel page: fall back to the playlist metadata
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
    else:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        thumbnails_list = (
            try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for thumb in thumbnails_list:
        # Skip malformed entries and entries without a usable URL
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag_title = try_get(
            data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag_title or playlist_id
    # Append the selected tab's name to the title
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        # No channel metadata: take the uploader from the playlist sidebar
        metadata.update(self._extract_uploader(data))
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    ytcfg = self.extract_ytcfg(item_id, webpage)
    identity_token = self._extract_identity_token(webpage, item_id)
    account_syncid = self._extract_account_syncid(ytcfg, data)
    entries = self._entries(
        selected_tab, playlist_id, identity_token, account_syncid, ytcfg)
    return self.playlist_result(entries, **metadata)
73c4ac2c 3923
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a Mix playlist, requesting further pages from the 'next' endpoint.

    Iteration stops when a page has no videos, when a page contains nothing
    after the previously seen last video, or when the first video of the
    Mix shows up again.
    """
    first_id = last_id = None
    ytcfg = self.extract_ytcfg(playlist_id, webpage)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last panel entry may not be a playlistPanelVideoRenderer; default to an
        # empty dict so the fallbacks below are used instead of raising AttributeError
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 3959
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Handle a playlist panel found on a watch page.

    Anything whose endpoint URL differs from the current URL is delegated
    to the tab extractor; otherwise the panel is extracted as a Mix.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_path)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, webpage),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3977
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for dropdown_entry in dropdown_entries:
        if not try_get(
                dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        # Only the selected entry is inspected
        label = self._get_text(
            try_get(dropdown_entry, lambda x: x['privacyDropdownItemRenderer']['label'], dict) or [])
        if label:
            badge_labels.add(label.lower())
        break

    is_private = is_unlisted = None
    for badge_label in badge_labels:
        if badge_label == 'unlisted':
            is_unlisted = True
        elif badge_label == 'private':
            is_private = True
        elif badge_label == 'public':
            is_unlisted = is_private = False
    return self._availability(is_private, False, False, False, is_unlisted)
4010
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first truthy `info_renderer` value of type `expected_type`
    among the playlist sidebar items, or None when there is none."""
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        found = try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        if found:
            return found
    return None
4019
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns None when the page has no primary sidebar renderer. Otherwise the
    API request is made in any case, using default values for whatever the
    menu button did not provide.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_id = params = None
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        text = try_get(
            nav_item, lambda x: x['text']['simpleText'], compat_str)
        if text and text.lower() == 'show unavailable videos':
            endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id, params = endpoint.get('browseId'), endpoint.get('params')
            break

    ytcfg = self.extract_ytcfg(item_id, webpage)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=try_get(
            self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4058
def _extract_webpage(self, url, item_id):
    """Download the webpage and its initial data, retrying on incomplete data.

    The page is re-downloaded up to 'extractor_retries' times (default 3)
    until the initial data contains 'contents' or 'currentVideoEndpoint'.

    @returns (webpage, data)
    @raises ExtractorError when the last attempt still yields incomplete data
    """
    retries = self.get_param('extractor_retries', 3)
    count = -1
    last_error = 'Incomplete yt initial data received'
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if count:
            self.report_warning('%s. Retrying ...' % last_error)
        webpage = self._download_webpage(
            url, item_id,
            'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
        data = self.extract_yt_initial_data(item_id, webpage)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        if count >= retries:
            raise ExtractorError(last_error)
    return webpage, data
4080
@staticmethod
def _smuggle_data(entries, data):
    """Lazily yield each entry, smuggling `data` into its 'url' when `data` is truthy."""
    for item in entries:
        if not data:
            yield item
            continue
        item['url'] = smuggle_url(item['url'], data)
        yield item
4087
def _real_extract(self, url):
    """Unsmuggle the URL, run the actual extraction and pass the smuggled
    data (plus the music-URL flag) on to every resulting entry."""
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True
    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
4096
# Splits a URL into the part matched by _VALID_URL ("pre"), an optional "/tab"
# path segment (only attempted when the channel_type group matched) and the
# remainder of the URL ("post")
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4098
def __real_extract(self, url, smuggled_data):
    """Normalise the URL, download the page and dispatch on what it contains.

    The URL is rewritten step by step (host forced to www.youtube.com, music
    channel ids mapped to playlists, tab-less channel URLs redirected to
    /videos, video-less watch URLs turned into playlist URLs). The downloaded
    data is then handled, in order of preference, as a tabbed page, a
    watch-page playlist or a single video.

    @param smuggled_data  dict; only the 'is_music_url' key is read here
    @raises ExtractorError when the page cannot be recognised
    """
    item_id = self._match_id(url)
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Match groups as a dict, with unmatched groups replaced by ''
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                item_id = self._search_regex(
                    r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                    self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                    'playlist id')
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    # is_channel is re-checked because the music handling above may have cleared it
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Re-assemble the (possibly rewritten) URL and re-parse it
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            # The selected tab is not the requested /videos tab
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            if alert_type == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        # Switch over to the playlist only once it is known to be usable
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)
    # data may have been replaced above, so the tabs are looked up again
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    # Neither tabs nor a playlist: fall back to a single video, if any
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 4213
c5e8d7af 4214
class YoutubePlaylistIE(InfoExtractor):
    """Matches bare playlist ids and playlist URLs and hands them over to YoutubeTabIE."""
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE accepts or that carry a video id ('v')."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rebuild the URL as a /playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query if there is one, else use only the playlist id
        query = parse_qs(url) or {'list': playlist_id}
        url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4299
4300
class YoutubeYtBeIE(InfoExtractor):
    """Handles youtu.be short links that also carry a playlist id."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Turn the short link into the equivalent watch URL for YoutubeTabIE."""
        match = re.match(self._VALID_URL, url)
        video_id = match.group('id')
        playlist_id = match.group('playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4339
4340
class YoutubeYtUserIE(InfoExtractor):
    """Resolves the "ytuser:<name>" shorthand to the user's channel URL."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /user/ URL."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4354
b05654f0 4355
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolves the ":ytfav" family of keywords to the 'LL' playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the playlist URL."""
        playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key())
4373
4374
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor querying the 'search' API endpoint page by page."""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to `n` video results for `query`, following continuations."""
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS
        found = 0
        continuation = {}
        for page_num in itertools.count(1):
            request.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # The first page and the continuation pages keep the results in different places
            sections = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for section in sections:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [section]})

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not items:
                    continue
                for item in items:
                    video = item.get('videoRenderer') if isinstance(item, dict) else None
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    found += 1
                    if found == n:
                        return

            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query, query)
75dff0ee 4442
c9ae7b95 4443
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE using the "ytsearchdate" key and its own search params."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4449
c9ae7b95 4450
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Handles /results search URLs by running the equivalent search."""
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return the class's own _VALID_URL."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the search terms and the 'sp' parameter from the URL and run the search."""
        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = qs.get('search_query') or qs.get('q')
        query = terms[0]
        # The 'sp' value is stored on the instance, where _entries reads it
        self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4477
4478
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the feed name."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the feed URL."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4495
4496
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolves the ":ytwatchlater" keyword to the 'WL' playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the playlist URL."""
        playlist_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4509
4510
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'recommended' feed; also matches the bare home page URL."""
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4526
1ed5b5c9 4527
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'subscriptions' feed."""
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4539
1ed5b5c9 4540
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'history' feed."""
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4549
4550
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catches watch URLs without a video id (typically cut off at an
    unquoted "&" by the shell) and reports a helpful error."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raises an expected ExtractorError explaining how to quote the URL."""
        # The example commands name yt-dlp, the program this extractor ships with
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4598
4599
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catches watch URLs whose video id has only 1 to 10 characters."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raises an expected ExtractorError naming the incomplete id."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)