]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[youtube] Use new API for additional video extraction requests (#328)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
109dd3b2 6import copy
a5c56234 7import hashlib
0ca96d48 8import itertools
c5e8d7af 9import json
c4417ddb 10import os.path
d77ab8e2 11import random
c5e8d7af 12import re
8a784c74 13import time
e0df6211 14import traceback
c5e8d7af 15
b05654f0 16from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 17from ..compat import (
edf3e38e 18 compat_chr,
29f7c58a 19 compat_HTTPError,
c5e8d7af 20 compat_parse_qs,
545cc85d 21 compat_str,
7fd002c0 22 compat_urllib_parse_unquote_plus,
15707c7e 23 compat_urllib_parse_urlencode,
7c80519c 24 compat_urllib_parse_urlparse,
7c61bd36 25 compat_urlparse,
4bb4a188 26)
545cc85d 27from ..jsinterp import JSInterpreter
4bb4a188 28from ..utils import (
c224251a 29 bool_or_none,
c5e8d7af 30 clean_html,
26fe8ffe 31 dict_get,
d92f5d5a 32 datetime_from_str,
358de58c 33 error_to_compat_str,
c5e8d7af 34 ExtractorError,
b60419c5 35 format_field,
2d30521a 36 float_or_none,
dd27fd17 37 int_or_none,
94278f72 38 mimetype2ext,
6310acf5 39 parse_codecs,
7c80519c 40 parse_duration,
dca3ff4a 41 qualities,
3995d37d 42 remove_start,
cf7e015f 43 smuggle_url,
dbdaaa23 44 str_or_none,
c93d53f5 45 str_to_int,
556dbe7f 46 try_get,
c5e8d7af
PH
47 unescapeHTML,
48 unified_strdate,
cf7e015f 49 unsmuggle_url,
8bdd16b4 50 update_url_query,
21c340b8 51 url_or_none,
6e6bc8da 52 urlencode_postdata,
d92f5d5a 53 urljoin
c5e8d7af
PH
54)
55
5f6a1245 56
def parse_qs(url):
    """Parse the query component of *url* with compat_urlparse.parse_qs."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
59
60
de7f3446 61class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b
JMF
62 """Provide base functions for Youtube extractors"""
63 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
9303ce3e 64 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
e00eb564
S
65
66 _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
3995d37d
S
67 _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
68 _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
e00eb564 69
3462ffa8 70 _RESERVED_NAMES = (
bea74222 71 r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
46953e7e 72 r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
cd7c66cf 73 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
3462ffa8 74
b2e8bc1b
JMF
75 _NETRC_MACHINE = 'youtube'
76 # If True it will raise an error if no login info is provided
77 _LOGIN_REQUIRED = False
78
70d5c17b 79 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 80
def _login(self):
    """
    Attempt to log in to YouTube.
    True is returned if successful or skipped.
    A falsy value (False or None) is returned if login failed.

    If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
    """

    def warn(message):
        self.report_warning(message)

    # username+password login is broken
    if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
        self.raise_login_required(
            'Login details are needed to download this content', method='cookies')
    username, password = self._get_login_info()
    if username:
        warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
        return
    # Everything below this is broken!

    # No authentication to be performed
    if username is None:
        if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
            raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
        # if self.get_param('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
        #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
        return True

    login_page = self._download_webpage(
        self._LOGIN_URL, None,
        note='Downloading login page',
        errnote='unable to fetch login page', fatal=False)
    if login_page is False:
        return

    login_form = self._hidden_inputs(login_page)

    def req(url, f_req, note, errnote):
        # POST the hidden login form fields plus the JSON-encoded request payload
        data = login_form.copy()
        data.update({
            'pstMsg': 1,
            'checkConnection': 'youtube',
            'checkedDomains': 'youtube',
            'hl': 'en',
            'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
            'f.req': json.dumps(f_req),
            'flowName': 'GlifWebSignIn',
            'flowEntry': 'ServiceLogin',
            # TODO: reverse actual botguard identifier generation algo
            'bgRequest': '["identifier",""]',
        })
        return self._download_json(
            url, None, note=note, errnote=errnote,
            # Drop everything before the first '[' so the remainder parses as JSON
            transform_source=lambda s: re.sub(r'^[^[]*', '', s),
            fatal=False,
            data=urlencode_postdata(data), headers={
                'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                'Google-Accounts-XSRF': 1,
            })

    # The same ServiceLogin URL is embedded in both the lookup and the challenge payloads
    service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

    lookup_req = [
        username,
        None, [], None, 'US', None, None, 2, False, True,
        [
            None, None,
            [2, 1, None, 1, service_login_url, None, [], 4],
            1, [None, None, []], None, None, None, True
        ],
        username,
    ]

    lookup_results = req(
        self._LOOKUP_URL, lookup_req,
        'Looking up account info', 'Unable to look up account info')

    if lookup_results is False:
        return False

    user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
    if not user_hash:
        warn('Unable to extract user hash')
        return False

    challenge_req = [
        user_hash,
        None, 1, None, [1, None, None, None, [password, None, True]],
        [
            None, None, [2, 1, None, 1, service_login_url, None, [], 4],
            1, [None, None, []], None, None, None, True
        ]]

    challenge_results = req(
        self._CHALLENGE_URL, challenge_req,
        'Logging in', 'Unable to log in')

    if challenge_results is False:
        return

    login_res = try_get(challenge_results, lambda x: x[0][5], list)
    if login_res:
        login_msg = try_get(login_res, lambda x: x[5], compat_str)
        # Parenthesised so the prefix is kept for every message, not only the known one
        warn('Unable to login: %s' % (
            'Invalid password' if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
        return False

    res = try_get(challenge_results, lambda x: x[0][-1], list)
    if not res:
        warn('Unable to extract result entry')
        return False

    login_challenge = try_get(res, lambda x: x[0][0], list)
    if login_challenge:
        challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
        if challenge_str == 'TWO_STEP_VERIFICATION':
            # SEND_SUCCESS - TFA code has been successfully sent to phone
            # QUOTA_EXCEEDED - reached the limit of TFA codes
            status = try_get(login_challenge, lambda x: x[5], compat_str)
            if status == 'QUOTA_EXCEEDED':
                warn('Exceeded the limit of TFA codes, try later')
                return False

            tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
            if not tl:
                warn('Unable to extract TL')
                return False

            tfa_code = self._get_tfa_info('2-step verification code')

            if not tfa_code:
                warn(
                    'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                    '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                return False

            tfa_code = remove_start(tfa_code, 'G-')

            tfa_req = [
                user_hash, None, 2, None,
                [
                    9, None, None, None, None, None, None, None,
                    [None, tfa_code, True, 2]
                ]]

            tfa_results = req(
                self._TFA_URL.format(tl), tfa_req,
                'Submitting TFA code', 'Unable to submit TFA code')

            if tfa_results is False:
                return False

            tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
            if tfa_res:
                tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                # Same precedence fix as for the password message above
                warn('Unable to finish TFA: %s' % (
                    'Invalid TFA code' if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                return False

            check_cookie_url = try_get(
                tfa_results, lambda x: x[0][-1][2], compat_str)
        else:
            CHALLENGES = {
                'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
            }
            challenge = CHALLENGES.get(
                challenge_str,
                '%s returned error %s.' % (self.IE_NAME, challenge_str))
            warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
            return False
    else:
        check_cookie_url = try_get(res, lambda x: x[2], compat_str)

    if not check_cookie_url:
        warn('Unable to extract CheckCookie URL')
        return False

    check_cookie_results = self._download_webpage(
        check_cookie_url, None, 'Checking cookie', fatal=False)

    if check_cookie_results is False:
        return False

    if 'https://myaccount.google.com/' not in check_cookie_results:
        warn('Unable to log in')
        return False

    return True
275
def _initialize_consent(self):
    """Set a 'YES' CONSENT cookie for .youtube.com unless one is not needed.

    Nothing is done when a __Secure-3PSID cookie exists or the current
    CONSENT cookie already contains 'YES'. The numeric id of a pending
    consent is reused when present; otherwise a random three-digit id is used.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return
    pending_id = None
    consent_cookie = jar.get('CONSENT')
    if consent_cookie:
        current = consent_cookie.value
        if 'YES' in current:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', current, 'consent', default=None)
    suffix = pending_id or random.randint(100, 999)
    self._set_cookie(
        '.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % suffix)
8d81f3e3 290
def _real_initialize(self):
    """Prepare the consent cookie, then attempt login when a downloader is attached."""
    self._initialize_consent()
    if self._downloader is not None:
        # The login outcome does not change what is returned here
        self._login()
c5e8d7af 297
a0566bbf 298 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
29f7c58a 299 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
300 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 301
109dd3b2 302 _YT_DEFAULT_YTCFGS = {
303 'WEB': {
304 'INNERTUBE_API_VERSION': 'v1',
305 'INNERTUBE_CLIENT_NAME': 'WEB',
306 'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
307 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
308 'INNERTUBE_CONTEXT': {
309 'client': {
310 'clientName': 'WEB',
311 'clientVersion': '2.20210622.10.00',
312 'hl': 'en',
313 }
314 },
315 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
316 },
317 'WEB_REMIX': {
318 'INNERTUBE_API_VERSION': 'v1',
319 'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
320 'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
321 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
322 'INNERTUBE_CONTEXT': {
323 'client': {
324 'clientName': 'WEB_REMIX',
325 'clientVersion': '1.20210621.00.00',
326 'hl': 'en',
327 }
328 },
329 'INNERTUBE_CONTEXT_CLIENT_NAME': 67
330 },
331 'WEB_EMBEDDED_PLAYER': {
332 'INNERTUBE_API_VERSION': 'v1',
333 'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
334 'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
335 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
336 'INNERTUBE_CONTEXT': {
337 'client': {
338 'clientName': 'WEB_EMBEDDED_PLAYER',
339 'clientVersion': '1.20210620.0.1',
340 'hl': 'en',
341 }
342 },
343 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
344 },
345 'ANDROID': {
346 'INNERTUBE_API_VERSION': 'v1',
347 'INNERTUBE_CLIENT_NAME': 'ANDROID',
348 'INNERTUBE_CLIENT_VERSION': '16.20',
349 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
350 'INNERTUBE_CONTEXT': {
351 'client': {
352 'clientName': 'ANDROID',
353 'clientVersion': '16.20',
354 'hl': 'en',
355 }
356 },
357 'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID'
358 },
359 'ANDROID_EMBEDDED_PLAYER': {
360 'INNERTUBE_API_VERSION': 'v1',
361 'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
362 'INNERTUBE_CLIENT_VERSION': '16.20',
363 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
364 'INNERTUBE_CONTEXT': {
365 'client': {
366 'clientName': 'ANDROID_EMBEDDED_PLAYER',
367 'clientVersion': '16.20',
368 'hl': 'en',
369 }
370 },
371 'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER'
372 },
373 'ANDROID_MUSIC': {
374 'INNERTUBE_API_VERSION': 'v1',
375 'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
376 'INNERTUBE_CLIENT_VERSION': '4.32',
377 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
378 'INNERTUBE_CONTEXT': {
379 'client': {
380 'clientName': 'ANDROID_MUSIC',
381 'clientVersion': '4.32',
382 'hl': 'en',
383 }
384 },
385 'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID_MUSIC'
386 }
387 }
388
389 _YT_DEFAULT_INNERTUBE_HOSTS = {
390 'DIRECT': 'youtubei.googleapis.com',
391 'WEB': 'www.youtube.com',
392 'WEB_REMIX': 'music.youtube.com',
393 'ANDROID_MUSIC': 'music.youtube.com'
394 }
395
def _get_default_ytcfg(self, client='WEB'):
    """Return a deep copy of the built-in ytcfg for *client*.

    An unknown client is reported via write_debug and the WEB config is
    copied instead.
    """
    defaults = self._YT_DEFAULT_YTCFGS
    if client not in defaults:
        self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
        client = 'WEB'
    # Deep copy so callers can mutate the result without touching the class table
    return copy.deepcopy(defaults[client])
401
def _get_innertube_host(self, client='WEB'):
    """Look up the API hostname for *client*, trying the WEB entry second."""
    lookup_order = (client, 'WEB')
    return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, lookup_order)
404
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
    """try_get on *ytcfg*, falling back to the default ytcfg of *default_client*.

    The fallback is consulted whenever the value taken from *ytcfg* is falsy.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
409
def _extract_client_name(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_NAME from *ytcfg* or from the default config."""
    def client_name(cfg):
        return cfg['INNERTUBE_CLIENT_NAME']

    return self._ytcfg_get_safe(ytcfg, client_name, compat_str, default_client)
412
def _extract_client_version(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_VERSION from *ytcfg* or from the default config."""
    def client_version(cfg):
        return cfg['INNERTUBE_CLIENT_VERSION']

    return self._ytcfg_get_safe(ytcfg, client_version, compat_str, default_client)
415
def _extract_api_key(self, ytcfg=None, default_client='WEB'):
    """Return INNERTUBE_API_KEY from *ytcfg* or from the default config."""
    def api_key(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, api_key, compat_str, default_client)
418
def _extract_context(self, ytcfg=None, default_client='WEB'):
    """Return the INNERTUBE_CONTEXT dict to send with API requests.

    The context found in *ytcfg* is returned as is. Otherwise a copy of the
    default context for *default_client* is used; when a ytcfg was supplied,
    its client name/version and VISITOR_DATA are applied on top.
    """
    def context_of(cfg):
        return try_get(cfg, lambda x: x['INNERTUBE_CONTEXT'], dict)

    supplied = context_of(ytcfg)
    if supplied:
        return supplied

    context = context_of(self._get_default_ytcfg(default_client))
    if not ytcfg:
        return context

    # Recreate the client context (required)
    client_ctx = context['client']
    client_ctx['clientVersion'] = self._extract_client_version(ytcfg, default_client)
    client_ctx['clientName'] = self._extract_client_name(ytcfg, default_client)
    visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
    if visitor_data:
        client_ctx['visitorData'] = visitor_data
    return context
438
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """Build the 'SAPISIDHASH <time>_<sha1>' authorization value for *origin*.

    Returns None when neither a __Secure-3PAPISID nor a SAPISID cookie exists.
    """
    # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
    # See: https://github.com/yt-dlp/yt-dlp/issues/393
    cookies = self._get_cookies('https://www.youtube.com')
    sapisid = dict_get(cookies, ('__Secure-3PAPISID', 'SAPISID'))
    if sapisid is None:
        return None
    timestamp = round(time.time())
    # SAPISID cookie is required if not already present
    if not cookies.get('SAPISID'):
        self._set_cookie(
            '.youtube.com', 'SAPISID', sapisid.value, secure=True, expire_time=timestamp + 3600)
    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    payload = '{} {} {}'.format(timestamp, sapisid.value, origin)
    digest = hashlib.sha1(payload.encode('utf-8')).hexdigest()
    return 'SAPISIDHASH {}_{}'.format(timestamp, digest)
a5c56234
M
456
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='WEB'):
    """POST *query* to the /youtubei/v1/<ep> endpoint and return the parsed JSON.

    @param ep              endpoint name appended to the API URL
    @param query           dict merged into the JSON request body
    @param headers         extra headers applied on top of the generated ones
    @param context         request context; the default context of
                           *default_client* is used when falsy
    @param api_key         API key; the key of *default_client* is used when falsy
    @param api_hostname    host to call; the host of *default_client* when falsy
    @param default_client  client whose defaults fill in every missing value
    """
    data = {'context': context or self._extract_context(default_client=default_client)}
    data.update(query)
    real_headers = self._generate_api_headers(client=default_client)
    real_headers.update({'content-type': 'application/json'})
    if headers:
        real_headers.update(headers)
    return self._download_json(
        'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(data).encode('utf8'), headers=real_headers,
        # Fall back to the key of the requested client, matching the context,
        # headers and host above, instead of always using the WEB key
        query={'key': api_key or self._extract_api_key(default_client=default_client)})
472
def _extract_yt_initial_data(self, video_id, webpage):
    """Find the ytInitialData JSON object in *webpage* and parse it.

    The pattern followed by a boundary marker is tried before the bare pattern.
    """
    bounded_re = r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE)
    raw_json = self._search_regex(
        (bounded_re, self._YT_INITIAL_DATA_RE), webpage, 'yt initial data')
    return self._parse_json(raw_json, video_id)
0c148415 479
a1c5d2ca
M
def _extract_identity_token(self, webpage, item_id):
    """Return the ID_TOKEN from the page's ytcfg, else from a regex over *webpage*.

    None is returned when neither source yields a token.
    """
    ytcfg = self._extract_ytcfg(item_id, webpage)
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
489
490 @staticmethod
def _extract_account_syncid(data):
    """
    Extract syncId required to download private playlists of secondary channels
    @param data Either response or ytcfg
    @returns    The channel sync id, the DELEGATED_SESSION_ID of a ytcfg dict,
                or None when neither is available (including non-dict input)
    """
    sync_ids = (try_get(
        data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
               lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
    if len(sync_ids) >= 2 and sync_ids[1]:
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        return sync_ids[0]
    # try_get above tolerates None/non-dict input, so the fallback must too
    if not isinstance(data, dict):
        return None
    # ytcfg includes channel_syncid if on secondary channel
    return data.get('DELEGATED_SESSION_ID')
a1c5d2ca 505
def _extract_ytcfg(self, video_id, webpage):
    """Parse the argument of the first ytcfg.set({...}) call in *webpage*.

    An empty dict is returned for an empty page, when no call is found, or
    when parsing yields nothing.
    """
    if not webpage:
        return {}
    raw_cfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    parsed = self._parse_json(raw_cfg, video_id, fatal=False)
    return parsed or {}
513
def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None,
                          visitor_data=None, api_hostname=None, client='WEB'):
    """Assemble the HTTP headers for an API request.

    Client name/version come from *ytcfg* with *client* defaults as fallback.
    Identity token, account sync id and visitor data headers are added only
    when the corresponding argument is truthy; Authorization and X-Origin are
    added only when a SAPISIDHASH value could be generated.
    """
    host = api_hostname or self._get_innertube_host(client)
    origin = 'https://' + host
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, client),
        'Origin': origin,
    }
    if identity_token:
        headers['X-Youtube-Identity-Token'] = identity_token
    if account_syncid:
        headers.update({'X-Goog-PageId': account_syncid, 'X-Goog-AuthUser': 0})
    if visitor_data:
        headers['X-Goog-Visitor-Id'] = visitor_data
    auth = self._generate_sapisidhash_header(origin)
    if auth is None:
        return headers
    headers['Authorization'] = auth
    headers['X-Origin'] = origin
    return headers
29f7c58a 535
109dd3b2 536 @staticmethod
def _extract_alerts(data):
    """Yield one (alert_type, message) pair per alert found in data['alerts'].

    The message is the alert's text.simpleText when present, otherwise the
    concatenated text of text.runs. Entries that are not dicts, have no type,
    or produce an empty message are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
            if not message:
                runs = try_get(alert, lambda x: x['text']['runs'], list) or []
                # A run without usable text contributes nothing instead of raising
                message = ''.join(
                    try_get(run, lambda x: x['text'], compat_str) or '' for run in runs)
            # Yield exactly once per alert so the same alert is not reported twice
            if message:
                yield alert_type, message
552
def _report_alerts(self, alerts, expected=True):
    """Report (type, message) alerts; raise ExtractorError for the last error.

    Alerts whose type is 'error' (case-insensitive) are errors. Every other
    alert, plus all errors except the last, is reported as a warning. The
    last error, if any, is raised with the given *expected* flag.
    """
    warnings, errors = [], []
    for kind, text in alerts:
        bucket = errors if kind.lower() == 'error' else warnings
        bucket.append((kind, text))

    for kind, text in warnings + errors[:-1]:
        self.report_warning('YouTube said: %s - %s' % (kind, text))
    if errors:
        final_message = errors[-1][1]
        raise ExtractorError('YouTube said: %s' % final_message, expected=expected)
566
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from *data* and pass them to _report_alerts."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
569
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='WEB'):
    """Call the API with retries and return the response.

    A request is retried (up to the 'extractor_retries' param, default 3) on
    HTTP 500/503/404 and when none of *check_get_keys* is found in the
    response. Alerts in the response are reported through
    _extract_and_report_alerts. When *fatal* is false, failures are reported
    as warnings and None is returned instead of raising.
    """
    response = None
    pending_error = None
    attempt = -1
    max_retries = self.get_param('extractor_retries', 3)
    required_keys = [] if check_get_keys is None else check_get_keys
    while attempt < max_retries:
        attempt += 1
        if pending_error:
            self.report_warning('%s. Retrying ...' % pending_error)
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, retry_suffix))
        except ExtractorError as e:
            http_code = e.cause.code if isinstance(e.cause, compat_HTTPError) else None
            if http_code in (500, 503, 404):
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                pending_error = 'HTTP Error %s' % http_code
                if attempt < max_retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return

        # Youtube may send alerts if there was an issue with the continuation page
        try:
            self._extract_and_report_alerts(response, expected=False)
        except ExtractorError as e:
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return
        if not required_keys or dict_get(response, required_keys):
            break
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        pending_error = 'Incomplete data received'
        if attempt >= max_retries:
            if fatal:
                raise ExtractorError(pending_error)
            self.report_warning(pending_error)
            return
    return response
625
9297939e 626 @staticmethod
def is_music_url(url):
    """Return True when *url* starts with http(s)://music.youtube.com/."""
    return bool(re.match(r'https?://music\.youtube\.com/', url))
629
def _extract_video(self, renderer):
    """Build a 'url' result dict (handled by YoutubeIE) from a video renderer.

    Title, description, duration, view count and uploader are read from the
    renderer when present; fields that cannot be read are None.
    """
    video_id = renderer.get('videoId')
    title = try_get(
        renderer,
        (lambda x: x['title']['runs'][0]['text'],
         lambda x: x['title']['simpleText']), compat_str)
    description = try_get(
        renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
        compat_str)
    length_text = try_get(
        renderer, lambda x: x['lengthText']['simpleText'], compat_str)
    views_text = try_get(
        renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
    # Strip all whitespace, then keep only the leading digit/comma group
    views_digits = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', views_text),
        'view count', default=None)
    uploader = try_get(
        renderer,
        (lambda x: x['ownerText']['runs'][0]['text'],
         lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
        'title': title,
        'description': description,
        'duration': parse_duration(length_text),
        'view_count': str_to_int(views_digits),
        'uploader': uploader,
    }
661
0c148415 662
360e1ca5 663class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 664 IE_DESC = 'YouTube.com'
bc2ca1bb 665 _INVIDIOUS_SITES = (
666 # invidious-redirect websites
667 r'(?:www\.)?redirect\.invidious\.io',
668 r'(?:(?:www|dev)\.)?invidio\.us',
669 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
670 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 671 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 672 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 673 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
bc2ca1bb 674 # youtube-dl invidious instances list
675 r'(?:(?:www|no)\.)?invidiou\.sh',
676 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
677 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 678 r'(?:www\.)?invidious\.mastodon\.host',
679 r'(?:www\.)?invidious\.zapashcanon\.fr',
ed807c18 680 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
201c1459 681 r'(?:www\.)?invidious\.tinfoil-hat\.net',
682 r'(?:www\.)?invidious\.himiko\.cloud',
683 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 684 r'(?:www\.)?invidious\.tube',
685 r'(?:www\.)?invidiou\.site',
686 r'(?:www\.)?invidious\.site',
687 r'(?:www\.)?invidious\.xyz',
688 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 689 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 690 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 691 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 692 r'(?:www\.)?tube\.poal\.co',
693 r'(?:www\.)?tube\.connect\.cafe',
694 r'(?:www\.)?vid\.wxzm\.sx',
695 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 696 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 697 r'(?:www\.)?yewtu\.be',
698 r'(?:www\.)?yt\.elukerio\.org',
699 r'(?:www\.)?yt\.lelux\.fi',
700 r'(?:www\.)?invidious\.ggc-project\.de',
701 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 702 r'(?:www\.)?ytprivate\.com',
703 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 704 r'(?:www\.)?invidious\.toot\.koeln',
705 r'(?:www\.)?invidious\.fdn\.fr',
706 r'(?:www\.)?watch\.nettohikari\.com',
ed807c18 707 r'(?:www\.)?invidious\.namazso\.eu',
708 r'(?:www\.)?invidious\.silkky\.cloud',
709 r'(?:www\.)?invidious\.exonip\.de',
710 r'(?:www\.)?invidious\.riverside\.rocks',
711 r'(?:www\.)?invidious\.blamefran\.net',
712 r'(?:www\.)?invidious\.moomoo\.de',
713 r'(?:www\.)?ytb\.trom\.tf',
714 r'(?:www\.)?yt\.cyberhost\.uk',
bc2ca1bb 715 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
716 r'(?:www\.)?qklhadlycap4cnod\.onion',
717 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
718 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
719 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
720 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
721 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
722 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
ed807c18 723 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
724 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
725 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
726 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
bc2ca1bb 727 )
cb7dfeea 728 _VALID_URL = r"""(?x)^
c5e8d7af 729 (
edb53e2d 730 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 731 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
732 (?:www\.)?deturl\.com/www\.youtube\.com|
733 (?:www\.)?pwnyoutube\.com|
734 (?:www\.)?hooktube\.com|
735 (?:www\.)?yourepeat\.com|
736 tube\.majestyc\.net|
737 %(invidious)s|
738 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
739 (?:.*?\#/)? # handle anchor (#/) redirect urls
740 (?: # the various things that can precede the ID:
ac7553d0 741 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 742 |(?: # or the v= param in all its forms
f7000f3a 743 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 744 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 745 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
746 v=
747 )
f4b05232 748 ))
cbaed4bb
S
749 |(?:
750 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
751 vid\.plus| # or vid.plus/xxxx
752 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 753 %(invidious)s
cbaed4bb 754 )/
edb53e2d 755 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 756 )
c5e8d7af 757 )? # all until now is optional -> you can pass the naked ID
201c1459 758 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 759 (?(1).+)? # if we found the ID, everything can follow
9297939e 760 (?:\#|$)""" % {
bc2ca1bb 761 'invidious': '|'.join(_INVIDIOUS_SITES),
762 }
e40c758c 763 _PLAYER_INFO_RE = (
cc2db878 764 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
765 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 766 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 767 )
# Static metadata for known formats, keyed by format id as a string (the
# comments below refer to these ids as "itag"); '_rtmp' is the one non-numeric
# key.  Each value is a dict of whichever of these fields are fixed for that
# format: ext, width, height, acodec, vcodec, abr, fps, container,
# format_note, preference, protocol.  Fields that vary per video are omitted.
# The 3D and HLS groups carry a negative 'preference' (-20 and -10).
_formats = {
    '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
    '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
    '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
    '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
    '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
    '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},


    # 3D videos
    '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
    '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
    '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

    # Apple HTTP Live Streaming
    '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

    # DASH mp4 video
    '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
    '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

    # Dash mp4 audio
    '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
    '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
    '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
    '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
    '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

    # Dash webm
    '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
    '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
    '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

    # Dash webm audio
    '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
    '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

    # Dash webm audio with opus inside
    '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
    '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
    '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

    # RTMP (unnamed)
    '_rtmp': {'protocol': 'rtmp'},

    # av01 video only formats sometimes served with "unknown" codecs
    '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
}
# Subtitle format identifiers, as a fixed tuple.
# NOTE(review): presumably the set of formats requested for each subtitle
# track; the consumer is outside this chunk -- confirm.
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 874
# Message texts associated with age-restricted videos.
# NOTE(review): presumably compared against the reason text returned for an
# unplayable video to detect an age gate; the check itself is outside this
# chunk -- confirm.  The strings must stay byte-exact.
_AGE_GATE_REASONS = (
    'Sign in to confirm your age',
    'This video may be inappropriate for some users.',
    'Sorry, this content is age-restricted.')
879
# Class-level flag set to False for this extractor.
# NOTE(review): presumably read by the base extractor to turn off its generic
# geo-restriction bypass -- confirm against InfoExtractor.
_GEO_BYPASS = False
881
# Name under which this extractor is identified.
IE_NAME = 'youtube'
# Test fixtures for this extractor.  Each entry is a dict with a 'url' (a full
# URL or a bare 11-character video id) and either:
#   * 'only_matching': True  -- the entry only lists a URL form, or
#   * 'info_dict'            -- expected metadata fields, optionally with
#     'params', 'playlist'/'playlist_count', 'expected_warnings', 'md5',
#     'note' or 'skip'.
# Expected values appear here as literals, as 'md5:<hash>', 're:<regex>',
# 'mincount:<n>' / 'maxcount:<n>' strings, or as a type such as ``int``.
# NOTE(review): how these markers are evaluated is defined by the test harness,
# which is outside this chunk.
_TESTS = [
    {
        'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
        'info_dict': {
            'id': 'BaW_jenozKc',
            'ext': 'mp4',
            'title': 'youtube-dl test video "\'/\\ä↭𝕐',
            'uploader': 'Philipp Hagemeister',
            'uploader_id': 'phihag',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
            'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
            'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
            'upload_date': '20121002',
            'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
            'categories': ['Science & Technology'],
            'tags': ['youtube-dl'],
            'duration': 10,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'start_time': 1,
            'end_time': 9,
        }
    },
    {
        'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
        'note': 'Embed-only video (#1746)',
        'info_dict': {
            'id': 'yZIXLfi8CZQ',
            'ext': 'mp4',
            'upload_date': '20120608',
            'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
            'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
            'uploader': 'SET India',
            'uploader_id': 'setindia',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
            'age_limit': 18,
        },
        'skip': 'Private video',
    },
    {
        'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
        'note': 'Use the first video ID in the URL',
        'info_dict': {
            'id': 'BaW_jenozKc',
            'ext': 'mp4',
            'title': 'youtube-dl test video "\'/\\ä↭𝕐',
            'uploader': 'Philipp Hagemeister',
            'uploader_id': 'phihag',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
            'upload_date': '20121002',
            'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
            'categories': ['Science & Technology'],
            'tags': ['youtube-dl'],
            'duration': 10,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    },
    {
        'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
        'note': '256k DASH audio (format 141) via DASH manifest',
        'info_dict': {
            'id': 'a9LDPn-MO4I',
            'ext': 'm4a',
            'upload_date': '20121002',
            'uploader_id': '8KVIDEO',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
            'description': '',
            'uploader': '8KVIDEO',
            'title': 'UHDTV TEST 8K VIDEO.mp4'
        },
        'params': {
            'youtube_include_dash_manifest': True,
            'format': '141',
        },
        'skip': 'format 141 not served anymore',
    },
    # DASH manifest with encrypted signature
    {
        'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
        'info_dict': {
            'id': 'IB3lcPjvWLA',
            'ext': 'm4a',
            'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
            'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
            'duration': 244,
            'uploader': 'AfrojackVEVO',
            'uploader_id': 'AfrojackVEVO',
            'upload_date': '20131011',
            'abr': 129.495,
        },
        'params': {
            'youtube_include_dash_manifest': True,
            'format': '141/bestaudio[ext=m4a]',
        },
    },
    # Controversy video
    {
        'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
        'info_dict': {
            'id': 'T4XJQO3qol8',
            'ext': 'mp4',
            'duration': 219,
            'upload_date': '20100909',
            'uploader': 'Amazing Atheist',
            'uploader_id': 'TheAmazingAtheist',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
            'title': 'Burning Everyone\'s Koran',
            'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
        }
    },
    # Normal age-gate video (embed allowed)
    {
        'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
        'info_dict': {
            'id': 'HtVdAasjOgU',
            'ext': 'mp4',
            'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
            'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
            'duration': 142,
            'uploader': 'The Witcher',
            'uploader_id': 'WitcherGame',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
            'upload_date': '20140605',
            'age_limit': 18,
        },
    },
    # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
    # YouTube Red ad is not captured for creator
    {
        'url': '__2ABJjxzNo',
        'info_dict': {
            'id': '__2ABJjxzNo',
            'ext': 'mp4',
            'duration': 266,
            'upload_date': '20100430',
            'uploader_id': 'deadmau5',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
            'creator': 'deadmau5',
            'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
            'uploader': 'deadmau5',
            'title': 'Deadmau5 - Some Chords (HD)',
            'alt_title': 'Some Chords',
        },
        'expected_warnings': [
            'DASH manifest missing',
        ]
    },
    # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
    {
        'url': 'lqQg6PlCWgI',
        'info_dict': {
            'id': 'lqQg6PlCWgI',
            'ext': 'mp4',
            'duration': 6085,
            'upload_date': '20150827',
            'uploader_id': 'olympic',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
            'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
            'uploader': 'Olympic',
            'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
        },
        'params': {
            'skip_download': 'requires avconv',
        }
    },
    # Non-square pixels
    {
        'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
        'info_dict': {
            'id': '_b-2C3KPAM0',
            'ext': 'mp4',
            'stretched_ratio': 16 / 9.,
            'duration': 85,
            'upload_date': '20110310',
            'uploader_id': 'AllenMeow',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
            'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
            'uploader': '孫ᄋᄅ',
            'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
        },
    },
    # url_encoded_fmt_stream_map is empty string
    {
        'url': 'qEJwOuvDf7I',
        'info_dict': {
            'id': 'qEJwOuvDf7I',
            'ext': 'webm',
            'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
            'description': '',
            'upload_date': '20150404',
            'uploader_id': 'spbelect',
            'uploader': 'Наблюдатели Петербурга',
        },
        'params': {
            'skip_download': 'requires avconv',
        },
        'skip': 'This live event has ended.',
    },
    # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
    {
        'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
        'info_dict': {
            'id': 'FIl7x6_3R5Y',
            'ext': 'webm',
            'title': 'md5:7b81415841e02ecd4313668cde88737a',
            'description': 'md5:116377fd2963b81ec4ce64b542173306',
            'duration': 220,
            'upload_date': '20150625',
            'uploader_id': 'dorappi2000',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
            'uploader': 'dorappi2000',
            'formats': 'mincount:31',
        },
        'skip': 'not actual anymore',
    },
    # DASH manifest with segment_list
    {
        'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
        'md5': '8ce563a1d667b599d21064e982ab9e31',
        'info_dict': {
            'id': 'CsmdDsKjzN8',
            'ext': 'mp4',
            'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
            'uploader': 'Airtek',
            'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
            'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
            'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
        },
        'params': {
            'youtube_include_dash_manifest': True,
            'format': '135',  # bestvideo
        },
        'skip': 'This live event has ended.',
    },
    {
        # Multifeed videos (multiple cameras), URL is for Main Camera
        'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
        'info_dict': {
            'id': 'jvGDaLqkpTg',
            'title': 'Tom Clancy Free Weekend Rainbow Whatever',
            'description': 'md5:e03b909557865076822aa169218d6a5d',
        },
        'playlist': [{
            'info_dict': {
                'id': 'jvGDaLqkpTg',
                'ext': 'mp4',
                'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
                'description': 'md5:e03b909557865076822aa169218d6a5d',
                'duration': 10643,
                'upload_date': '20161111',
                'uploader': 'Team PGP',
                'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
            },
        }, {
            'info_dict': {
                'id': '3AKt1R1aDnw',
                'ext': 'mp4',
                'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
                'description': 'md5:e03b909557865076822aa169218d6a5d',
                'duration': 10991,
                'upload_date': '20161111',
                'uploader': 'Team PGP',
                'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
            },
        }, {
            'info_dict': {
                'id': 'RtAMM00gpVc',
                'ext': 'mp4',
                'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
                'description': 'md5:e03b909557865076822aa169218d6a5d',
                'duration': 10995,
                'upload_date': '20161111',
                'uploader': 'Team PGP',
                'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
            },
        }, {
            'info_dict': {
                'id': '6N2fdlP3C5U',
                'ext': 'mp4',
                'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
                'description': 'md5:e03b909557865076822aa169218d6a5d',
                'duration': 10990,
                'upload_date': '20161111',
                'uploader': 'Team PGP',
                'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
            },
        }],
        'params': {
            'skip_download': True,
        },
    },
    {
        # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
        'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
        'info_dict': {
            'id': 'gVfLd0zydlo',
            'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
        },
        'playlist_count': 2,
        'skip': 'Not multifeed anymore',
    },
    {
        'url': 'https://vid.plus/FlRa-iH7PGw',
        'only_matching': True,
    },
    {
        'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
        'only_matching': True,
    },
    {
        # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
        # Also tests cut-off URL expansion in video description (see
        # https://github.com/ytdl-org/youtube-dl/issues/1892,
        # https://github.com/ytdl-org/youtube-dl/issues/8164)
        'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
        'info_dict': {
            'id': 'lsguqyKfVQg',
            'ext': 'mp4',
            'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
            'alt_title': 'Dark Walk - Position Music',
            'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
            'duration': 133,
            'upload_date': '20151119',
            'uploader_id': 'IronSoulElf',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
            'uploader': 'IronSoulElf',
            'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
            'track': 'Dark Walk - Position Music',
            'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
            'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
        },
        'params': {
            'skip_download': True,
        },
    },
    {
        # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
        'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
        'only_matching': True,
    },
    {
        # Video with yt:stretch=17:0
        'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
        'info_dict': {
            'id': 'Q39EVAstoRM',
            'ext': 'mp4',
            'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
            'description': 'md5:ee18a25c350637c8faff806845bddee9',
            'upload_date': '20151107',
            'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
            'uploader': 'CH GAMER DROID',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video does not exist.',
    },
    {
        # Video with incomplete 'yt:stretch=16:'
        'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
        'only_matching': True,
    },
    {
        # Video licensed under Creative Commons
        'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
        'info_dict': {
            'id': 'M4gD1WSo5mA',
            'ext': 'mp4',
            'title': 'md5:e41008789470fc2533a3252216f1c1d1',
            'description': 'md5:a677553cf0840649b731a3024aeff4cc',
            'duration': 721,
            'upload_date': '20150127',
            'uploader_id': 'BerkmanCenter',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
            'uploader': 'The Berkman Klein Center for Internet & Society',
            'license': 'Creative Commons Attribution license (reuse allowed)',
        },
        'params': {
            'skip_download': True,
        },
    },
    {
        # Channel-like uploader_url
        'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
        'info_dict': {
            'id': 'eQcmzGIKrzg',
            'ext': 'mp4',
            'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
            'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
            'duration': 4060,
            'upload_date': '20151119',
            'uploader': 'Bernie Sanders',
            'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
            'license': 'Creative Commons Attribution license (reuse allowed)',
        },
        'params': {
            'skip_download': True,
        },
    },
    {
        'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
        'only_matching': True,
    },
    {
        # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
        'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
        'only_matching': True,
    },
    {
        # Rental video preview
        'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
        'info_dict': {
            'id': 'uGpuVWrhIzE',
            'ext': 'mp4',
            'title': 'Piku - Trailer',
            'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
            'upload_date': '20150811',
            'uploader': 'FlixMatrix',
            'uploader_id': 'FlixMatrixKaravan',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
            'license': 'Standard YouTube License',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is not available.',
    },
    {
        # YouTube Red video with episode data
        'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
        'info_dict': {
            'id': 'iqKdEhx-dD4',
            'ext': 'mp4',
            'title': 'Isolation - Mind Field (Ep 1)',
            'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
            'duration': 2085,
            'upload_date': '20170118',
            'uploader': 'Vsauce',
            'uploader_id': 'Vsauce',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
            'series': 'Mind Field',
            'season_number': 1,
            'episode_number': 1,
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': [
            'Skipping DASH manifest',
        ],
    },
    {
        # The following content has been identified by the YouTube community
        # as inappropriate or offensive to some audiences.
        'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
        'info_dict': {
            'id': '6SJNVb0GnPI',
            'ext': 'mp4',
            'title': 'Race Differences in Intelligence',
            'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
            'duration': 965,
            'upload_date': '20140124',
            'uploader': 'New Century Foundation',
            'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
    },
    {
        # itag 212
        'url': '1t24XAntNCY',
        'only_matching': True,
    },
    {
        # geo restricted to JP
        'url': 'sJL6WA-aGkQ',
        'only_matching': True,
    },
    {
        'url': 'https://invidio.us/watch?v=BaW_jenozKc',
        'only_matching': True,
    },
    {
        'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
        'only_matching': True,
    },
    {
        # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
        'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
        'only_matching': True,
    },
    {
        # DRM protected
        'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
        'only_matching': True,
    },
    {
        # Video with unsupported adaptive stream type formats
        'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
        'info_dict': {
            'id': 'Z4Vy8R84T1U',
            'ext': 'mp4',
            'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'duration': 433,
            'upload_date': '20130923',
            'uploader': 'Amelia Putri Harwita',
            'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
            'formats': 'maxcount:10',
        },
        'params': {
            'skip_download': True,
            'youtube_include_dash_manifest': False,
        },
        'skip': 'not actual anymore',
    },
    {
        # Youtube Music Auto-generated description
        'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
        'info_dict': {
            'id': 'MgNrAu2pzNs',
            'ext': 'mp4',
            'title': 'Voyeur Girl',
            'description': 'md5:7ae382a65843d6df2685993e90a8628f',
            'upload_date': '20190312',
            'uploader': 'Stephen - Topic',
            'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
            'artist': 'Stephen',
            'track': 'Voyeur Girl',
            'album': 'it\'s too much love to know my dear',
            'release_date': '20190313',
            'release_year': 2019,
        },
        'params': {
            'skip_download': True,
        },
    },
    {
        'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
        'only_matching': True,
    },
    {
        # invalid -> valid video id redirection
        'url': 'DJztXj2GPfl',
        'info_dict': {
            'id': 'DJztXj2GPfk',
            'ext': 'mp4',
            'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
            'description': 'md5:bf577a41da97918e94fa9798d9228825',
            'upload_date': '20090125',
            'uploader': 'Prochorowka',
            'uploader_id': 'Prochorowka',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
            'artist': 'Panjabi MC',
            'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
            'album': 'Beware of the Boys (Mundian To Bach Ke)',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Video unavailable',
    },
    {
        # empty description results in an empty string
        'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
        'info_dict': {
            'id': 'x41yOUIvK2k',
            'ext': 'mp4',
            'title': 'IMG 3456',
            'description': '',
            'upload_date': '20170613',
            'uploader_id': 'ElevageOrVert',
            'uploader': 'ElevageOrVert',
        },
        'params': {
            'skip_download': True,
        },
    },
    {
        # with '};' inside yt initial data (see [1])
        # see [2] for an example with '};' inside ytInitialPlayerResponse
        # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
        # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
        'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
        'info_dict': {
            'id': 'CHqg6qOn4no',
            'ext': 'mp4',
            'title': 'Part 77 Sort a list of simple types in c#',
            'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
            'upload_date': '20130831',
            'uploader_id': 'kudvenkat',
            'uploader': 'kudvenkat',
        },
        'params': {
            'skip_download': True,
        },
    },
    {
        # another example of '};' in ytInitialData
        'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
        'only_matching': True,
    },
    {
        'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
        'only_matching': True,
    },
    {
        # https://github.com/ytdl-org/youtube-dl/pull/28094
        'url': 'OtqTfy26tG0',
        'info_dict': {
            'id': 'OtqTfy26tG0',
            'ext': 'mp4',
            'title': 'Burn Out',
            'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
            'upload_date': '20141120',
            'uploader': 'The Cinematic Orchestra - Topic',
            'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
            'artist': 'The Cinematic Orchestra',
            'track': 'Burn Out',
            'album': 'Every Day',
            # NOTE(review): 'release_data' looks like a typo for 'release_date'
            # (the other music fixtures use 'release_date').  Left unchanged
            # because the expected value cannot be confirmed offline -- verify
            # against a live run before renaming.
            'release_data': None,
            'release_year': None,
        },
        'params': {
            'skip_download': True,
        },
    },
    {
        # controversial video, only works with bpctr when authenticated with cookies
        'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
        'only_matching': True,
    },
    {
        # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
        'url': 'cBvYw8_A0vQ',
        'info_dict': {
            'id': 'cBvYw8_A0vQ',
            'ext': 'mp4',
            'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
            'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
            'upload_date': '20201120',
            'uploader': 'Walk around Japan',
            'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # Has multiple audio streams
        'url': 'WaOKSUlf4TM',
        'only_matching': True
    }, {
        # Requires Premium: has format 141 when requested using YTM url
        'url': 'https://music.youtube.com/watch?v=XclachpHxis',
        'only_matching': True
    }, {
        # multiple subtitles with same lang_code
        'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
        'only_matching': True,
    }, {
        # Force use android client fallback
        'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
        'info_dict': {
            'id': 'YOelRv7fMxY',
            'title': 'Digging a Secret Tunnel from my Workshop',
            'ext': '3gp',
            'upload_date': '20210624',
            'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
            'uploader': 'colinfurze',
            'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
            'description': 'md5:ecb672623246d98c6c562eed6ae798c3'
        },
        'params': {
            'format': '17',  # 3gp format available on android
            'extractor_args': {'youtube': {'player_client': ['android']}},
        },
    },
    {
        # Skip download of additional client configs (remix client config in this case)
        'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
        'only_matching': True,
        'params': {
            'extractor_args': {'youtube': {'player_skip': ['configs']}},
        },
    }
]
1586
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    A URL whose query string has a non-empty first ``list`` value is
    rejected (returns False); every other URL is decided by the parent
    class's ``suitable``.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_id = parse_qs(url).get('list', [None])[0]
    if list_id:
        return False
    return super(YoutubeIE, cls).suitable(url)
1596
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the base extractor and create the two empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _player_cache: (player_url, signature shape) -> signature function
    # (filled by _decrypt_signature)
    self._player_cache = {}
    # _code_cache: player id -> downloaded player JS (filled by _load_player)
    self._code_cache = {}
e0df6211 1601
def _extract_player_url(self, ytcfg=None, webpage=None):
    """Return the absolute URL of the JS player, or None when it is unknown.

    The URL is taken from ``ytcfg['PLAYER_JS_URL']`` when present; otherwise
    it is searched for in *webpage* (non-fatally). Protocol-relative URLs
    get an ``https:`` prefix and other non-http(s) values are joined onto
    ``https://www.youtube.com``.

    Returns None instead of raising when neither source yields a URL, e.g.
    when *webpage* is falsy because its download failed non-fatally or the
    regex does not match.
    """
    player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
    if not player_url and webpage:
        # Only search when there is a page to search in
        player_url = self._search_regex(
            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
            webpage, 'player URL', fatal=False)
    if not player_url:
        # Callers in this class already treat a None player_url as "unavailable"
        return None
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)
    return player_url
1614
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return the lengths of the dot-separated parts of *example_sig*, joined by dots."""
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1618
e40c758c
S
1619 @classmethod
1620 def _extract_player_info(cls, player_url):
1621 for player_re in cls._PLAYER_INFO_RE:
1622 id_m = re.search(player_re, player_url)
1623 if id_m:
1624 break
1625 else:
c081b35c 1626 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 1627 return id_m.group('id')
e40c758c 1628
109dd3b2 1629 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1630 player_id = self._extract_player_info(player_url)
1631 if player_id not in self._code_cache:
1632 self._code_cache[player_id] = self._download_webpage(
1633 player_url, video_id, fatal=fatal,
1634 note='Downloading player ' + player_id,
1635 errnote='Download of %s failed' % player_url)
1636 return player_id in self._code_cache
1637
e40c758c 1638 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 1639 player_id = self._extract_player_info(player_url)
e0df6211 1640
c4417ddb 1641 # Read from filesystem cache
545cc85d 1642 func_id = 'js_%s_%s' % (
1643 player_id, self._signature_cache_id(example_sig))
c4417ddb 1644 assert os.path.basename(func_id) == func_id
a0e07d31 1645
69ea8ca4 1646 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1647 if cache_spec is not None:
78caa52a 1648 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1649
109dd3b2 1650 if self._load_player(video_id, player_url):
1651 code = self._code_cache[player_id]
1652 res = self._parse_sig_js(code)
e0df6211 1653
109dd3b2 1654 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1655 cache_res = res(test_string)
1656 cache_spec = [ord(c) for c in cache_res]
83799698 1657
109dd3b2 1658 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1659 return res
83799698 1660
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the signature function *func*.

    *func* is run on a probe string of the same length as *example_sig* to
    recover the index permutation it applies; that permutation is rendered
    as a sum of ``s[...]`` index and slice expressions and written via
    ``self.to_screen``.
    """
    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        if not idxs:
            # Nothing to render for an empty permutation
            return
        # Seed i so a one-element permutation (for which the loop below
        # never runs) still emits its single index instead of hitting an
        # unbound loop variable.
        i = idxs[0]

        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    # Still inside the current +/-1 run
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run of consecutive indices starts here
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Flush whatever is pending once the pairs are exhausted
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    # NOTE(review): the leading whitespace inside the second literal is kept
    # as it appeared in the reviewed copy, which may have collapsed runs of
    # spaces - confirm against the real file before applying.
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1699
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Locate the signature function in player JS and return it as a Python callable.

    The function name is found with the first matching pattern below, the
    function is extracted with JSInterpreter, and the returned callable
    invokes it with the signature string as its only argument.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        # NOTE(review): identical to the pattern directly above; it can never
        # match where that one failed.
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    sig_func_name = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    js_function = interpreter.extract_function(sig_func_name)

    def call_signature_function(s):
        # The extracted function takes its arguments as a list
        return js_function([s])
    return call_signature_function
1723
545cc85d 1724 def _decrypt_signature(self, s, video_id, player_url):
257a2501 1725 """Turn the encrypted s field into a working signature"""
6b37f0be 1726
c8bf86d5 1727 if player_url is None:
69ea8ca4 1728 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1729
c8bf86d5 1730 try:
62af3a0e 1731 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1732 if player_id not in self._player_cache:
1733 func = self._extract_signature_function(
60064c53 1734 video_id, player_url, s
c8bf86d5
PH
1735 )
1736 self._player_cache[player_id] = func
1737 func = self._player_cache[player_id]
a06916d9 1738 if self.get_param('youtube_print_sig_code'):
60064c53 1739 self._print_sig_code(func, s)
c8bf86d5
PH
1740 return func(s)
1741 except Exception as e:
1742 tb = traceback.format_exc()
1743 raise ExtractorError(
78caa52a 1744 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1745
109dd3b2 1746 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1747 """
1748 Extract signatureTimestamp (sts)
1749 Required to tell API what sig/player version is in use.
1750 """
1751 sts = None
1752 if isinstance(ytcfg, dict):
1753 sts = int_or_none(ytcfg.get('STS'))
1754
1755 if not sts:
1756 # Attempt to extract from player
1757 if player_url is None:
1758 error_msg = 'Cannot extract signature timestamp without player_url.'
1759 if fatal:
1760 raise ExtractorError(error_msg)
1761 self.report_warning(error_msg)
1762 return
1763 if self._load_player(video_id, player_url, fatal=fatal):
1764 player_id = self._extract_player_info(player_url)
1765 code = self._code_cache[player_id]
1766 sts = int_or_none(self._search_regex(
1767 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
1768 'JS player signature timestamp', group='sts', fatal=fatal))
1769 return sts
1770
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL from *player_response*, if it has one.

    Does nothing when no valid ``videostatsPlaybackUrl`` base URL is
    present. The request itself is non-fatal.
    """
    tracking_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not tracking_url:
        return

    url_parts = compat_urlparse.urlparse(tracking_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)]
    cpn = ''.join(cpn_chars)

    query['ver'] = ['2']
    query['cpn'] = [cpn]
    encoded_query = compat_urllib_parse_urlencode(query, True)
    tracking_url = compat_urlparse.urlunparse(url_parts._replace(query=encoded_query))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1795
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return the YouTube embeds found in *webpage*.

    Three sources are collected, in this order: embedded player URLs
    (HTML-unescaped), ``lazyYT`` ``data-youtube-id`` values
    (HTML-unescaped) and ``data-video_id`` values of the Wordpress
    "YouTube Video Importer" plugin (taken as-is).
    """
    # Embedded YouTube player
    # NOTE(review): 'embedSWF\(?:\s*' reads as "optional '(' then a literal
    # ':'"; it looks like a mistyped 'embedSWF\(\s*' - confirm intent
    # before changing, the pattern is kept verbatim here.
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    found = []
    for match in re.finditer(embed_re, webpage):
        found.append(unescapeHTML(match.group('url')))

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    found.extend(map(unescapeHTML, lazy_ids))

    # Wordpress "YouTube Video Importer" plugin
    plugin_matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall returns one tuple per match; the id is the last group
    found.extend(groups[-1] for groups in plugin_matches)

    return found
1827
@staticmethod
def _extract_url(webpage):
    """Return the first entry of ``YoutubeIE._extract_urls(webpage)``, or None if it is empty."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1832
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return group 2 of ``_VALID_URL`` (matched in verbose mode) for *url*.

    Raises ExtractorError when *url* does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1840
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a chapter list from the chaptered player bar in *data*.

    Returns None when *data* has no non-empty chapter list. Otherwise
    returns a list of dicts with 'start_time', 'end_time' and 'title'.
    A chapter ends where the next one starts and the last one ends at
    *duration*; chapters whose start or end cannot be determined are
    skipped.
    """
    raw_chapters = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not raw_chapters:
        return

    def start_seconds(item):
        # Start offset is given in milliseconds
        millis = try_get(
            item,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    total = len(raw_chapters)
    result = []
    for index, item in enumerate(raw_chapters):
        begin = start_seconds(item)
        if begin is None:
            continue
        if index + 1 < total:
            finish = start_seconds(raw_chapters[index + 1])
        else:
            finish = duration
        if finish is None:
            continue
        chapter_title = try_get(
            item, lambda x: x['chapterRenderer']['title']['simpleText'],
            compat_str)
        result.append({
            'start_time': begin,
            'end_time': finish,
            'title': chapter_title,
        })
    return result
1880
545cc85d 1881 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1882 return self._parse_json(self._search_regex(
1883 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1884 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 1885
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns the result of datetime_from_str for 'now-<X><units>', or None
    when the text has fewer than three space-separated parts.
    """
    parts = time_text.split(' ')
    if len(parts) < 3:
        return None
    amount, unit = parts[0], parts[1]
    return datetime_from_str('now-%s%s' % (amount, unit), precision='auto')
1895
a1c5d2ca
M
1896 @staticmethod
1897 def _join_text_entries(runs):
1898 text = None
1899 for run in runs:
1900 if not isinstance(run, dict):
1901 continue
1902 sub_text = try_get(run, lambda x: x['text'], compat_str)
1903 if sub_text:
1904 if not text:
1905 text = sub_text
1906 continue
1907 text += sub_text
1908 return text
1909
1910 def _extract_comment(self, comment_renderer, parent=None):
1911 comment_id = comment_renderer.get('commentId')
1912 if not comment_id:
1913 return
1914 comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
1915 text = self._join_text_entries(comment_text_runs) or ''
1916 comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
1917 time_text = self._join_text_entries(comment_time_text)
d92f5d5a 1918 timestamp = calendar.timegm(self.parse_time_text(time_text).timetuple())
a1c5d2ca
M
1919 author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
1920 author_id = try_get(comment_renderer,
1921 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
1922 votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
1923 lambda x: x['likeCount']), compat_str)) or 0
1924 author_thumbnail = try_get(comment_renderer,
1925 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
1926
1927 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
1928 is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
a1c5d2ca
M
1929 return {
1930 'id': comment_id,
1931 'text': text,
d92f5d5a 1932 'timestamp': timestamp,
a1c5d2ca
M
1933 'time_text': time_text,
1934 'like_count': votes,
1935 'is_favorited': is_liked,
1936 'author': author,
1937 'author_id': author_id,
1938 'author_thumbnail': author_thumbnail,
1939 'author_is_uploader': author_is_uploader,
1940 'parent': parent or 'root'
1941 }
1942
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, session_token_list, parent=None, comment_counts=None):
    """Generate comments (and their replies) by following continuations.

    Yields comment dicts as produced by _extract_comment. On the first
    top-level page it may additionally yield one int: the comment count
    parsed from the comments header.

    session_token_list is a one-element list holding the current session
    token; element 0 is replaced whenever a response carries 'xsrf_token',
    so nested reply generators see the refreshed token.
    comment_counts is a shared three-element list:
    [comments so far, estimated total, current reply thread number].
    parent is the parent comment id when fetching replies, None otherwise.
    """

    def extract_thread(parent_renderer):
        # Yield every comment found in parent_renderer['contents'], each
        # followed by its replies (fetched recursively).
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # NOTE(review): `dict` sits inside the getter tuple here, so it is
            # passed as a second getter rather than as the expected type -
            # looks unintended; confirm against try_get's signature.
            comment_renderer = try_get(
                comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                content, (lambda x: x['commentRenderer'], dict))

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    # TODO: Generalize the download code with TabIE
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    # Only the top-level call (no parent) handles the header/sort logic below
    first_continuation = False
    if parent is None:
        first_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        retries = self.get_param('extractor_retries', 3)
        count = -1
        last_error = None

        # Retry loop for one page: left via `break` once a response with
        # 'continuationContents' arrives, or by running out of retries.
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if 'itct' in continuation:
                    query['itct'] = continuation['itct']
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                # NOTE(review): leading whitespace inside the progress-note
                # literals below is kept as it appeared in the reviewed copy,
                # which may have collapsed runs of spaces - confirm.
                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = 'Downloading initial comment continuation page'
                    else:
                        note_prefix = ' Downloading comment reply thread %d %s' % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = '%sDownloading comment%s page %d %s' % (
                        ' ' if parent else '',
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        self.report_warning('Assumed end of comments (received HTTP Error 413)')
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + ' (this API is probably deprecated)'
                    if count < retries:
                        continue
                raise
            else:
                # Keep the shared token current for later requests
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                # The response may be a dict or a list whose second item holds it
                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response']), dict) or {}

                if response.get('continuationContents'):
                    break

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if isinstance(browse, dict):
                    if browse.get('reload'):
                        raise ExtractorError('Invalid or missing params in continuation request', expected=False)

                    # TODO: not tested, merged from old extractor
                    err_msg = browse.get('externalErrorMessage')
                    if err_msg:
                        last_error = err_msg
                        continue

                response_error = try_get(response, lambda x: x['responseContext']['errors']['error'][0], dict) or {}
                err_msg = response_error.get('externalErrorMessage')
                if err_msg:
                    last_error = err_msg
                    continue

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    raise ExtractorError(last_error)

        if not response:
            break
        # Prefer the visitorData returned by the server for the next request
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        known_continuation_renderers = {
            'itemSectionContinuation': extract_thread,
            'commentRepliesContinuation': extract_thread
        }

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value

            if first_continuation:
                first_continuation = False
                expected_comment_count = try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str)

                if expected_comment_count:
                    comment_counts[1] = str_to_int(expected_comment_count)
                    self.to_screen('Downloading ~%d comments' % str_to_int(expected_comment_count))
                    yield comment_counts[1]

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
                    [comment_sort_index]['continuation']['reloadContinuationData'], dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    # Restart from the sorted continuation; this page's
                    # comments are not yielded.
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen('Sorting comments by %s' % ('popular' if comment_sort_index == 0 else 'newest'))
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            break
2137
2138 def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
2139 """Entry for comment extraction"""
2140 comments = []
2141 known_entry_comment_renderers = (
2142 'itemSectionRenderer',
2143 )
2144 estimated_total = 0
2145 for entry in contents:
2146 for key, renderer in entry.items():
2147 if key not in known_entry_comment_renderers:
2148 continue
2149
2150 comment_iter = self._comment_entries(
2151 renderer,
2152 identity_token=self._extract_identity_token(webpage, item_id=video_id),
2153 account_syncid=self._extract_account_syncid(ytcfg),
f4f751af 2154 ytcfg=ytcfg,
a1c5d2ca
M
2155 session_token_list=[xsrf_token])
2156
2157 for comment in comment_iter:
2158 if isinstance(comment, int):
2159 estimated_total = comment
2160 continue
2161 comments.append(comment)
2162 break
d92f5d5a 2163 self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
a1c5d2ca
M
2164 return {
2165 'comments': comments,
2166 'comment_count': len(comments),
2167 }
2168
109dd3b2 2169 @staticmethod
2170 def _generate_player_context(sts=None):
2171 context = {
2172 'html5Preference': 'HTML5_PREF_WANTS',
2173 }
2174 if sts is not None:
2175 context['signatureTimestamp'] = sts
2176 return {
2177 'playbackContext': {
2178 'contentPlaybackContext': context
2179 }
2180 }
2181
4e6767b5 2182 @staticmethod
2183 def _get_video_info_params(video_id):
2184 return {
2185 'video_id': video_id,
2186 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
2187 'html5': '1',
2188 'c': 'TVHTML5',
2189 'cver': '6.20180913',
2190 }
2191
c5e8d7af 2192 def _real_extract(self, url):
cf7e015f 2193 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 2194 video_id = self._match_id(url)
9297939e 2195
2196 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
2197
545cc85d 2198 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 2199 webpage_url = base_url + 'watch?v=' + video_id
2200 webpage = self._download_webpage(
cce889b9 2201 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 2202
109dd3b2 2203 ytcfg = self._extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2204 identity_token = self._extract_identity_token(webpage, video_id)
2205 syncid = self._extract_account_syncid(ytcfg)
2206 headers = self._generate_api_headers(ytcfg, identity_token, syncid)
2207
2208 player_url = self._extract_player_url(ytcfg, webpage)
2209
2210 player_client = try_get(self._configuration_arg('player_client'), lambda x: x[0], str) or ''
2211 if player_client.upper() not in ('WEB', 'ANDROID'):
2212 player_client = 'WEB'
2213 force_mobile_client = player_client.upper() == 'ANDROID'
2214 player_skip = self._configuration_arg('player_skip') or []
2215
9297939e 2216 def get_text(x):
2217 if not x:
2218 return
2219 text = x.get('simpleText')
2220 if text and isinstance(text, compat_str):
2221 return text
2222 runs = x.get('runs')
2223 if not isinstance(runs, list):
2224 return
2225 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
2226
2227 ytm_streaming_data = {}
2228 if is_music_url:
109dd3b2 2229 ytm_webpage = None
2230 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2231 if sts and not force_mobile_client and 'configs' not in player_skip:
2232 ytm_webpage = self._download_webpage(
2233 'https://music.youtube.com',
2234 video_id, fatal=False, note="Downloading remix client config")
2235
2236 ytm_cfg = self._extract_ytcfg(video_id, ytm_webpage) or {}
2237 ytm_client = 'WEB_REMIX'
2238 if not sts or force_mobile_client:
2239 # Android client already has signature descrambled
2240 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2241 if not sts:
2242 self.report_warning('Falling back to mobile remix client for player API.')
2243 ytm_client = 'ANDROID_MUSIC'
2244 ytm_cfg = {}
2245
2246 ytm_headers = self._generate_api_headers(
2247 ytm_cfg, identity_token, syncid,
2248 client=ytm_client)
2249 ytm_query = {'videoId': video_id}
2250 ytm_query.update(self._generate_player_context(sts))
2251
2252 ytm_player_response = self._extract_response(
2253 item_id=video_id, ep='player', query=ytm_query,
2254 ytcfg=ytm_cfg, headers=ytm_headers, fatal=False,
2255 default_client=ytm_client,
2256 note='Downloading %sremix player API JSON' % ('mobile ' if force_mobile_client else ''))
2257
2258 ytm_streaming_data = try_get(ytm_player_response, lambda x: x['streamingData']) or {}
545cc85d 2259 player_response = None
2260 if webpage:
2261 player_response = self._extract_yt_initial_variable(
2262 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2263 video_id, 'initial player response')
f4f751af 2264
109dd3b2 2265 if not player_response or force_mobile_client:
2266 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2267 yt_client = 'WEB'
2268 ytpcfg = ytcfg
2269 ytp_headers = headers
2270 if not sts or force_mobile_client:
2271 # Android client already has signature descrambled
2272 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2273 if not sts:
2274 self.report_warning('Falling back to mobile client for player API.')
2275 yt_client = 'ANDROID'
2276 ytpcfg = {}
2277 ytp_headers = self._generate_api_headers(ytpcfg, identity_token, syncid, yt_client)
2278
2279 yt_query = {'videoId': video_id}
2280 yt_query.update(self._generate_player_context(sts))
2281 player_response = self._extract_response(
2282 item_id=video_id, ep='player', query=yt_query,
2283 ytcfg=ytpcfg, headers=ytp_headers, fatal=False,
2284 default_client=yt_client,
2285 note='Downloading %splayer API JSON' % ('mobile ' if force_mobile_client else '')
2286 )
545cc85d 2287
109dd3b2 2288 # Age-gate workarounds
545cc85d 2289 playability_status = player_response.get('playabilityStatus') or {}
109dd3b2 2290 if playability_status.get('reason') in self._AGE_GATE_REASONS:
545cc85d 2291 pr = self._parse_json(try_get(compat_parse_qs(
2292 self._download_webpage(
2293 base_url + 'get_video_info', video_id,
4e6767b5 2294 'Refetching age-gated info webpage', 'unable to download video info webpage',
2295 query=self._get_video_info_params(video_id), fatal=False)),
545cc85d 2296 lambda x: x['player_response'][0],
2297 compat_str) or '{}', video_id)
109dd3b2 2298 if not pr:
2299 self.report_warning('Falling back to embedded-only age-gate workaround.')
2300 embed_webpage = None
2301 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2302 if sts and not force_mobile_client and 'configs' not in player_skip:
2303 embed_webpage = self._download_webpage(
2304 'https://www.youtube.com/embed/%s?html5=1' % video_id,
2305 video_id=video_id, note='Downloading age-gated embed config')
2306
2307 ytcfg_age = self._extract_ytcfg(video_id, embed_webpage) or {}
2308 # If we extracted the embed webpage, it'll tell us if we can view the video
2309 embedded_pr = self._parse_json(
2310 try_get(ytcfg_age, lambda x: x['PLAYER_VARS']['embedded_player_response'], str) or '{}',
2311 video_id=video_id)
2312 embedded_ps_reason = try_get(embedded_pr, lambda x: x['playabilityStatus']['reason'], str) or ''
2313 if embedded_ps_reason not in self._AGE_GATE_REASONS:
2314 yt_client = 'WEB_EMBEDDED_PLAYER'
2315 if not sts or force_mobile_client:
2316 # Android client already has signature descrambled
2317 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2318 if not sts:
2319 self.report_warning(
2320 'Falling back to mobile embedded client for player API (note: some formats may be missing).')
2321 yt_client = 'ANDROID_EMBEDDED_PLAYER'
2322 ytcfg_age = {}
2323
2324 ytage_headers = self._generate_api_headers(
2325 ytcfg_age, identity_token, syncid, client=yt_client)
2326 yt_age_query = {'videoId': video_id}
2327 yt_age_query.update(self._generate_player_context(sts))
2328 pr = self._extract_response(
2329 item_id=video_id, ep='player', query=yt_age_query,
2330 ytcfg=ytcfg_age, headers=ytage_headers, fatal=False,
2331 default_client=yt_client,
2332 note='Downloading %sage-gated player API JSON' % ('mobile ' if force_mobile_client else '')
2333 ) or {}
2334
545cc85d 2335 if pr:
2336 player_response = pr
2337
2338 trailer_video_id = try_get(
2339 playability_status,
2340 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
2341 compat_str)
2342 if trailer_video_id:
2343 return self.url_result(
2344 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 2345
545cc85d 2346 search_meta = (
2347 lambda x: self._html_search_meta(x, webpage, default=None)) \
2348 if webpage else lambda x: None
dbdaaa23 2349
545cc85d 2350 video_details = player_response.get('videoDetails') or {}
37357d21 2351 microformat = try_get(
545cc85d 2352 player_response,
2353 lambda x: x['microformat']['playerMicroformatRenderer'],
2354 dict) or {}
2355 video_title = video_details.get('title') \
2356 or get_text(microformat.get('title')) \
2357 or search_meta(['og:title', 'twitter:title', 'title'])
2358 video_description = video_details.get('shortDescription')
cf7e015f 2359
8fe10494 2360 if not smuggled_data.get('force_singlefeed', False):
a06916d9 2361 if not self.get_param('noplaylist'):
8fe10494
S
2362 multifeed_metadata_list = try_get(
2363 player_response,
2364 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 2365 compat_str)
8fe10494
S
2366 if multifeed_metadata_list:
2367 entries = []
2368 feed_ids = []
2369 for feed in multifeed_metadata_list.split(','):
2370 # Unquote should take place before split on comma (,) since textual
2371 # fields may contain comma as well (see
067aa17e 2372 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 2373 feed_data = compat_parse_qs(
2374 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
2375
def feed_entry(name):
    """Fetch field `name` from the current feed's parsed metadata.

    Reads the enclosing loop's `feed_data` and asks try_get for the first
    element stored under `name`, accepting it only when it is a compat_str.
    Callers treat a falsy result as "field not present".
    """
    def first_value(fields):
        return fields[name][0]

    return try_get(feed_data, first_value, compat_str)
6b09401b
S
2379
2380 feed_id = feed_entry('id')
2381 if not feed_id:
2382 continue
2383 feed_title = feed_entry('title')
2384 title = video_title
2385 if feed_title:
2386 title += ' (%s)' % feed_title
8fe10494
S
2387 entries.append({
2388 '_type': 'url_transparent',
2389 'ie_key': 'Youtube',
2390 'url': smuggle_url(
545cc85d 2391 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 2392 {'force_singlefeed': True}),
6b09401b 2393 'title': title,
8fe10494 2394 })
6b09401b 2395 feed_ids.append(feed_id)
8fe10494
S
2396 self.to_screen(
2397 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2398 % (', '.join(feed_ids), video_id))
545cc85d 2399 return self.playlist_result(
2400 entries, video_id, video_title, video_description)
8fe10494
S
2401 else:
2402 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 2403
9297939e 2404 formats, itags, stream_ids = [], [], []
cc2db878 2405 itag_qualities = {}
d3fc8074 2406 q = qualities([
2407 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2408 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2409 ])
9297939e 2410
545cc85d 2411 streaming_data = player_response.get('streamingData') or {}
2412 streaming_formats = streaming_data.get('formats') or []
2413 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
9297939e 2414 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2415 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2416
545cc85d 2417 for fmt in streaming_formats:
2418 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2419 continue
321bf820 2420
cc2db878 2421 itag = str_or_none(fmt.get('itag'))
9297939e 2422 audio_track = fmt.get('audioTrack') or {}
2423 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2424 if stream_id in stream_ids:
2425 continue
2426
cc2db878 2427 quality = fmt.get('quality')
d3fc8074 2428 if quality == 'tiny' or not quality:
2429 quality = fmt.get('audioQuality', '').lower() or quality
cc2db878 2430 if itag and quality:
2431 itag_qualities[itag] = quality
2432 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2433 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2434 # number of fragments that would subsequently be requested (with `&sq=N`)
2435 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2436 continue
2437
545cc85d 2438 fmt_url = fmt.get('url')
2439 if not fmt_url:
2440 sc = compat_parse_qs(fmt.get('signatureCipher'))
2441 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2442 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2443 if not (sc and fmt_url and encrypted_sig):
2444 continue
545cc85d 2445 if not player_url:
201e9eaa 2446 continue
545cc85d 2447 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2448 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2449 fmt_url += '&' + sp + '=' + signature
2450
545cc85d 2451 if itag:
2452 itags.append(itag)
9297939e 2453 stream_ids.append(stream_id)
2454
cc2db878 2455 tbr = float_or_none(
2456 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 2457 dct = {
2458 'asr': int_or_none(fmt.get('audioSampleRate')),
2459 'filesize': int_or_none(fmt.get('contentLength')),
2460 'format_id': itag,
0fb983f6 2461 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
545cc85d 2462 'fps': int_or_none(fmt.get('fps')),
2463 'height': int_or_none(fmt.get('height')),
dca3ff4a 2464 'quality': q(quality),
cc2db878 2465 'tbr': tbr,
545cc85d 2466 'url': fmt_url,
2467 'width': fmt.get('width'),
0fb983f6 2468 'language': audio_track.get('id', '').split('.')[0],
545cc85d 2469 }
2470 mimetype = fmt.get('mimeType')
2471 if mimetype:
2472 mobj = re.match(
2473 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
2474 if mobj:
2475 dct['ext'] = mimetype2ext(mobj.group(1))
2476 dct.update(parse_codecs(mobj.group(2)))
cc2db878 2477 no_audio = dct.get('acodec') == 'none'
2478 no_video = dct.get('vcodec') == 'none'
2479 if no_audio:
2480 dct['vbr'] = tbr
2481 if no_video:
2482 dct['abr'] = tbr
2483 if no_audio or no_video:
545cc85d 2484 dct['downloader_options'] = {
2485 # Youtube throttles chunks >~10M
2486 'http_chunk_size': 10485760,
bf1317d2 2487 }
7c60c33e 2488 if dct.get('ext'):
2489 dct['container'] = dct['ext'] + '_dash'
545cc85d 2490 formats.append(dct)
2491
5d3a0e79 2492 skip_manifests = self._configuration_arg('skip') or []
2493 get_dash = 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
2494 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2495
9297939e 2496 for sd in (streaming_data, ytm_streaming_data):
5d3a0e79 2497 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
9297939e 2498 if hls_manifest_url:
2499 for f in self._extract_m3u8_formats(
2500 hls_manifest_url, video_id, 'mp4', fatal=False):
2501 itag = self._search_regex(
2502 r'/itag/(\d+)', f['url'], 'itag', default=None)
2503 if itag:
2504 f['format_id'] = itag
8d68ab98 2505 formats.append(f)
545cc85d 2506
5d3a0e79 2507 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2508 if dash_manifest_url:
2509 for f in self._extract_mpd_formats(
2510 dash_manifest_url, video_id, fatal=False):
2511 itag = f['format_id']
2512 if itag in itags:
2513 continue
2514 if itag in itag_qualities:
2515 f['quality'] = q(itag_qualities[itag])
2516 filesize = int_or_none(self._search_regex(
2517 r'/clen/(\d+)', f.get('fragment_base_url')
2518 or f['url'], 'file size', default=None))
2519 if filesize:
2520 f['filesize'] = filesize
2521 formats.append(f)
bf1317d2 2522
545cc85d 2523 if not formats:
a06916d9 2524 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
b7da73eb 2525 self.raise_no_formats(
545cc85d 2526 'This video is DRM protected.', expected=True)
2527 pemr = try_get(
2528 playability_status,
2529 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2530 dict) or {}
2531 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2532 subreason = pemr.get('subreason')
2533 if subreason:
2534 subreason = clean_html(get_text(subreason))
2535 if subreason == 'The uploader has not made this video available in your country.':
2536 countries = microformat.get('availableCountries')
2537 if not countries:
2538 regions_allowed = search_meta('regionsAllowed')
2539 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2540 self.raise_geo_restricted(subreason, countries, metadata_available=True)
545cc85d 2541 reason += '\n' + subreason
2542 if reason:
b7da73eb 2543 self.raise_no_formats(reason, expected=True)
bf1317d2 2544
545cc85d 2545 self._sort_formats(formats)
bf1317d2 2546
545cc85d 2547 keywords = video_details.get('keywords') or []
2548 if not keywords and webpage:
2549 keywords = [
2550 unescapeHTML(m.group('content'))
2551 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2552 for keyword in keywords:
2553 if keyword.startswith('yt:stretch='):
201c1459 2554 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2555 if mobj:
2556 # NB: float is intentional for forcing float division
2557 w, h = (float(v) for v in mobj.groups())
2558 if w > 0 and h > 0:
2559 ratio = w / h
2560 for f in formats:
2561 if f.get('vcodec') != 'none':
2562 f['stretched_ratio'] = ratio
2563 break
6449cd80 2564
545cc85d 2565 thumbnails = []
2566 for container in (video_details, microformat):
2567 for thumbnail in (try_get(
2568 container,
2569 lambda x: x['thumbnail']['thumbnails'], list) or []):
2570 thumbnail_url = thumbnail.get('url')
2571 if not thumbnail_url:
bf1317d2 2572 continue
1988fab7 2573 # Sometimes youtube gives a wrong thumbnail URL. See:
2574 # https://github.com/yt-dlp/yt-dlp/issues/233
2575 # https://github.com/ytdl-org/youtube-dl/issues/28023
2576 if 'maxresdefault' in thumbnail_url:
2577 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2578 thumbnails.append({
545cc85d 2579 'url': thumbnail_url,
ff2751ac 2580 'height': int_or_none(thumbnail.get('height')),
545cc85d 2581 'width': int_or_none(thumbnail.get('width')),
ff2751ac 2582 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
545cc85d 2583 })
ff2751ac 2584 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2585 if thumbnail_url:
2586 thumbnails.append({
2587 'url': thumbnail_url,
2588 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2589 })
2590 # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
2591 # See: https://github.com/ytdl-org/youtube-dl/issues/29049
2592 thumbnails.append({
2593 'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
2594 'preference': 1,
2595 })
2596 self._remove_duplicate_formats(thumbnails)
545cc85d 2597
2598 category = microformat.get('category') or search_meta('genre')
2599 channel_id = video_details.get('channelId') \
2600 or microformat.get('externalChannelId') \
2601 or search_meta('channelId')
2602 duration = int_or_none(
2603 video_details.get('lengthSeconds')
2604 or microformat.get('lengthSeconds')) \
2605 or parse_duration(search_meta('duration'))
2606 is_live = video_details.get('isLive')
2607 owner_profile_url = microformat.get('ownerProfileUrl')
2608
2609 info = {
2610 'id': video_id,
2611 'title': self._live_title(video_title) if is_live else video_title,
2612 'formats': formats,
2613 'thumbnails': thumbnails,
2614 'description': video_description,
2615 'upload_date': unified_strdate(
2616 microformat.get('uploadDate')
2617 or search_meta('uploadDate')),
2618 'uploader': video_details['author'],
2619 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2620 'uploader_url': owner_profile_url,
2621 'channel_id': channel_id,
2622 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2623 'duration': duration,
2624 'view_count': int_or_none(
2625 video_details.get('viewCount')
2626 or microformat.get('viewCount')
2627 or search_meta('interactionCount')),
2628 'average_rating': float_or_none(video_details.get('averageRating')),
2629 'age_limit': 18 if (
2630 microformat.get('isFamilySafe') is False
2631 or search_meta('isFamilyFriendly') == 'false'
2632 or search_meta('og:restrictions:age') == '18+') else 0,
2633 'webpage_url': webpage_url,
2634 'categories': [category] if category else None,
2635 'tags': keywords,
2636 'is_live': is_live,
2637 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2638 'was_live': video_details.get('isLiveContent'),
545cc85d 2639 }
b477fc13 2640
545cc85d 2641 pctr = try_get(
2642 player_response,
2643 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2644 subtitles = {}
2645 if pctr:
def process_language(container, base_url, lang_code, sub_name, query):
    """Register one subtitle entry per known format for a language.

    container -- dict mapping a language code to a list of subtitle dicts;
                 the list for `lang_code` is created when absent
    base_url  -- caption URL that the query parameters are merged into
    lang_code -- key under which the entries are stored
    sub_name  -- value stored as 'name' on every entry
    query     -- dict of extra URL parameters; it is modified in place
                 ('fmt' is overwritten for each format in turn)
    """
    entries = container.setdefault(lang_code, [])
    for sub_format in self._SUBTITLE_FORMATS:
        # Set the format first so the URL built below requests this format.
        query['fmt'] = sub_format
        entries.append({
            'ext': sub_format,
            'url': update_url_query(base_url, query),
            'name': sub_name,
        })
7e72694b 2657
545cc85d 2658 for caption_track in (pctr.get('captionTracks') or []):
2659 base_url = caption_track.get('baseUrl')
2660 if not base_url:
2661 continue
2662 if caption_track.get('kind') != 'asr':
120916da 2663 lang_code = (
2664 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2665 or caption_track.get('languageCode'))
545cc85d 2666 if not lang_code:
2667 continue
2668 process_language(
774d79cc 2669 subtitles, base_url, lang_code,
2670 try_get(caption_track, lambda x: x.get('name').get('simpleText')),
2671 {})
545cc85d 2672 continue
2673 automatic_captions = {}
2674 for translation_language in (pctr.get('translationLanguages') or []):
2675 translation_language_code = translation_language.get('languageCode')
2676 if not translation_language_code:
2677 continue
2678 process_language(
2679 automatic_captions, base_url, translation_language_code,
49c258e1 2680 try_get(translation_language, (
2681 lambda x: x['languageName']['simpleText'],
2682 lambda x: x['languageName']['runs'][0]['text'])),
545cc85d 2683 {'tlang': translation_language_code})
2684 info['automatic_captions'] = automatic_captions
2685 info['subtitles'] = subtitles
7e72694b 2686
545cc85d 2687 parsed_url = compat_urllib_parse_urlparse(url)
2688 for component in [parsed_url.fragment, parsed_url.query]:
2689 query = compat_parse_qs(component)
2690 for k, v in query.items():
2691 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2692 d_k += '_time'
2693 if d_k not in info and k in s_ks:
2694 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2695
2696 # Youtube Music Auto-generated description
822b9d9c 2697 if video_description:
38d70284 2698 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2699 if mobj:
822b9d9c
RA
2700 release_year = mobj.group('release_year')
2701 release_date = mobj.group('release_date')
2702 if release_date:
2703 release_date = release_date.replace('-', '')
2704 if not release_year:
545cc85d 2705 release_year = release_date[:4]
2706 info.update({
2707 'album': mobj.group('album'.strip()),
2708 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2709 'track': mobj.group('track').strip(),
2710 'release_date': release_date,
cc2db878 2711 'release_year': int_or_none(release_year),
545cc85d 2712 })
7e72694b 2713
545cc85d 2714 initial_data = None
2715 if webpage:
2716 initial_data = self._extract_yt_initial_variable(
2717 webpage, self._YT_INITIAL_DATA_RE, video_id,
2718 'yt initial data')
2719 if not initial_data:
109dd3b2 2720 initial_data = self._extract_response(
2721 item_id=video_id, ep='next', fatal=False,
2722 ytcfg=ytcfg, headers=headers, query={'videoId': video_id},
2723 note='Downloading initial data API JSON')
545cc85d 2724
c60ee3a2 2725 try:
2726 # This will error if there is no livechat
2727 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2728 info['subtitles']['live_chat'] = [{
2729 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2730 'video_id': video_id,
2731 'ext': 'json',
2732 'protocol': 'youtube_live_chat' if is_live else 'youtube_live_chat_replay',
2733 }]
2734 except (KeyError, IndexError, TypeError):
2735 pass
545cc85d 2736
2737 if initial_data:
2738 chapters = self._extract_chapters_from_json(
2739 initial_data, video_id, duration)
2740 if not chapters:
2741 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2742 contents = try_get(
2743 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2744 list)
2745 if not contents:
2746 continue
2747
def chapter_time(mmlir):
    """Return the parsed 'timeDescription' of a macro-marker list item.

    `mmlir` is a 'macroMarkersListItemRenderer' dict. It may also be None:
    the end time of a chapter is looked up with try_get on the following
    list item, which does not yield a dict when that item lacks such a
    renderer. A missing renderer is treated like an empty one instead of
    raising AttributeError on `.get`.

    NOTE(review): relies on get_text/parse_duration passing a missing value
    through as None (other call sites in this function already hand them
    possibly-missing values), so the caller's `is None` check skips the
    chapter -- confirm.
    """
    time_description = (mmlir or {}).get('timeDescription')
    return parse_duration(get_text(time_description))
2751
2752 chapters = []
2753 for next_num, content in enumerate(contents, start=1):
2754 mmlir = content.get('macroMarkersListItemRenderer') or {}
2755 start_time = chapter_time(mmlir)
2756 end_time = chapter_time(try_get(
2757 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2758 if next_num < len(contents) else duration
2759 if start_time is None or end_time is None:
2760 continue
2761 chapters.append({
2762 'start_time': start_time,
2763 'end_time': end_time,
2764 'title': get_text(mmlir.get('title')),
2765 })
2766 if chapters:
2767 break
2768 if chapters:
2769 info['chapters'] = chapters
2770
2771 contents = try_get(
2772 initial_data,
2773 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2774 list) or []
2775 for content in contents:
2776 vpir = content.get('videoPrimaryInfoRenderer')
2777 if vpir:
2778 stl = vpir.get('superTitleLink')
2779 if stl:
2780 stl = get_text(stl)
2781 if try_get(
2782 vpir,
2783 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2784 info['location'] = stl
2785 else:
2786 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2787 if mobj:
2788 info.update({
2789 'series': mobj.group(1),
2790 'season_number': int(mobj.group(2)),
2791 'episode_number': int(mobj.group(3)),
2792 })
2793 for tlb in (try_get(
2794 vpir,
2795 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2796 list) or []):
2797 tbr = tlb.get('toggleButtonRenderer') or {}
2798 for getter, regex in [(
2799 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2800 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2801 lambda x: x['accessibility'],
2802 lambda x: x['accessibilityData']['accessibilityData'],
2803 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2804 label = (try_get(tbr, getter, dict) or {}).get('label')
2805 if label:
2806 mobj = re.match(regex, label)
2807 if mobj:
2808 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2809 break
2810 sbr_tooltip = try_get(
2811 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2812 if sbr_tooltip:
2813 like_count, dislike_count = sbr_tooltip.split(' / ')
2814 info.update({
2815 'like_count': str_to_int(like_count),
2816 'dislike_count': str_to_int(dislike_count),
2817 })
2818 vsir = content.get('videoSecondaryInfoRenderer')
2819 if vsir:
2820 info['channel'] = get_text(try_get(
2821 vsir,
2822 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2823 dict))
545cc85d 2824 rows = try_get(
2825 vsir,
2826 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2827 list) or []
2828 multiple_songs = False
2829 for row in rows:
2830 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2831 multiple_songs = True
2832 break
2833 for row in rows:
2834 mrr = row.get('metadataRowRenderer') or {}
2835 mrr_title = mrr.get('title')
2836 if not mrr_title:
2837 continue
2838 mrr_title = get_text(mrr['title'])
2839 mrr_contents_text = get_text(mrr['contents'][0])
2840 if mrr_title == 'License':
2841 info['license'] = mrr_contents_text
2842 elif not multiple_songs:
2843 if mrr_title == 'Album':
2844 info['album'] = mrr_contents_text
2845 elif mrr_title == 'Artist':
2846 info['artist'] = mrr_contents_text
2847 elif mrr_title == 'Song':
2848 info['track'] = mrr_contents_text
2849
2850 fallbacks = {
2851 'channel': 'uploader',
2852 'channel_id': 'uploader_id',
2853 'channel_url': 'uploader_url',
2854 }
2855 for to, frm in fallbacks.items():
2856 if not info.get(to):
2857 info[to] = info.get(frm)
2858
2859 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2860 v = info.get(s_k)
2861 if v:
2862 info[d_k] = v
b84071c0 2863
c224251a
M
2864 is_private = bool_or_none(video_details.get('isPrivate'))
2865 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2866 is_membersonly = None
b28f8d24 2867 is_premium = None
c224251a
M
2868 if initial_data and is_private is not None:
2869 is_membersonly = False
b28f8d24 2870 is_premium = False
c224251a
M
2871 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2872 for content in contents or []:
2873 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2874 for badge in badges or []:
2875 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2876 if label.lower() == 'members only':
2877 is_membersonly = True
2878 break
b28f8d24
M
2879 elif label.lower() == 'premium':
2880 is_premium = True
2881 break
2882 if is_membersonly or is_premium:
c224251a
M
2883 break
2884
2885 # TODO: Add this for playlists
2886 info['availability'] = self._availability(
2887 is_private=is_private,
b28f8d24 2888 needs_premium=is_premium,
c224251a
M
2889 needs_subscription=is_membersonly,
2890 needs_auth=info['age_limit'] >= 18,
2891 is_unlisted=None if is_private is None else is_unlisted)
2892
06167fbb 2893 # get xsrf for annotations or comments
a06916d9 2894 get_annotations = self.get_param('writeannotations', False)
2895 get_comments = self.get_param('getcomments', False)
06167fbb 2896 if get_annotations or get_comments:
29f7c58a 2897 xsrf_token = None
545cc85d 2898 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2899 if ytcfg:
2900 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2901 if not xsrf_token:
2902 xsrf_token = self._search_regex(
2903 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2904 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2905
2906 # annotations
06167fbb 2907 if get_annotations:
64b6a4e9
RA
2908 invideo_url = try_get(
2909 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2910 if xsrf_token and invideo_url:
29f7c58a 2911 xsrf_field_name = None
2912 if ytcfg:
2913 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2914 if not xsrf_field_name:
2915 xsrf_field_name = self._search_regex(
2916 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2917 webpage, 'xsrf field name',
29f7c58a 2918 group='xsrf_field_name', default='session_token')
8a784c74 2919 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2920 self._proto_relative_url(invideo_url),
2921 video_id, note='Downloading annotations',
2922 errnote='Unable to download video annotations', fatal=False,
2923 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2924
277d6ff5 2925 if get_comments:
a1c5d2ca 2926 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage, xsrf_token)
4ea3be0a 2927
545cc85d 2928 self.mark_watched(video_id, player_response)
d77ab8e2 2929
545cc85d 2930 return info
c5e8d7af 2931
5f6a1245 2932
8bdd16b4 2933class YoutubeTabIE(YoutubeBaseInfoExtractor):
2934 IE_DESC = 'YouTube.com tab'
70d5c17b 2935 _VALID_URL = r'''(?x)
2936 https?://
2937 (?:\w+\.)?
2938 (?:
2939 youtube(?:kids)?\.com|
2940 invidio\.us
2941 )/
2942 (?:
fe03a6cd 2943 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 2944 (?P<not_channel>
9ba5705a 2945 feed/|hashtag/|
70d5c17b 2946 (?:playlist|watch)\?.*?\blist=
2947 )|
29f7c58a 2948 (?!(?:%s)\b) # Direct URLs
70d5c17b 2949 )
2950 (?P<id>[^/?\#&]+)
2951 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2952 IE_NAME = 'youtube:tab'
2953
81127aa5 2954 _TESTS = [{
da692b79 2955 'note': 'playlists, multipage',
8bdd16b4 2956 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2957 'playlist_mincount': 94,
2958 'info_dict': {
2959 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2960 'title': 'Игорь Клейнер - Playlists',
2961 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2962 'uploader': 'Игорь Клейнер',
2963 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2964 },
2965 }, {
da692b79 2966 'note': 'playlists, multipage, different order',
8bdd16b4 2967 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2968 'playlist_mincount': 94,
2969 'info_dict': {
2970 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2971 'title': 'Игорь Клейнер - Playlists',
2972 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2973 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2974 'uploader': 'Игорь Клейнер',
8bdd16b4 2975 },
201c1459 2976 }, {
da692b79 2977 'note': 'playlists, series',
201c1459 2978 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
2979 'playlist_mincount': 5,
2980 'info_dict': {
2981 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2982 'title': '3Blue1Brown - Playlists',
2983 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 2984 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
2985 'uploader': '3Blue1Brown',
201c1459 2986 },
8bdd16b4 2987 }, {
da692b79 2988 'note': 'playlists, singlepage',
8bdd16b4 2989 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2990 'playlist_mincount': 4,
2991 'info_dict': {
2992 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2993 'title': 'ThirstForScience - Playlists',
2994 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2995 'uploader': 'ThirstForScience',
2996 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2997 }
2998 }, {
2999 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3000 'only_matching': True,
3001 }, {
da692b79 3002 'note': 'basic, single video playlist',
0e30a7b9 3003 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3004 'info_dict': {
0e30a7b9 3005 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3006 'uploader': 'Sergey M.',
3007 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3008 'title': 'youtube-dl public playlist',
81127aa5 3009 },
0e30a7b9 3010 'playlist_count': 1,
9291475f 3011 }, {
da692b79 3012 'note': 'empty playlist',
0e30a7b9 3013 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3014 'info_dict': {
0e30a7b9 3015 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3016 'uploader': 'Sergey M.',
3017 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3018 'title': 'youtube-dl empty playlist',
9291475f
PH
3019 },
3020 'playlist_count': 0,
3021 }, {
da692b79 3022 'note': 'Home tab',
8bdd16b4 3023 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3024 'info_dict': {
8bdd16b4 3025 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3026 'title': 'lex will - Home',
3027 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3028 'uploader': 'lex will',
3029 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3030 },
8bdd16b4 3031 'playlist_mincount': 2,
9291475f 3032 }, {
da692b79 3033 'note': 'Videos tab',
8bdd16b4 3034 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3035 'info_dict': {
8bdd16b4 3036 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3037 'title': 'lex will - Videos',
3038 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3039 'uploader': 'lex will',
3040 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3041 },
8bdd16b4 3042 'playlist_mincount': 975,
9291475f 3043 }, {
da692b79 3044 'note': 'Videos tab, sorted by popular',
8bdd16b4 3045 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3046 'info_dict': {
8bdd16b4 3047 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3048 'title': 'lex will - Videos',
3049 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3050 'uploader': 'lex will',
3051 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3052 },
8bdd16b4 3053 'playlist_mincount': 199,
9291475f 3054 }, {
da692b79 3055 'note': 'Playlists tab',
8bdd16b4 3056 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3057 'info_dict': {
8bdd16b4 3058 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3059 'title': 'lex will - Playlists',
3060 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3061 'uploader': 'lex will',
3062 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3063 },
8bdd16b4 3064 'playlist_mincount': 17,
ac7553d0 3065 }, {
da692b79 3066 'note': 'Community tab',
8bdd16b4 3067 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3068 'info_dict': {
8bdd16b4 3069 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3070 'title': 'lex will - Community',
3071 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3072 'uploader': 'lex will',
3073 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3074 },
3075 'playlist_mincount': 18,
87dadd45 3076 }, {
da692b79 3077 'note': 'Channels tab',
8bdd16b4 3078 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3079 'info_dict': {
8bdd16b4 3080 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3081 'title': 'lex will - Channels',
3082 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3083 'uploader': 'lex will',
3084 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3085 },
deaec5af 3086 'playlist_mincount': 12,
cd684175 3087 }, {
3088 'note': 'Search tab',
3089 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3090 'playlist_mincount': 40,
3091 'info_dict': {
3092 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3093 'title': '3Blue1Brown - Search - linear algebra',
3094 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3095 'uploader': '3Blue1Brown',
3096 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3097 },
6b08cdf6 3098 }, {
a0566bbf 3099 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3100 'only_matching': True,
3101 }, {
a0566bbf 3102 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3103 'only_matching': True,
3104 }, {
a0566bbf 3105 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3106 'only_matching': True,
3107 }, {
3108 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3109 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3110 'info_dict': {
3111 'title': '29C3: Not my department',
3112 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3113 'uploader': 'Christiaan008',
3114 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3115 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3116 },
3117 'playlist_count': 96,
3118 }, {
3119 'note': 'Large playlist',
3120 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3121 'info_dict': {
8bdd16b4 3122 'title': 'Uploads from Cauchemar',
3123 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3124 'uploader': 'Cauchemar',
3125 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3126 },
8bdd16b4 3127 'playlist_mincount': 1123,
3128 }, {
da692b79 3129 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3130 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3131 'only_matching': True,
4b7df0d3
JMF
3132 }, {
3133 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3134 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3135 'info_dict': {
acf757f4
PH
3136 'title': 'Uploads from Interstellar Movie',
3137 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3138 'uploader': 'Interstellar Movie',
8bdd16b4 3139 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3140 },
481cc733 3141 'playlist_mincount': 21,
358de58c 3142 }, {
3143 'note': 'Playlist with "show unavailable videos" button',
3144 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3145 'info_dict': {
3146 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3147 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3148 'uploader': 'Phim Siêu Nhân Nhật Bản',
3149 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3150 },
da692b79 3151 'playlist_mincount': 200,
5d342002 3152 }, {
da692b79 3153 'note': 'Playlist with unavailable videos in page 7',
5d342002 3154 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3155 'info_dict': {
3156 'title': 'Uploads from BlankTV',
3157 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3158 'uploader': 'BlankTV',
3159 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3160 },
da692b79 3161 'playlist_mincount': 1000,
8bdd16b4 3162 }, {
da692b79 3163 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3164 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3165 'info_dict': {
3166 'title': 'Data Analysis with Dr Mike Pound',
3167 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3168 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3169 'uploader': 'Computerphile',
deaec5af 3170 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3171 },
3172 'playlist_mincount': 11,
3173 }, {
a0566bbf 3174 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3175 'only_matching': True,
dacb3a86 3176 }, {
da692b79 3177 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3178 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3179 'info_dict': {
3180 'id': 'FqZTN594JQw',
3181 'ext': 'webm',
3182 'title': "Smiley's People 01 detective, Adventure Series, Action",
3183 'uploader': 'STREEM',
3184 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3185 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3186 'upload_date': '20150526',
3187 'license': 'Standard YouTube License',
3188 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3189 'categories': ['People & Blogs'],
3190 'tags': list,
dbdaaa23 3191 'view_count': int,
dacb3a86
S
3192 'like_count': int,
3193 'dislike_count': int,
3194 },
3195 'params': {
3196 'skip_download': True,
3197 },
13a75688 3198 'skip': 'This video is not available.',
dacb3a86 3199 'add_ie': [YoutubeIE.ie_key()],
481cc733 3200 }, {
8bdd16b4 3201 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3202 'only_matching': True,
66b48727 3203 }, {
8bdd16b4 3204 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3205 'only_matching': True,
a0566bbf 3206 }, {
3207 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3208 'info_dict': {
da692b79 3209 'id': 'X1whbWASnNQ', # This will keep changing
a0566bbf 3210 'ext': 'mp4',
deaec5af 3211 'title': compat_str,
a0566bbf 3212 'uploader': 'Sky News',
3213 'uploader_id': 'skynews',
3214 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3215 'upload_date': r're:\d{8}',
3216 'description': compat_str,
a0566bbf 3217 'categories': ['News & Politics'],
3218 'tags': list,
3219 'like_count': int,
3220 'dislike_count': int,
3221 },
3222 'params': {
3223 'skip_download': True,
3224 },
da692b79 3225 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3226 }, {
3227 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3228 'info_dict': {
3229 'id': 'a48o2S1cPoo',
3230 'ext': 'mp4',
3231 'title': 'The Young Turks - Live Main Show',
3232 'uploader': 'The Young Turks',
3233 'uploader_id': 'TheYoungTurks',
3234 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3235 'upload_date': '20150715',
3236 'license': 'Standard YouTube License',
3237 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3238 'categories': ['News & Politics'],
3239 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3240 'like_count': int,
3241 'dislike_count': int,
3242 },
3243 'params': {
3244 'skip_download': True,
3245 },
3246 'only_matching': True,
3247 }, {
3248 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3249 'only_matching': True,
3250 }, {
3251 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3252 'only_matching': True,
09f1580e 3253 }, {
3254 'note': 'A channel that is not live. Should raise error',
3255 'url': 'https://www.youtube.com/user/numberphile/live',
3256 'only_matching': True,
3d3dddc9 3257 }, {
3258 'url': 'https://www.youtube.com/feed/trending',
3259 'only_matching': True,
3260 }, {
3d3dddc9 3261 'url': 'https://www.youtube.com/feed/library',
3262 'only_matching': True,
3263 }, {
3d3dddc9 3264 'url': 'https://www.youtube.com/feed/history',
3265 'only_matching': True,
3266 }, {
3d3dddc9 3267 'url': 'https://www.youtube.com/feed/subscriptions',
3268 'only_matching': True,
3269 }, {
3d3dddc9 3270 'url': 'https://www.youtube.com/feed/watch_later',
3271 'only_matching': True,
3272 }, {
da692b79 3273 'note': 'Recommended - redirects to home page',
3d3dddc9 3274 'url': 'https://www.youtube.com/feed/recommended',
3275 'only_matching': True,
29f7c58a 3276 }, {
da692b79 3277 'note': 'inline playlist with not always working continuations',
29f7c58a 3278 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3279 'only_matching': True,
3280 }, {
3281 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3282 'only_matching': True,
3283 }, {
3284 'url': 'https://www.youtube.com/course',
3285 'only_matching': True,
3286 }, {
3287 'url': 'https://www.youtube.com/zsecurity',
3288 'only_matching': True,
3289 }, {
3290 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3291 'only_matching': True,
3292 }, {
3293 'url': 'https://www.youtube.com/TheYoungTurks/live',
3294 'only_matching': True,
39ed931e 3295 }, {
3296 'url': 'https://www.youtube.com/hashtag/cctv9',
3297 'info_dict': {
3298 'id': 'cctv9',
3299 'title': '#cctv9',
3300 },
3301 'playlist_mincount': 350,
201c1459 3302 }, {
3303 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3304 'only_matching': True,
9297939e 3305 }, {
da692b79 3306 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3307 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3308 'only_matching': True
fe03a6cd 3309 }, {
3310 'note': '/browse/ should redirect to /channel/',
3311 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3312 'only_matching': True
3313 }, {
3314 'note': 'VLPL, should redirect to playlist?list=PL...',
3315 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3316 'info_dict': {
3317 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3318 'uploader': 'NoCopyrightSounds',
3319 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3320 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3321 'title': 'NCS Releases',
3322 },
3323 'playlist_mincount': 166,
18db7548 3324 }, {
3325 'note': 'Topic, should redirect to playlist?list=UU...',
3326 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3327 'info_dict': {
3328 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3329 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3330 'title': 'Uploads from Royalty Free Music - Topic',
3331 'uploader': 'Royalty Free Music - Topic',
3332 },
3333 'expected_warnings': [
3334 'A channel/user page was given',
3335 'The URL does not have a videos tab',
3336 ],
3337 'playlist_mincount': 101,
3338 }, {
3339 'note': 'Topic without a UU playlist',
3340 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3341 'info_dict': {
3342 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3343 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3344 },
3345 'expected_warnings': [
3346 'A channel/user page was given',
3347 'The URL does not have a videos tab',
3348 'Falling back to channel URL',
3349 ],
3350 'playlist_mincount': 9,
abcdd12b 3351 }, {
3352 'note': 'Youtube music Album',
3353 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3354 'info_dict': {
3355 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3356 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3357 },
3358 'playlist_count': 50,
29f7c58a 3359 }]
3360
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL that YoutubeIE accepts is rejected here; for everything else
    the decision is delegated to the parent class implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 3365
3366 def _extract_channel_id(self, webpage):
3367 channel_id = self._html_search_meta(
3368 'channelId', webpage, 'channel id', default=None)
3369 if channel_id:
3370 return channel_id
3371 channel_url = self._html_search_meta(
3372 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3373 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3374 'twitter:app:url:googleplay'), webpage, 'channel url')
3375 return self._search_regex(
3376 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3377 channel_url, 'channel id')
15f6397c 3378
8bdd16b4 3379 @staticmethod
cd7c66cf 3380 def _extract_basic_item_renderer(item):
3381 # Modified from _extract_grid_item_renderer
201c1459 3382 known_basic_renderers = (
3383 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3384 )
3385 for key, renderer in item.items():
201c1459 3386 if not isinstance(renderer, dict):
cd7c66cf 3387 continue
201c1459 3388 elif key in known_basic_renderers:
3389 return renderer
3390 elif key.startswith('grid') and key.endswith('Renderer'):
3391 return renderer
8bdd16b4 3392
def _grid_entries(self, grid_renderer):
    """Yield one result per recognised item in ``grid_renderer['items']``.

    Each item is classified, in this order, as a playlist, a video, a
    channel, or a generic navigation endpoint whose URL one of the YouTube
    extractors accepts.  Items matching none of these are dropped.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(renderer, dict):
            continue
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']),
            compat_str)

        # playlist
        playlist_id = renderer.get('playlistId')
        if playlist_id:
            playlist_url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            yield self.url_result(
                playlist_url, ie=YoutubeTabIE.ie_key(),
                video_id=playlist_id, video_title=title)
            continue

        # video
        if renderer.get('videoId'):
            yield self._extract_video(renderer)
            continue

        # channel (its title is read from simpleText only)
        channel_id = renderer.get('channelId')
        if channel_id:
            channel_title = try_get(
                renderer, lambda x: x['title']['simpleText'], compat_str)
            channel_url = 'https://www.youtube.com/channel/%s' % channel_id
            yield self.url_result(
                channel_url, ie=YoutubeTabIE.ie_key(), video_title=channel_title)
            continue

        # generic endpoint URL support
        endpoint_path = try_get(
            renderer,
            lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str)
        endpoint_url = urljoin('https://www.youtube.com/', endpoint_path)
        if not endpoint_url:
            continue
        for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
            if not ie.suitable(endpoint_url):
                continue
            yield self.url_result(
                endpoint_url, ie=ie.ie_key(),
                video_id=ie._match_id(endpoint_url), video_title=title)
            break
8bdd16b4 3435
3d3dddc9 3436 def _shelf_entries_from_content(self, shelf_renderer):
3437 content = shelf_renderer.get('content')
3438 if not isinstance(content, dict):
8bdd16b4 3439 return
cd7c66cf 3440 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3441 if renderer:
3442 # TODO: add support for nested playlists so each shelf is processed
3443 # as separate playlist
3444 # TODO: this includes only first N items
3445 for entry in self._grid_entries(renderer):
3446 yield entry
3447 renderer = content.get('horizontalListRenderer')
3448 if renderer:
3449 # TODO
3450 pass
8bdd16b4 3451
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield results for a shelf: its own URL first, then its inline content.

    When *skip_channels* is true and the shelf URL contains ``/channels?``,
    nothing at all is yielded for that shelf.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Links to other channels are skipped by inspecting the URL; checking
        # endpoint.commandMetadata.webCommandMetadata.webPageType against
        # WEB_PAGE_TYPE_CHANNEL will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # A shelf may come without a shelf URL, so its content is always
    # inspected as well
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 3469
8bdd16b4 3470 def _playlist_entries(self, video_list_renderer):
3471 for content in video_list_renderer['contents']:
3472 if not isinstance(content, dict):
3473 continue
3474 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3475 if not isinstance(renderer, dict):
3476 continue
3477 video_id = renderer.get('videoId')
3478 if not video_id:
3479 continue
3480 yield self._extract_video(renderer)
07aeced6 3481
def _rich_entries(self, rich_grid_renderer):
    """Yield the single video held in ``content.videoRenderer``, if any."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3489
8bdd16b4 3490 def _video_entry(self, video_renderer):
3491 video_id = video_renderer.get('videoId')
3492 if video_id:
3493 return self._extract_video(video_renderer)
dacb3a86 3494
def _post_thread_entries(self, post_thread_renderer):
    """Yield results for everything attached to or linked from a post.

    Covers, in order: the attached video, the attached playlist, and every
    link in the post text that YoutubeIE accepts.  A text link pointing to
    the attached video is not yielded a second time.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        if linked_id == video_id:
            continue
        yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)
dacb3a86 3530
8bdd16b4 3531 def _post_thread_continuation_entries(self, post_thread_continuation):
3532 contents = post_thread_continuation.get('contents')
3533 if not isinstance(contents, list):
3534 return
3535 for content in contents:
3536 renderer = content.get('backstagePostThreadRenderer')
3537 if not isinstance(renderer, dict):
3538 continue
3539 for entry in self._post_thread_entries(renderer):
3540 yield entry
07aeced6 3541
39ed931e 3542 r''' # unused
3543 def _rich_grid_entries(self, contents):
3544 for content in contents:
3545 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3546 if video_renderer:
3547 entry = self._video_entry(video_renderer)
3548 if entry:
3549 yield entry
3550 '''
3551
29f7c58a 3552 @staticmethod
3553 def _build_continuation_query(continuation, ctp=None):
3554 query = {
3555 'ctoken': continuation,
3556 'continuation': continuation,
3557 }
3558 if ctp:
3559 query['itct'] = ctp
3560 return query
3561
@staticmethod
def _extract_next_continuation_data(renderer):
    """Build a continuation query from ``continuations[0].nextContinuationData``.

    Returns None when that structure or its ``continuation`` token is
    missing.
    """
    next_data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
    token = next_data.get('continuation') if next_data else None
    if not token:
        return None
    return YoutubeTabIE._build_continuation_query(
        token, next_data.get('clickTrackingParams'))
c5e8d7af 3573
@classmethod
def _extract_continuation(cls, renderer):
    """Find the continuation query for *renderer*, if there is one.

    ``nextContinuationData`` is tried first.  Failing that, the renderer's
    ``contents`` and ``items`` lists are scanned for the first
    ``continuationItemRenderer`` that carries a continuation token.
    Returns None when no token is found.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query
    candidates = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
    return None
448830ce 3596
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield all entries of *tab*, following continuations page by page.

    The entries embedded in the tab's own content are yielded first.  Then
    continuation pages are requested through ``_extract_response`` until no
    continuation is left, the response is empty, or a response contains no
    structure this method knows how to read.

    *item_id* is only used to label the page requests; *identity_token*,
    *account_syncid* and *ytcfg* are passed on to build the request headers.
    """

    def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                # Not an item section: only rich items are handled here
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                # Only the first known renderer of each content is processed
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list that extract_entries writes the next continuation to
    continuation_list = [None]  # Python 2 does not support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

    for page_num in itertools.count(1):
        if not continuation:
            break
        query = {'continuation': continuation['continuation']}
        # 'itct' is only present when the continuation came with click
        # tracking params; indexing it unconditionally raised KeyError
        click_tracking_params = continuation.get('itct')
        if click_tracking_params:
            query['clickTracking'] = {'clickTrackingParams': click_tracking_params}
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=query, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep using the previous visitor data if the response has none
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        # First response layout: 'continuationContents'
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response layout: 'appendContinuationItemsAction'. The type
        # of the first item selects the handler and the key under which the
        # whole item list is passed to it.
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Nothing recognisable in this response: stop paging
        break
9558dcec 3716
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the tab whose ``selected`` flag is True.

    Both ``tabRenderer`` and ``expandableTabRenderer`` are considered.
    Raises ExtractorError when no tab is selected.
    """
    for tab in tabs:
        tab_renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    raise ExtractorError('Unable to find selected tab')
b82f815f 3725
@staticmethod
def _extract_uploader(data):
    """Collect uploader fields from the playlist sidebar of *data*.

    Returns a dict with whichever of ``uploader``, ``uploader_id`` and
    ``uploader_url`` could be read; fields whose value is None are left out.
    """
    found = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    for sidebar_item in sidebar_items or []:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info,
            lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        found['uploader'] = owner.get('text')
        found['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        base_url = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
        found['uploader_url'] = urljoin('https://www.youtube.com/', base_url)
    return {field: value for field, value in found.items() if value is not None}
8bdd16b4 3748
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a tabbed page (channel, playlist, ...).

    Metadata comes from ``channelMetadataRenderer`` when present, otherwise
    from ``playlistMetadataRenderer``; the entries come from the selected
    tab via ``_entries``.
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    thumbnails_list = tags = []

    selected_tab = self._extract_selected_tab(tabs)
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        # None for playlist metadata; item_id is substituted further down
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        # Avatar thumbnails first, then the playlist sidebar thumbnail
        thumbnails_list = (
            try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    # Keep only thumbnails that have a valid URL
    thumbnails = []
    for t in thumbnails_list:
        if not isinstance(t, dict):
            continue
        thumbnail_url = url_or_none(t.get('url'))
        if not thumbnail_url:
            continue
        thumbnails.append({
            'url': thumbnail_url,
            'width': int_or_none(t.get('width')),
            'height': int_or_none(t.get('height')),
        })
    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        # Fall back to the hashtag header text, then to the playlist id
        title = (
            try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
            or playlist_id)
    # Append the selected tab's 'title' and 'expandedText' to the title
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        # No channel metadata: take uploader fields from the playlist sidebar
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the uploader_* fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})
    return self.playlist_result(
        self._entries(
            selected_tab, playlist_id,
            self._extract_identity_token(webpage, item_id),
            self._extract_account_syncid(data),
            self._extract_ytcfg(item_id, webpage)),
        **metadata)
73c4ac2c 3821
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a mix playlist, requesting further pages as needed.

    Each page is read from *playlist* and the next one is requested from the
    ``next`` endpoint, starting at the last video seen.  Iteration ends when
    a page has no videos, has nothing after the last video already yielded,
    or when the very first video shows up again.
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
        visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    for page_num in itertools.count(1):
        # A response without the expected playlist structure leaves nothing
        # to iterate; stop instead of failing inside _playlist_entries
        if not isinstance(playlist, dict) or not playlist.get('contents'):
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last video already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item need not be a playlistPanelVideoRenderer with a watch
        # endpoint; the fallbacks below then supply the query values
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query,
            ep='next',
            headers=headers,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 3860
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Return the result for an inline playlist found on a page.

    When the playlist's own endpoint URL differs from *url*, extraction is
    handed over to that URL.  Otherwise the playlist is extracted here as a
    mix playlist.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_path)
    if playlist_url and playlist_url != url:
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=title)

    mix_entries = self._extract_mix_playlist(playlist, playlist_id, data, webpage)
    return self.playlist_result(
        mix_entries, playlist_id=playlist_id, playlist_title=title)
c5e8d7af 3878
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Request the playlist again with unavailable videos included.

    The browse id and params are taken from the 'show unavailable videos'
    menu item of the playlist sidebar when it is found; otherwise defaults
    derived from item_id are used. Returns None when data has no playlist
    sidebar items.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    if not sidebar_items:
        return
    browse_id = params = None
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        primary_info = sidebar_item.get('playlistSidebarPrimaryInfoRenderer')
        menu_items = try_get(
            primary_info, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_items:
            if not isinstance(menu_item, dict):
                continue
            nav_item = menu_item.get('menuNavigationItemRenderer')
            label = try_get(
                nav_item, lambda x: x['text']['simpleText'], compat_str)
            if not label or label.lower() != 'show unavailable videos':
                continue
            endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id = endpoint.get('browseId')
            params = endpoint.get('params')
            break

    ytcfg = self._extract_ytcfg(item_id, webpage)
    visitor_data = try_get(
        self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=visitor_data)
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False,
        note='Downloading API JSON with unavailable videos')
358de58c 3922
def _extract_webpage(self, url, item_id):
    """
    Download `url` and return the tuple `(webpage, data)`.

    `data` is the result of `_extract_yt_initial_data` for the downloaded
    page. The download is repeated, up to the `extractor_retries` parameter
    (default 3) additional times, for as long as `data` has neither
    'contents' nor 'currentVideoEndpoint'. At least one attempt is always
    made, even if the configured retry count is negative.

    Raises ExtractorError when the data is still incomplete after the last
    attempt. Alerts found in incomplete data are passed to
    `_extract_and_report_alerts` before retrying.
    """
    retries = self.get_param('extractor_retries', 3)
    last_error = 'Incomplete yt initial data received'
    for count in itertools.count():
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if count:
            self.report_warning('%s. Retrying ...' % last_error)
        webpage = self._download_webpage(
            url, item_id,
            'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
        data = self._extract_yt_initial_data(item_id, webpage)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            return webpage, data
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        if count >= retries:
            raise ExtractorError(last_error)
3944
@staticmethod
def _smuggle_data(entries, data):
    """
    Lazily yield each entry of `entries`, smuggling `data` into its URL.

    When `data` is falsy the entries are yielded untouched; otherwise each
    entry's 'url' is replaced in place by `smuggle_url(url, data)`.
    """
    for item in entries:
        if not data:
            yield item
            continue
        item['url'] = smuggle_url(item['url'], data)
        yield item
3951
def _real_extract(self, url):
    """
    Unsmuggle `url`, run the actual extraction and re-smuggle the entries.

    URLs for which `is_music_url` is true are flagged in the smuggled data
    before extraction, and the same smuggled data is attached to the URL
    of every entry of the result.
    """
    plain_url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(plain_url):
        smuggled_data['is_music_url'] = True
    result = self.__real_extract(plain_url, smuggled_data)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
3960
# Splits a tab URL into everything matched by _VALID_URL ('pre'), an optional
# '/tab' component (only looked for when 'channel_type' matched) and the rest
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)

def __real_extract(self, url, smuggled_data):
    """
    Extract a channel tab, playlist or single video from `url`.

    The URL is first normalised: the host becomes www.youtube.com, music
    channel IDs are mapped to playlist/channel URLs, tab-less channel URLs
    get '/videos' appended (unless the 'no-youtube-channel-redirect' compat
    option is set) and 'watch?list=...' URLs without a video ID become
    playlist URLs. The page is then downloaded and dispatched to the tab,
    playlist or single-video handling depending on what its data contains.

    Raises ExtractorError when the page cannot be recognised, or when a
    '/live' tab was requested but the page still shows browse tabs.
    """
    item_id = self._match_id(url)
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])
    playlist_url_tmpl = 'https://www.youtube.com/playlist?list=%s'

    def split_url(tab_url):
        # Named groups of `_url_re`, with unmatched groups as '' instead of None
        matched = self._url_re.match(tab_url).groupdict()
        return {
            name: '' if value is None else value
            for name, value in matched.items()}

    def browse_tabs(response):
        return try_get(
            response, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)

    groups = split_url(url)
    pre, post = groups['pre'], groups['post']
    # Youtube returns incomplete data if tabname is not lower case
    tab = groups['tab'].lower()
    is_channel = not groups['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        if item_id.startswith('VL'):
            # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre, tab, post = playlist_url_tmpl % item_id, '', ''
            is_channel = False
        elif item_id.startswith('MP'):
            # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
            music_page = self._download_webpage(
                'https://music.youtube.com/channel/%s' % item_id, item_id)
            item_id = self._search_regex(
                r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                music_page, 'playlist id')
            pre, tab, post = playlist_url_tmpl % item_id, '', ''
            is_channel = False
        elif groups['channel_type'] == 'browse':
            # Youtube music /browse/ should be changed to /channel/
            pre = 'https://www.youtube.com/channel/%s' % item_id

    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = pre + tab + post
    groups = split_url(url)

    # Handle both video/playlist URLs
    query = parse_qs(url)
    video_id = query.get('v', [None])[0]
    playlist_id = query.get('list', [None])[0]

    if not video_id and groups['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = playlist_url_tmpl % playlist_id
        groups = split_url(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = browse_tabs(data)
    if tabs:
        tab_name = self._extract_selected_tab(tabs).get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            requested_tab = groups['tab']
            if requested_tab == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if requested_tab == '/videos' and tab_name.lower() != requested_tab[1:]:
                if groups['not_channel'] or not item_id.startswith('UC'):
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (requested_tab[1:], tab_name))
                else:
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (requested_tab[1:], item_id[2:]))
                    uploads_id = 'UU%s' % item_id[2:]
                    uploads_url = 'https://www.youtube.com/playlist?list=%s%s' % (uploads_id, groups['post'])
                    try:
                        uploads_webpage, uploads_data = self._extract_webpage(uploads_url, uploads_id)
                        for alert_type, alert_message in self._extract_alerts(uploads_data):
                            if alert_type == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        # Only switch over once the playlist page loaded without error alerts
                        item_id, url = uploads_id, uploads_url
                        webpage, data = uploads_webpage, uploads_data

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)

    tabs = browse_tabs(data)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    current_video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str)
    video_id = current_video_id or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    if groups['tab'] != '/live':  # live tab is expected to redirect to video
        self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
    return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 4078
c5e8d7af 4079
class YoutubePlaylistIE(InfoExtractor):
    """Match bare playlist IDs and ``list=`` URLs and hand them to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that YoutubeTabIE accepts or that carry a `v` query value."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        if has_video_id:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite `url` as a www.youtube.com/playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Must be decided on the original URL, before it is rewritten below
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query string if there is one; a bare ID has none
        playlist_query = parse_qs(url) or {'list': playlist_id}
        target_url = update_url_query(
            'https://www.youtube.com/playlist', playlist_query)
        if from_music:
            target_url = smuggle_url(target_url, {'is_music_url': True})
        return self.url_result(
            target_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4162
4163
class YoutubeYtBeIE(InfoExtractor):
    """Turn youtu.be short links that carry a ``list=`` parameter into watch URLs for YoutubeTabIE."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent watch URL (video + playlist) and delegate to YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4202
4203
class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` shorthand into a /user/ URL for YoutubeTabIE."""

    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the user page URL built from the keyword."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4217
b05654f0 4218
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the ``:ytfav`` family of keywords to the 'LL' playlist URL."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the matched keyword itself carries no data."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
4236
4237
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor for the ``ytsearch`` keyword, paging through the 'search' endpoint."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """
        Yield up to `n` video results for `query`, fetching pages on demand.

        Iteration ends when `n` videos were yielded, when a response is empty
        or has no result sections, or when a page carries no continuation token.
        """
        payload = {'query': query}
        if self._SEARCH_PARAMS:
            payload['params'] = self._SEARCH_PARAMS
        found = 0
        for page_num in itertools.count(1):
            response = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=payload,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not response:
                return
            # First page and continuation pages keep the sections in different places
            sections = try_get(
                response,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            next_token = None
            for section in sections:
                if next_token is None:
                    next_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for item in items or ():
                    video = item.get('videoRenderer') if isinstance(item, dict) else None
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    found += 1
                    if found == n:
                        return

            if not next_token:
                return
            payload['continuation'] = next_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        results = self._entries(query, n)
        return self.playlist_result(results, query)
75dff0ee 4307
c9ae7b95 4308
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE for the ``ytsearchdate`` keyword with its own search params."""

    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the 'params' field of every search request
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4314
c9ae7b95 4315
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Handle youtube.com/results URLs by running the search they describe."""

    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own `_VALID_URL` pattern unchanged."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the search terms and the optional `sp` value from `url` and run the search."""
        url_query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        search_terms = url_query.get('search_query') or url_query.get('q')
        # Stored on the instance because `_entries` reads `self._SEARCH_PARAMS`
        self._SEARCH_PARAMS = url_query.get('sp', ('',))[0]
        return self._get_n_results(search_terms[0], self._MAX_RESULTS)
3462ffa8 4341
4342
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors.

    A subclass only has to provide `_FEED_NAME`; it is used both in the
    extractor name ('youtube:<name>') and in the feed URL that is handed
    over to YoutubeTabIE.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /feed/ URL of this feed."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4359
4360
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ``:ytwatchlater`` keyword to the 'WL' playlist URL."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE; the matched keyword itself carries no data."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4373
4374
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``:ytrec`` and the bare youtube.com front page URL."""

    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _FEED_NAME = 'recommended'
    # Overrides the True value set on the feed base class
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4390
1ed5b5c9 4391
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``:ytsubs`` family of keywords."""

    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4403
1ed5b5c9 4404
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``:ythis`` / ``:ythistory`` keywords."""

    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4413
4414
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """
    Catch watch/attribution URLs that carry no video ID.

    The pattern only accepts URLs whose query is empty or consists of a
    single non-video parameter, which is what remains when an unquoted '&'
    makes the shell cut the URL short. Extraction always fails with an
    expected error explaining how to quote the URL.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError telling the user to quote the URL."""
        # The hint names the yt-dlp command, since that is what users of
        # this code base actually run
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4462
4463
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose `v` value is only 1 to 10 characters long and report them."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)