]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
Improve `extractor_args` parsing
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
109dd3b2 6import copy
a5c56234 7import hashlib
0ca96d48 8import itertools
c5e8d7af 9import json
c4417ddb 10import os.path
d77ab8e2 11import random
c5e8d7af 12import re
8a784c74 13import time
e0df6211 14import traceback
c5e8d7af 15
b05654f0 16from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 17from ..compat import (
edf3e38e 18 compat_chr,
29f7c58a 19 compat_HTTPError,
c5e8d7af 20 compat_parse_qs,
545cc85d 21 compat_str,
7fd002c0 22 compat_urllib_parse_unquote_plus,
15707c7e 23 compat_urllib_parse_urlencode,
7c80519c 24 compat_urllib_parse_urlparse,
7c61bd36 25 compat_urlparse,
4bb4a188 26)
545cc85d 27from ..jsinterp import JSInterpreter
4bb4a188 28from ..utils import (
c224251a 29 bool_or_none,
c5e8d7af 30 clean_html,
26fe8ffe 31 dict_get,
d92f5d5a 32 datetime_from_str,
358de58c 33 error_to_compat_str,
c5e8d7af 34 ExtractorError,
b60419c5 35 format_field,
2d30521a 36 float_or_none,
dd27fd17 37 int_or_none,
94278f72 38 mimetype2ext,
6310acf5 39 parse_codecs,
7c80519c 40 parse_duration,
dca3ff4a 41 qualities,
3995d37d 42 remove_start,
cf7e015f 43 smuggle_url,
dbdaaa23 44 str_or_none,
c93d53f5 45 str_to_int,
556dbe7f 46 try_get,
c5e8d7af
PH
47 unescapeHTML,
48 unified_strdate,
cf7e015f 49 unsmuggle_url,
8bdd16b4 50 update_url_query,
21c340b8 51 url_or_none,
6e6bc8da 52 urlencode_postdata,
d92f5d5a 53 urljoin
c5e8d7af
PH
54)
55
5f6a1245 56
def parse_qs(url):
    """Return the query-string parameters of *url* as parsed by ``compat_urlparse.parse_qs``."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
59
60
de7f3446 61class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b
JMF
62 """Provide base functions for Youtube extractors"""
# Page fetched by _login() to obtain the hidden login-form inputs
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

# Endpoints posted to by _login(): account lookup, password challenge and
# TFA code submission ({0} is filled with the "TL" value of the challenge response)
_LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
_CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
_TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

# Regex alternation of path names
# NOTE(review): presumably names that must not be treated as channel/user names - confirm against users
_RESERVED_NAMES = (
    r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
    r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
    r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

_NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False

# Regex for a playlist ID: a known prefix followed by at least 10 ID characters,
# or one of the fixed short IDs (RDMM, WL, LL, LM)
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 80
def _login(self):
    """
    Attempt to log in to YouTube.

    True is returned if login was successful or skipped (no username given).
    False is returned if login failed; this includes the case where a
    username was supplied, because username+password login is broken.

    If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
    """

    def warn(message):
        self.report_warning(message)

    # username+password login is broken
    if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
        self.raise_login_required(
            'Login details are needed to download this content', method='cookies')
    username, password = self._get_login_info()
    if username:
        warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
        return False
    # Everything below this is broken!

    # No authentication to be performed
    if username is None:
        if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
            raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
        # if self.get_param('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
        #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
        return True

    login_page = self._download_webpage(
        self._LOGIN_URL, None,
        note='Downloading login page',
        errnote='unable to fetch login page', fatal=False)
    if login_page is False:
        return False

    login_form = self._hidden_inputs(login_page)

    def req(url, f_req, note, errnote):
        # POST the hidden login-form inputs plus the JSON-encoded request payload
        data = login_form.copy()
        data.update({
            'pstMsg': 1,
            'checkConnection': 'youtube',
            'checkedDomains': 'youtube',
            'hl': 'en',
            'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
            'f.req': json.dumps(f_req),
            'flowName': 'GlifWebSignIn',
            'flowEntry': 'ServiceLogin',
            # TODO: reverse actual botguard identifier generation algo
            'bgRequest': '["identifier",""]',
        })
        return self._download_json(
            url, None, note=note, errnote=errnote,
            # Drop everything before the first "[" so the remainder parses as JSON
            transform_source=lambda s: re.sub(r'^[^[]*', '', s),
            fatal=False,
            data=urlencode_postdata(data), headers={
                'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                'Google-Accounts-XSRF': 1,
            })

    # The same ServiceLogin URL is embedded in both the lookup and challenge payloads
    service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

    lookup_req = [
        username,
        None, [], None, 'US', None, None, 2, False, True,
        [
            None, None,
            [2, 1, None, 1,
             service_login_url,
             None, [], 4],
            1, [None, None, []], None, None, None, True
        ],
        username,
    ]

    lookup_results = req(
        self._LOOKUP_URL, lookup_req,
        'Looking up account info', 'Unable to look up account info')

    if lookup_results is False:
        return False

    user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
    if not user_hash:
        warn('Unable to extract user hash')
        return False

    challenge_req = [
        user_hash,
        None, 1, None, [1, None, None, None, [password, None, True]],
        [
            None, None, [2, 1, None, 1, service_login_url, None, [], 4],
            1, [None, None, []], None, None, None, True
        ]]

    challenge_results = req(
        self._CHALLENGE_URL, challenge_req,
        'Logging in', 'Unable to log in')

    if challenge_results is False:
        return False

    login_res = try_get(challenge_results, lambda x: x[0][5], list)
    if login_res:
        login_msg = try_get(login_res, lambda x: x[5], compat_str)
        # The conditional must be parenthesised: "%" binds tighter than "if/else",
        # otherwise the prefix is lost for every message other than the known one
        warn('Unable to login: %s' % (
            'Invalid password' if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
        return False

    res = try_get(challenge_results, lambda x: x[0][-1], list)
    if not res:
        warn('Unable to extract result entry')
        return False

    login_challenge = try_get(res, lambda x: x[0][0], list)
    if login_challenge:
        challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
        if challenge_str == 'TWO_STEP_VERIFICATION':
            # SEND_SUCCESS - TFA code has been successfully sent to phone
            # QUOTA_EXCEEDED - reached the limit of TFA codes
            status = try_get(login_challenge, lambda x: x[5], compat_str)
            if status == 'QUOTA_EXCEEDED':
                warn('Exceeded the limit of TFA codes, try later')
                return False

            tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
            if not tl:
                warn('Unable to extract TL')
                return False

            tfa_code = self._get_tfa_info('2-step verification code')

            if not tfa_code:
                warn(
                    'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                    '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                return False

            tfa_code = remove_start(tfa_code, 'G-')

            tfa_req = [
                user_hash, None, 2, None,
                [
                    9, None, None, None, None, None, None, None,
                    [None, tfa_code, True, 2]
                ]]

            tfa_results = req(
                self._TFA_URL.format(tl), tfa_req,
                'Submitting TFA code', 'Unable to submit TFA code')

            if tfa_results is False:
                return False

            tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
            if tfa_res:
                tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                # Parenthesised for the same precedence reason as the password warning
                warn('Unable to finish TFA: %s' % (
                    'Invalid TFA code' if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                return False

            check_cookie_url = try_get(
                tfa_results, lambda x: x[0][-1][2], compat_str)
        else:
            CHALLENGES = {
                'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
            }
            challenge = CHALLENGES.get(
                challenge_str,
                '%s returned error %s.' % (self.IE_NAME, challenge_str))
            warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
            return False
    else:
        check_cookie_url = try_get(res, lambda x: x[2], compat_str)

    if not check_cookie_url:
        warn('Unable to extract CheckCookie URL')
        return False

    check_cookie_results = self._download_webpage(
        check_cookie_url, None, 'Checking cookie', fatal=False)

    if check_cookie_results is False:
        return False

    if 'https://myaccount.google.com/' not in check_cookie_results:
        warn('Unable to log in')
        return False

    return True
275
cce889b9 276 def _initialize_consent(self):
277 cookies = self._get_cookies('https://www.youtube.com/')
278 if cookies.get('__Secure-3PSID'):
279 return
280 consent_id = None
281 consent = cookies.get('CONSENT')
282 if consent:
283 if 'YES' in consent.value:
284 return
285 consent_id = self._search_regex(
286 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
287 if not consent_id:
288 consent_id = random.randint(100, 999)
289 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 290
b2e8bc1b 291 def _real_initialize(self):
cce889b9 292 self._initialize_consent()
b2e8bc1b
JMF
293 if self._downloader is None:
294 return
b2e8bc1b
JMF
295 if not self._login():
296 return
c5e8d7af 297
# Regex capturing the JSON object assigned to ytInitialData in a webpage
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
# Regex capturing the JSON object assigned to ytInitialPlayerResponse
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
# Boundary appended to the data regex by _extract_yt_initial_data() for its first attempt
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

# Built-in ytcfg values per innertube client, keyed by client name.
# _get_default_ytcfg() hands out deep copies of these entries.
_YT_DEFAULT_YTCFGS = {
    'WEB': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'WEB',
        'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'WEB_REMIX': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
        'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67
    },
    'WEB_EMBEDDED_PLAYER': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
        'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'ANDROID': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'ANDROID',
        'INNERTUBE_CLIENT_VERSION': '16.20',
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID'
    },
    'ANDROID_EMBEDDED_PLAYER': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
        'INNERTUBE_CLIENT_VERSION': '16.20',
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER'
    },
    'ANDROID_MUSIC': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
        'INNERTUBE_CLIENT_VERSION': '4.32',
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID_MUSIC'
    }
}

# API hostname per client; _get_innertube_host() falls back to the 'WEB'
# entry for clients that are not listed here
_YT_DEFAULT_INNERTUBE_HOSTS = {
    'DIRECT': 'youtubei.googleapis.com',
    'WEB': 'www.youtube.com',
    'WEB_REMIX': 'music.youtube.com',
    'ANDROID_MUSIC': 'music.youtube.com'
}
395
396 def _get_default_ytcfg(self, client='WEB'):
397 if client in self._YT_DEFAULT_YTCFGS:
398 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
399 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
400 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
401
def _get_innertube_host(self, client='WEB'):
    """Return the API hostname configured for *client*, or the WEB hostname when it has none."""
    lookup_order = (client, 'WEB')
    return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, lookup_order)
404
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
    """``try_get`` on *ytcfg*, falling back to the default ytcfg of *default_client*.

    The fallback is used whenever the value obtained from *ytcfg* is falsy.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
409
def _extract_client_name(self, ytcfg, default_client='WEB'):
    """Return ``INNERTUBE_CLIENT_NAME`` from *ytcfg*, or from the defaults of *default_client*."""
    def client_name(cfg):
        return cfg['INNERTUBE_CLIENT_NAME']

    return self._ytcfg_get_safe(ytcfg, client_name, compat_str, default_client)
412
def _extract_client_version(self, ytcfg, default_client='WEB'):
    """Return ``INNERTUBE_CLIENT_VERSION`` from *ytcfg*, or from the defaults of *default_client*."""
    def client_version(cfg):
        return cfg['INNERTUBE_CLIENT_VERSION']

    return self._ytcfg_get_safe(ytcfg, client_version, compat_str, default_client)
415
def _extract_api_key(self, ytcfg=None, default_client='WEB'):
    """Return ``INNERTUBE_API_KEY`` from *ytcfg*, or from the defaults of *default_client*."""
    def api_key(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, api_key, compat_str, default_client)
418
def _extract_context(self, ytcfg=None, default_client='WEB'):
    """Return the innertube context dict to send with API requests.

    The ``INNERTUBE_CONTEXT`` of *ytcfg* is returned as is when present.
    Otherwise the default context of *default_client* is used; when a
    *ytcfg* was given, its client name/version and ``VISITOR_DATA`` are
    written into that default context.
    """
    def context_of(cfg):
        return try_get(cfg, lambda x: x['INNERTUBE_CONTEXT'], dict)

    supplied = context_of(ytcfg)
    if supplied:
        return supplied

    fallback = context_of(self._get_default_ytcfg(default_client))
    if ytcfg:
        # Recreate the client context (required)
        client = fallback['client']
        client['clientVersion'] = self._extract_client_version(ytcfg, default_client)
        client['clientName'] = self._extract_client_name(ytcfg, default_client)
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            client['visitorData'] = visitor_data
    return fallback
438
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """Return a ``SAPISIDHASH`` authorization header value for *origin*.

    ``None`` is returned when neither a ``__Secure-3PAPISID`` nor a
    ``SAPISID`` cookie is available for youtube.com.
    """
    # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
    # See: https://github.com/yt-dlp/yt-dlp/issues/393
    cookies = self._get_cookies('https://www.youtube.com')
    sapisid = dict_get(cookies, ('__Secure-3PAPISID', 'SAPISID'))
    if sapisid is None:
        return None
    now = round(time.time())
    # SAPISID cookie is required if not already present
    if not cookies.get('SAPISID'):
        self._set_cookie(
            '.youtube.com', 'SAPISID', sapisid.value, secure=True, expire_time=now + 3600)
    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    digest_input = '{} {} {}'.format(now, sapisid.value, origin)
    digest = hashlib.sha1(digest_input.encode('utf-8')).hexdigest()
    return 'SAPISIDHASH {}_{}'.format(now, digest)
a5c56234
M
456
457 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 458 note='Downloading API JSON', errnote='Unable to download API page',
109dd3b2 459 context=None, api_key=None, api_hostname=None, default_client='WEB'):
f4f751af 460
109dd3b2 461 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
8bdd16b4 462 data.update(query)
109dd3b2 463 real_headers = self._generate_api_headers(client=default_client)
f4f751af 464 real_headers.update({'content-type': 'application/json'})
465 if headers:
466 real_headers.update(headers)
545cc85d 467 return self._download_json(
109dd3b2 468 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
a5c56234 469 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 470 data=json.dumps(data).encode('utf8'), headers=real_headers,
471 query={'key': api_key or self._extract_api_key()})
472
8bdd16b4 473 def _extract_yt_initial_data(self, video_id, webpage):
474 return self._parse_json(
475 self._search_regex(
29f7c58a 476 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
a0566bbf 477 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
8bdd16b4 478 video_id)
0c148415 479
a1c5d2ca
M
def _extract_identity_token(self, webpage, item_id):
    """Return the ``ID_TOKEN`` found in *webpage*, or ``None``.

    The ytcfg embedded in the page is consulted first; a plain regex search
    over the page is the fallback.
    """
    ytcfg = self._extract_ytcfg(item_id, webpage)
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
489
@staticmethod
def _extract_account_syncid(data):
    """
    Extract syncId required to download private playlists of secondary channels
    @param data Either response or ytcfg
    @returns    the channel syncid, the ``DELEGATED_SESSION_ID`` of *data*,
                or None (also when *data* is not a dict)
    """
    sync_ids = (try_get(
        data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
               lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
    if len(sync_ids) >= 2 and sync_ids[1]:
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        return sync_ids[0]
    # *data* may be None or some other non-dict (e.g. a response that could not
    # be downloaded), in which case there is nothing to look up
    if not isinstance(data, dict):
        return None
    # ytcfg includes channel_syncid if on secondary channel
    return data.get('DELEGATED_SESSION_ID')
a1c5d2ca 505
29f7c58a 506 def _extract_ytcfg(self, video_id, webpage):
8c54a305 507 if not webpage:
508 return {}
29f7c58a 509 return self._parse_json(
510 self._search_regex(
511 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
f4f751af 512 default='{}'), video_id, fatal=False) or {}
513
def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None,
                          visitor_data=None, api_hostname=None, client='WEB'):
    """Build the HTTP headers for an innertube API request.

    Client name and version come from *ytcfg*, falling back to the defaults
    of *client*. The identity token, account syncid and visitor data headers
    are only added when a value is given. ``Authorization`` and ``X-Origin``
    are only added when a SAPISIDHASH value could be generated.
    """
    host = api_hostname or self._get_innertube_host(client)
    origin = 'https://' + host
    context_client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=client)

    headers = {
        'X-YouTube-Client-Name': compat_str(context_client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, client),
        'Origin': origin
    }
    if identity_token:
        headers['X-Youtube-Identity-Token'] = identity_token
    if account_syncid:
        headers.update({
            'X-Goog-PageId': account_syncid,
            'X-Goog-AuthUser': 0,
        })
    if visitor_data:
        headers['X-Goog-Visitor-Id'] = visitor_data

    auth = self._generate_sapisidhash_header(origin)
    if auth is None:
        return headers
    headers['Authorization'] = auth
    headers['X-Origin'] = origin
    return headers
29f7c58a 535
@staticmethod
def _extract_alerts(data):
    """Yield an ``(alert_type, message)`` pair for every alert found in ``data['alerts']``.

    Each alert yields at most once. The message is the alert's
    ``text.simpleText`` when present, otherwise the concatenated text of its
    ``text.runs``. Alerts without a type or without any message text, and
    entries that are not dicts, are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
            if not message:
                runs = try_get(alert, lambda x: x['text']['runs'], list) or []
                # A run without string text contributes nothing instead of raising TypeError
                message = ''.join(
                    try_get(run, lambda x: x['text'], compat_str) or '' for run in runs)
            if message:
                yield alert_type, message
552
553 def _report_alerts(self, alerts, expected=True):
554 errors = []
555 warnings = []
556 for alert_type, alert_message in alerts:
557 if alert_type.lower() == 'error':
558 errors.append([alert_type, alert_message])
559 else:
560 warnings.append([alert_type, alert_message])
561
562 for alert_type, alert_message in (warnings + errors[:-1]):
563 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
564 if errors:
565 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
566
567 def _extract_and_report_alerts(self, data, *args, **kwargs):
568 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
569
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='WEB'):
    """Call the API endpoint *ep*, retrying on selected HTTP errors and incomplete data.

    @param check_get_keys  keys of which at least one must yield a value in the
                           response for it to be accepted; no check when empty/None
    @param fatal           when False, failures are reported as warnings and
                           None is returned instead of raising
    The number of retries is read from the ``extractor_retries`` param (default 3).
    """
    response = None
    last_error = None
    # Starts at -1 so that the first attempt is number 0 and is not labelled as a retry
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            # Always fatal here; whether to raise or warn is decided by the handler below
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                last_error = 'HTTP Error %s' % e.cause.code
                if count < retries:
                    continue
            # Reached for non-retryable errors and once the retries are used up
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            # Youtube may send alerts if there was an issue with the continuation page
            try:
                self._extract_and_report_alerts(response, expected=False)
            except ExtractorError as e:
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response
625
9297939e 626 @staticmethod
627 def is_music_url(url):
628 return re.match(r'https?://music\.youtube\.com/', url) is not None
629
def _extract_video(self, renderer):
    """Build a ``url``-type result dict for YoutubeIE from a video *renderer* dict."""
    def text_of(*getters):
        # First string value that any of the getters finds in the renderer
        return try_get(renderer, getters, compat_str)

    title = text_of(
        lambda x: x['title']['runs'][0]['text'],
        lambda x: x['title']['simpleText'])
    description = text_of(lambda x: x['descriptionSnippet']['runs'][0]['text'])
    duration = parse_duration(text_of(lambda x: x['lengthText']['simpleText']))

    # Strip all whitespace, then read the leading digits/commas as the count
    view_count_text = text_of(lambda x: x['viewCountText']['simpleText']) or ''
    compact_view_text = re.sub(r'\s', '', view_count_text)
    view_count = str_to_int(self._search_regex(
        r'^([\d,]+)', compact_view_text, 'view count', default=None))

    uploader = text_of(
        lambda x: x['ownerText']['runs'][0]['text'],
        lambda x: x['shortBylineText']['runs'][0]['text'])

    video_id = renderer.get('videoId')
    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
    }
661
0c148415 662
360e1ca5 663class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 664 IE_DESC = 'YouTube.com'
bc2ca1bb 665 _INVIDIOUS_SITES = (
666 # invidious-redirect websites
667 r'(?:www\.)?redirect\.invidious\.io',
668 r'(?:(?:www|dev)\.)?invidio\.us',
669 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
670 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 671 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 672 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 673 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
bc2ca1bb 674 # youtube-dl invidious instances list
675 r'(?:(?:www|no)\.)?invidiou\.sh',
676 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
677 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 678 r'(?:www\.)?invidious\.mastodon\.host',
679 r'(?:www\.)?invidious\.zapashcanon\.fr',
ed807c18 680 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
201c1459 681 r'(?:www\.)?invidious\.tinfoil-hat\.net',
682 r'(?:www\.)?invidious\.himiko\.cloud',
683 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 684 r'(?:www\.)?invidious\.tube',
685 r'(?:www\.)?invidiou\.site',
686 r'(?:www\.)?invidious\.site',
687 r'(?:www\.)?invidious\.xyz',
688 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 689 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 690 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 691 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 692 r'(?:www\.)?tube\.poal\.co',
693 r'(?:www\.)?tube\.connect\.cafe',
694 r'(?:www\.)?vid\.wxzm\.sx',
695 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 696 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 697 r'(?:www\.)?yewtu\.be',
698 r'(?:www\.)?yt\.elukerio\.org',
699 r'(?:www\.)?yt\.lelux\.fi',
700 r'(?:www\.)?invidious\.ggc-project\.de',
701 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 702 r'(?:www\.)?ytprivate\.com',
703 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 704 r'(?:www\.)?invidious\.toot\.koeln',
705 r'(?:www\.)?invidious\.fdn\.fr',
706 r'(?:www\.)?watch\.nettohikari\.com',
ed807c18 707 r'(?:www\.)?invidious\.namazso\.eu',
708 r'(?:www\.)?invidious\.silkky\.cloud',
709 r'(?:www\.)?invidious\.exonip\.de',
710 r'(?:www\.)?invidious\.riverside\.rocks',
711 r'(?:www\.)?invidious\.blamefran\.net',
712 r'(?:www\.)?invidious\.moomoo\.de',
713 r'(?:www\.)?ytb\.trom\.tf',
714 r'(?:www\.)?yt\.cyberhost\.uk',
bc2ca1bb 715 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
716 r'(?:www\.)?qklhadlycap4cnod\.onion',
717 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
718 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
719 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
720 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
721 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
722 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
ed807c18 723 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
724 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
725 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
726 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
bc2ca1bb 727 )
cb7dfeea 728 _VALID_URL = r"""(?x)^
c5e8d7af 729 (
edb53e2d 730 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 731 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
732 (?:www\.)?deturl\.com/www\.youtube\.com|
733 (?:www\.)?pwnyoutube\.com|
734 (?:www\.)?hooktube\.com|
735 (?:www\.)?yourepeat\.com|
736 tube\.majestyc\.net|
737 %(invidious)s|
738 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
739 (?:.*?\#/)? # handle anchor (#/) redirect urls
740 (?: # the various things that can precede the ID:
ac7553d0 741 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 742 |(?: # or the v= param in all its forms
f7000f3a 743 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 744 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 745 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
746 v=
747 )
f4b05232 748 ))
cbaed4bb
S
749 |(?:
750 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
751 vid\.plus| # or vid.plus/xxxx
752 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 753 %(invidious)s
cbaed4bb 754 )/
edb53e2d 755 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 756 )
c5e8d7af 757 )? # all until now is optional -> you can pass the naked ID
201c1459 758 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 759 (?(1).+)? # if we found the ID, everything can follow
9297939e 760 (?:\#|$)""" % {
bc2ca1bb 761 'invidious': '|'.join(_INVIDIOUS_SITES),
762 }
e40c758c 763 _PLAYER_INFO_RE = (
cc2db878 764 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
765 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 766 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 767 )
# Static metadata for known format codes, keyed by the itag written as a
# string (plus the special '_rtmp' key). Each value is a dict of the
# properties that are fixed for that itag: container extension, dimensions,
# codecs, audio bitrate, fps, a human-readable 'format_note', and so on.
# Properties that vary between videos are deliberately left out of an entry
# (see the notes on itags 36, 138 and 272).
# NOTE(review): presumably merged into the format dicts built during
# extraction; the consumer is not visible in this part of the file - confirm.
_formats = {
    # Legacy muxed (audio + video in one file) formats
    '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
    '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
    '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
    '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
    '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
    '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},


    # 3D videos
    # NOTE(review): the negative 'preference' presumably ranks these below
    # the regular formats during format selection - confirm.
    '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
    '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
    '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

    # Apple HTTP Live Streaming
    '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

    # DASH mp4 video
    '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
    '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

    # DASH mp4 audio
    '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
    '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
    '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
    '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
    '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

    # DASH webm video (vp8 entries carry explicit width/container, vp9 entries mostly only height)
    '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
    '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
    '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

    # DASH webm audio (vorbis)
    '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
    '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

    # DASH webm audio with opus inside
    '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
    '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
    '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

    # RTMP (unnamed); the only non-numeric key in this table
    '_rtmp': {'protocol': 'rtmp'},

    # av01 video only formats sometimes served with "unknown" codecs
    '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
}
# Subtitle format identifiers known to this extractor.
# NOTE(review): presumably the formats requested for each subtitle track, in
# this order; the consumer is not visible in this part of the file - confirm.
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

# Human-readable messages associated with age-gated videos.
# NOTE(review): presumably compared against the "reason" text returned by the
# player response to detect an age gate - confirm against the caller.
_AGE_GATE_REASONS = (
    'Sign in to confirm your age',
    'This video may be inappropriate for some users.',
    'Sorry, this content is age-restricted.')

# NOTE(review): looks like this overrides a base-class default to turn the
# generic geo-restriction bypass off for this extractor - confirm in
# InfoExtractor.
_GEO_BYPASS = False

# Name of this extractor.
IE_NAME = 'youtube'
2eb88d95
PH
# Test fixtures for this extractor: one dict per case.
#   'url'            - input URL or bare video id
#   'info_dict'      - expected metadata; values are literals, types (e.g. int),
#                      or strings with a 're:', 'md5:', 'mincount:' or
#                      'maxcount:' prefix
#   'params'         - options for the case
#   'skip'           - reason the case is not run
#   'only_matching'  - case only has a URL, no expected metadata
#   'playlist' / 'playlist_count' / 'expected_warnings' / 'note' / 'md5'
#                    - additional per-case expectations
# NOTE(review): the exact semantics of each key are defined by the project's
# test harness, which is not visible in this file - confirm there.
_TESTS = [
    {
        'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
        'info_dict': {
            'id': 'BaW_jenozKc',
            'ext': 'mp4',
            'title': 'youtube-dl test video "\'/\\ä↭𝕐',
            'uploader': 'Philipp Hagemeister',
            'uploader_id': 'phihag',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
            'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
            'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
            'upload_date': '20121002',
            'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
            'categories': ['Science & Technology'],
            'tags': ['youtube-dl'],
            'duration': 10,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'start_time': 1,
            'end_time': 9,
        }
    },
    {
        'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
        'note': 'Embed-only video (#1746)',
        'info_dict': {
            'id': 'yZIXLfi8CZQ',
            'ext': 'mp4',
            'upload_date': '20120608',
            'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
            'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
            'uploader': 'SET India',
            'uploader_id': 'setindia',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
            'age_limit': 18,
        },
        'skip': 'Private video',
    },
    {
        'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
        'note': 'Use the first video ID in the URL',
        'info_dict': {
            'id': 'BaW_jenozKc',
            'ext': 'mp4',
            'title': 'youtube-dl test video "\'/\\ä↭𝕐',
            'uploader': 'Philipp Hagemeister',
            'uploader_id': 'phihag',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
            'upload_date': '20121002',
            'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
            'categories': ['Science & Technology'],
            'tags': ['youtube-dl'],
            'duration': 10,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    },
    {
        'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
        'note': '256k DASH audio (format 141) via DASH manifest',
        'info_dict': {
            'id': 'a9LDPn-MO4I',
            'ext': 'm4a',
            'upload_date': '20121002',
            'uploader_id': '8KVIDEO',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
            'description': '',
            'uploader': '8KVIDEO',
            'title': 'UHDTV TEST 8K VIDEO.mp4'
        },
        'params': {
            'youtube_include_dash_manifest': True,
            'format': '141',
        },
        'skip': 'format 141 not served anymore',
    },
    # DASH manifest with encrypted signature
    {
        'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
        'info_dict': {
            'id': 'IB3lcPjvWLA',
            'ext': 'm4a',
            'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
            'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
            'duration': 244,
            'uploader': 'AfrojackVEVO',
            'uploader_id': 'AfrojackVEVO',
            'upload_date': '20131011',
            'abr': 129.495,
        },
        'params': {
            'youtube_include_dash_manifest': True,
            'format': '141/bestaudio[ext=m4a]',
        },
    },
    # Controversy video
    {
        'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
        'info_dict': {
            'id': 'T4XJQO3qol8',
            'ext': 'mp4',
            'duration': 219,
            'upload_date': '20100909',
            'uploader': 'Amazing Atheist',
            'uploader_id': 'TheAmazingAtheist',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
            'title': 'Burning Everyone\'s Koran',
            'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
        }
    },
    # Normal age-gate video (embed allowed)
    {
        'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
        'info_dict': {
            'id': 'HtVdAasjOgU',
            'ext': 'mp4',
            'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
            'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
            'duration': 142,
            'uploader': 'The Witcher',
            'uploader_id': 'WitcherGame',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
            'upload_date': '20140605',
            'age_limit': 18,
        },
    },
    # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
    # YouTube Red ad is not captured for creator
    {
        'url': '__2ABJjxzNo',
        'info_dict': {
            'id': '__2ABJjxzNo',
            'ext': 'mp4',
            'duration': 266,
            'upload_date': '20100430',
            'uploader_id': 'deadmau5',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
            'creator': 'deadmau5',
            'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
            'uploader': 'deadmau5',
            'title': 'Deadmau5 - Some Chords (HD)',
            'alt_title': 'Some Chords',
        },
        'expected_warnings': [
            'DASH manifest missing',
        ]
    },
    # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
    {
        'url': 'lqQg6PlCWgI',
        'info_dict': {
            'id': 'lqQg6PlCWgI',
            'ext': 'mp4',
            'duration': 6085,
            'upload_date': '20150827',
            'uploader_id': 'olympic',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
            'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
            'uploader': 'Olympic',
            'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
        },
        'params': {
            'skip_download': 'requires avconv',
        }
    },
    # Non-square pixels
    {
        'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
        'info_dict': {
            'id': '_b-2C3KPAM0',
            'ext': 'mp4',
            'stretched_ratio': 16 / 9.,
            'duration': 85,
            'upload_date': '20110310',
            'uploader_id': 'AllenMeow',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
            'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
            'uploader': '孫ᄋᄅ',
            'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
        },
    },
    # url_encoded_fmt_stream_map is empty string
    {
        'url': 'qEJwOuvDf7I',
        'info_dict': {
            'id': 'qEJwOuvDf7I',
            'ext': 'webm',
            'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
            'description': '',
            'upload_date': '20150404',
            'uploader_id': 'spbelect',
            'uploader': 'Наблюдатели Петербурга',
        },
        'params': {
            'skip_download': 'requires avconv',
        },
        'skip': 'This live event has ended.',
    },
    # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
    {
        'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
        'info_dict': {
            'id': 'FIl7x6_3R5Y',
            'ext': 'webm',
            'title': 'md5:7b81415841e02ecd4313668cde88737a',
            'description': 'md5:116377fd2963b81ec4ce64b542173306',
            'duration': 220,
            'upload_date': '20150625',
            'uploader_id': 'dorappi2000',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
            'uploader': 'dorappi2000',
            'formats': 'mincount:31',
        },
        'skip': 'not actual anymore',
    },
    # DASH manifest with segment_list
    {
        'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
        'md5': '8ce563a1d667b599d21064e982ab9e31',
        'info_dict': {
            'id': 'CsmdDsKjzN8',
            'ext': 'mp4',
            'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
            'uploader': 'Airtek',
            'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
            'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
            'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
        },
        'params': {
            'youtube_include_dash_manifest': True,
            'format': '135',  # bestvideo
        },
        'skip': 'This live event has ended.',
    },
    {
        # Multifeed videos (multiple cameras), URL is for Main Camera
        'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
        'info_dict': {
            'id': 'jvGDaLqkpTg',
            'title': 'Tom Clancy Free Weekend Rainbow Whatever',
            'description': 'md5:e03b909557865076822aa169218d6a5d',
        },
        'playlist': [{
            'info_dict': {
                'id': 'jvGDaLqkpTg',
                'ext': 'mp4',
                'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
                'description': 'md5:e03b909557865076822aa169218d6a5d',
                'duration': 10643,
                'upload_date': '20161111',
                'uploader': 'Team PGP',
                'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
            },
        }, {
            'info_dict': {
                'id': '3AKt1R1aDnw',
                'ext': 'mp4',
                'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
                'description': 'md5:e03b909557865076822aa169218d6a5d',
                'duration': 10991,
                'upload_date': '20161111',
                'uploader': 'Team PGP',
                'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
            },
        }, {
            'info_dict': {
                'id': 'RtAMM00gpVc',
                'ext': 'mp4',
                'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
                'description': 'md5:e03b909557865076822aa169218d6a5d',
                'duration': 10995,
                'upload_date': '20161111',
                'uploader': 'Team PGP',
                'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
            },
        }, {
            'info_dict': {
                'id': '6N2fdlP3C5U',
                'ext': 'mp4',
                'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
                'description': 'md5:e03b909557865076822aa169218d6a5d',
                'duration': 10990,
                'upload_date': '20161111',
                'uploader': 'Team PGP',
                'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
            },
        }],
        'params': {
            'skip_download': True,
        },
    },
    {
        # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
        'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
        'info_dict': {
            'id': 'gVfLd0zydlo',
            'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
        },
        'playlist_count': 2,
        'skip': 'Not multifeed anymore',
    },
    {
        'url': 'https://vid.plus/FlRa-iH7PGw',
        'only_matching': True,
    },
    {
        'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
        'only_matching': True,
    },
    {
        # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
        # Also tests cut-off URL expansion in video description (see
        # https://github.com/ytdl-org/youtube-dl/issues/1892,
        # https://github.com/ytdl-org/youtube-dl/issues/8164)
        'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
        'info_dict': {
            'id': 'lsguqyKfVQg',
            'ext': 'mp4',
            'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
            'alt_title': 'Dark Walk - Position Music',
            'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
            'duration': 133,
            'upload_date': '20151119',
            'uploader_id': 'IronSoulElf',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
            'uploader': 'IronSoulElf',
            'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
            'track': 'Dark Walk - Position Music',
            'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
            'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
        },
        'params': {
            'skip_download': True,
        },
    },
    {
        # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
        'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
        'only_matching': True,
    },
    {
        # Video with yt:stretch=17:0
        'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
        'info_dict': {
            'id': 'Q39EVAstoRM',
            'ext': 'mp4',
            'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
            'description': 'md5:ee18a25c350637c8faff806845bddee9',
            'upload_date': '20151107',
            'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
            'uploader': 'CH GAMER DROID',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video does not exist.',
    },
    {
        # Video with incomplete 'yt:stretch=16:'
        'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
        'only_matching': True,
    },
    {
        # Video licensed under Creative Commons
        'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
        'info_dict': {
            'id': 'M4gD1WSo5mA',
            'ext': 'mp4',
            'title': 'md5:e41008789470fc2533a3252216f1c1d1',
            'description': 'md5:a677553cf0840649b731a3024aeff4cc',
            'duration': 721,
            'upload_date': '20150127',
            'uploader_id': 'BerkmanCenter',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
            'uploader': 'The Berkman Klein Center for Internet & Society',
            'license': 'Creative Commons Attribution license (reuse allowed)',
        },
        'params': {
            'skip_download': True,
        },
    },
    {
        # Channel-like uploader_url
        'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
        'info_dict': {
            'id': 'eQcmzGIKrzg',
            'ext': 'mp4',
            'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
            'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
            'duration': 4060,
            'upload_date': '20151119',
            'uploader': 'Bernie Sanders',
            'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
            'license': 'Creative Commons Attribution license (reuse allowed)',
        },
        'params': {
            'skip_download': True,
        },
    },
    {
        'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
        'only_matching': True,
    },
    {
        # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
        'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
        'only_matching': True,
    },
    {
        # Rental video preview
        'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
        'info_dict': {
            'id': 'uGpuVWrhIzE',
            'ext': 'mp4',
            'title': 'Piku - Trailer',
            'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
            'upload_date': '20150811',
            'uploader': 'FlixMatrix',
            'uploader_id': 'FlixMatrixKaravan',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
            'license': 'Standard YouTube License',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is not available.',
    },
    {
        # YouTube Red video with episode data
        'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
        'info_dict': {
            'id': 'iqKdEhx-dD4',
            'ext': 'mp4',
            'title': 'Isolation - Mind Field (Ep 1)',
            'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
            'duration': 2085,
            'upload_date': '20170118',
            'uploader': 'Vsauce',
            'uploader_id': 'Vsauce',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
            'series': 'Mind Field',
            'season_number': 1,
            'episode_number': 1,
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': [
            'Skipping DASH manifest',
        ],
    },
    {
        # The following content has been identified by the YouTube community
        # as inappropriate or offensive to some audiences.
        'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
        'info_dict': {
            'id': '6SJNVb0GnPI',
            'ext': 'mp4',
            'title': 'Race Differences in Intelligence',
            'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
            'duration': 965,
            'upload_date': '20140124',
            'uploader': 'New Century Foundation',
            'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
    },
    {
        # itag 212
        'url': '1t24XAntNCY',
        'only_matching': True,
    },
    {
        # geo restricted to JP
        'url': 'sJL6WA-aGkQ',
        'only_matching': True,
    },
    {
        'url': 'https://invidio.us/watch?v=BaW_jenozKc',
        'only_matching': True,
    },
    {
        'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
        'only_matching': True,
    },
    {
        # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
        'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
        'only_matching': True,
    },
    {
        # DRM protected
        'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
        'only_matching': True,
    },
    {
        # Video with unsupported adaptive stream type formats
        'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
        'info_dict': {
            'id': 'Z4Vy8R84T1U',
            'ext': 'mp4',
            'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'duration': 433,
            'upload_date': '20130923',
            'uploader': 'Amelia Putri Harwita',
            'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
            'formats': 'maxcount:10',
        },
        'params': {
            'skip_download': True,
            'youtube_include_dash_manifest': False,
        },
        'skip': 'not actual anymore',
    },
    {
        # YouTube Music auto-generated description
        'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
        'info_dict': {
            'id': 'MgNrAu2pzNs',
            'ext': 'mp4',
            'title': 'Voyeur Girl',
            'description': 'md5:7ae382a65843d6df2685993e90a8628f',
            'upload_date': '20190312',
            'uploader': 'Stephen - Topic',
            'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
            'artist': 'Stephen',
            'track': 'Voyeur Girl',
            'album': 'it\'s too much love to know my dear',
            'release_date': '20190313',
            'release_year': 2019,
        },
        'params': {
            'skip_download': True,
        },
    },
    {
        'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
        'only_matching': True,
    },
    {
        # invalid -> valid video id redirection
        'url': 'DJztXj2GPfl',
        'info_dict': {
            'id': 'DJztXj2GPfk',
            'ext': 'mp4',
            'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
            'description': 'md5:bf577a41da97918e94fa9798d9228825',
            'upload_date': '20090125',
            'uploader': 'Prochorowka',
            'uploader_id': 'Prochorowka',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
            'artist': 'Panjabi MC',
            'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
            'album': 'Beware of the Boys (Mundian To Bach Ke)',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Video unavailable',
    },
    {
        # empty description results in an empty string
        'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
        'info_dict': {
            'id': 'x41yOUIvK2k',
            'ext': 'mp4',
            'title': 'IMG 3456',
            'description': '',
            'upload_date': '20170613',
            'uploader_id': 'ElevageOrVert',
            'uploader': 'ElevageOrVert',
        },
        'params': {
            'skip_download': True,
        },
    },
    {
        # with '};' inside yt initial data (see [1])
        # see [2] for an example with '};' inside ytInitialPlayerResponse
        # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
        # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
        'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
        'info_dict': {
            'id': 'CHqg6qOn4no',
            'ext': 'mp4',
            'title': 'Part 77 Sort a list of simple types in c#',
            'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
            'upload_date': '20130831',
            'uploader_id': 'kudvenkat',
            'uploader': 'kudvenkat',
        },
        'params': {
            'skip_download': True,
        },
    },
    {
        # another example of '};' in ytInitialData
        'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
        'only_matching': True,
    },
    {
        'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
        'only_matching': True,
    },
    {
        # https://github.com/ytdl-org/youtube-dl/pull/28094
        'url': 'OtqTfy26tG0',
        'info_dict': {
            'id': 'OtqTfy26tG0',
            'ext': 'mp4',
            'title': 'Burn Out',
            'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
            'upload_date': '20141120',
            'uploader': 'The Cinematic Orchestra - Topic',
            'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
            'artist': 'The Cinematic Orchestra',
            'track': 'Burn Out',
            'album': 'Every Day',
            # Key was misspelt 'release_data', so it never checked the
            # release_date field used by the other fixtures in this list.
            'release_date': None,
            'release_year': None,
        },
        'params': {
            'skip_download': True,
        },
    },
    {
        # controversial video, only works with bpctr when authenticated with cookies
        'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
        'only_matching': True,
    },
    {
        # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
        'url': 'cBvYw8_A0vQ',
        'info_dict': {
            'id': 'cBvYw8_A0vQ',
            'ext': 'mp4',
            'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
            'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
            'upload_date': '20201120',
            'uploader': 'Walk around Japan',
            'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # Has multiple audio streams
        'url': 'WaOKSUlf4TM',
        'only_matching': True
    }, {
        # Requires Premium: has format 141 when requested using YTM url
        'url': 'https://music.youtube.com/watch?v=XclachpHxis',
        'only_matching': True
    }, {
        # multiple subtitles with same lang_code
        'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
        'only_matching': True,
    }, {
        # Force use android client fallback
        'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
        'info_dict': {
            'id': 'YOelRv7fMxY',
            'title': 'Digging a Secret Tunnel from my Workshop',
            'ext': '3gp',
            'upload_date': '20210624',
            'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
            'uploader': 'colinfurze',
            'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
            'description': 'md5:ecb672623246d98c6c562eed6ae798c3'
        },
        'params': {
            'format': '17',  # 3gp format available on android
            'extractor_args': {'youtube': {'player_client': ['android']}},
        },
    },
    {
        # Skip download of additional client configs (remix client config in this case)
        'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
        'only_matching': True,
        'params': {
            'extractor_args': {'youtube': {'player_skip': ['configs']}},
        },
    }
]
1586
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected here; every other URL is decided by the parent class.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
1596
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create the empty per-instance caches.

    ``_code_cache`` is filled by ``_load_player`` (player id -> player JS);
    ``_player_cache`` is filled by ``_decrypt_signature``
    ((player_url, signature cache id) -> signature function).
    """
    super(YoutubeIE, self).__init__(*args, **kwargs)
    self._code_cache, self._player_cache = {}, {}
e0df6211 1601
def _extract_player_url(self, ytcfg=None, webpage=None):
    """Return the absolute URL of the JS player, or None when it cannot be found.

    The URL is taken from ``ytcfg['PLAYER_JS_URL']`` when present; otherwise
    it is searched for in *webpage* (only if a webpage was actually
    downloaded). Protocol-relative and site-relative URLs are made absolute.
    """
    player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
    # The webpage may be missing/falsy when its download failed non-fatally
    if not player_url and webpage:
        player_url = self._search_regex(
            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
            webpage, 'player URL', fatal=False)
    if not player_url:
        # Nothing found: let the caller decide how to cope instead of
        # crashing on ``None.startswith`` below
        return None
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)
    return player_url
1614
60064c53
PH
def _signature_cache_id(self, example_sig):
    """ Return a string representation of a signature """
    # The id is the length of every dot-separated part, re-joined with dots
    part_lengths = [compat_str(len(part)) for part in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1618
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the player id found in *player_url*.

    Each pattern of ``cls._PLAYER_INFO_RE`` is tried in order and the ``id``
    group of the first one that matches is returned.

    Raises ExtractorError when no pattern matches.
    """
    # Lazy generator: searching stops at the first successful pattern
    attempts = (re.search(player_re, player_url) for player_re in cls._PLAYER_INFO_RE)
    id_m = next((m for m in attempts if m), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')
e40c758c 1628
109dd3b2 1629 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1630 player_id = self._extract_player_info(player_url)
1631 if player_id not in self._code_cache:
1632 self._code_cache[player_id] = self._download_webpage(
1633 player_url, video_id, fatal=fatal,
1634 note='Downloading player ' + player_id,
1635 errnote='Download of %s failed' % player_url)
1636 return player_id in self._code_cache
1637
e40c758c 1638 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 1639 player_id = self._extract_player_info(player_url)
e0df6211 1640
c4417ddb 1641 # Read from filesystem cache
545cc85d 1642 func_id = 'js_%s_%s' % (
1643 player_id, self._signature_cache_id(example_sig))
c4417ddb 1644 assert os.path.basename(func_id) == func_id
a0e07d31 1645
69ea8ca4 1646 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1647 if cache_spec is not None:
78caa52a 1648 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1649
109dd3b2 1650 if self._load_player(video_id, player_url):
1651 code = self._code_cache[player_id]
1652 res = self._parse_sig_js(code)
e0df6211 1653
109dd3b2 1654 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1655 cache_res = res(test_string)
1656 cache_spec = [ord(c) for c in cache_res]
83799698 1657
109dd3b2 1658 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1659 return res
83799698 1660
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the signature function *func*.

    *func* is run on a probe string of distinct characters (one per
    character of *example_sig*); the resulting index permutation is turned
    into an expression made of ``s[i]`` items and ``s[a:b:c]`` slices and
    shown through ``self.to_screen``.
    """
    def gen_sig_code(idxs):
        # Yields the pieces of the expression: runs of indices that move in
        # steps of +1/-1 become slices, isolated indices become ``s[i]``.
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            # ``end`` is inclusive here, hence ``end + step``; a negative
            # bound is left open so the slice runs to the string's edge
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                # Inside a run: keep going while the stride is unchanged
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run starts at ``prev``
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Flush whatever is still pending after the last pair
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1699
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Build a Python callable from the signature function found in *jscode*.

    The function name is located with the patterns below (tried in order),
    the function is extracted with JSInterpreter, and the returned callable
    passes its single string argument to it.
    """
    funcname_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        funcname_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature_function(s):
        # The extracted function takes its arguments as a list
        return initial_function([s])

    return run_signature_function
1723
545cc85d 1724 def _decrypt_signature(self, s, video_id, player_url):
257a2501 1725 """Turn the encrypted s field into a working signature"""
6b37f0be 1726
c8bf86d5 1727 if player_url is None:
69ea8ca4 1728 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1729
c8bf86d5 1730 try:
62af3a0e 1731 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1732 if player_id not in self._player_cache:
1733 func = self._extract_signature_function(
60064c53 1734 video_id, player_url, s
c8bf86d5
PH
1735 )
1736 self._player_cache[player_id] = func
1737 func = self._player_cache[player_id]
a06916d9 1738 if self.get_param('youtube_print_sig_code'):
60064c53 1739 self._print_sig_code(func, s)
c8bf86d5
PH
1740 return func(s)
1741 except Exception as e:
1742 tb = traceback.format_exc()
1743 raise ExtractorError(
78caa52a 1744 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1745
109dd3b2 1746 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1747 """
1748 Extract signatureTimestamp (sts)
1749 Required to tell API what sig/player version is in use.
1750 """
1751 sts = None
1752 if isinstance(ytcfg, dict):
1753 sts = int_or_none(ytcfg.get('STS'))
1754
1755 if not sts:
1756 # Attempt to extract from player
1757 if player_url is None:
1758 error_msg = 'Cannot extract signature timestamp without player_url.'
1759 if fatal:
1760 raise ExtractorError(error_msg)
1761 self.report_warning(error_msg)
1762 return
1763 if self._load_player(video_id, player_url, fatal=fatal):
1764 player_id = self._extract_player_info(player_url)
1765 code = self._code_cache[player_id]
1766 sts = int_or_none(self._search_regex(
1767 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
1768 'JS player signature timestamp', group='sts', fatal=fatal))
1769 return sts
1770
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL of *player_response*.

    Does nothing when the response carries no valid
    ``playbackTracking.videostatsPlaybackUrl.baseUrl``. The request is
    non-fatal.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return
    parsed_url = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(parsed_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]
    tracking_url = compat_urlparse.urlunparse(
        parsed_url._replace(query=compat_urllib_parse_urlencode(query, True)))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1795
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return the YouTube embeds found in *webpage* as a list.

    Three sources are scanned: embedded players (iframe / embed / object /
    SWFObject / data-video-url), lazyYT elements, and the Wordpress
    "YouTube Video Importer" plugin. The first yields HTML-unescaped URLs,
    the other two yield the attribute values (video ids).
    """
    # Embedded YouTube player
    # NOTE: the embedSWF alternative used to read ``embedSWF\(?:\s*`` - a
    # malformed group that required a literal ':' and therefore never
    # matched a real ``embedSWF("...")`` call.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(map(
        unescapeHTML,
        re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall returns one tuple per match; the video id is the last group
    entries.extend(m[-1] for m in matches)

    return entries
1827
@staticmethod
def _extract_url(webpage):
    """Return the first entry of ``_extract_urls(webpage)``, or None if there is none."""
    urls = YoutubeIE._extract_urls(webpage)
    if not urls:
        return None
    return urls[0]
1832
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id, i.e. group 2 of ``cls._VALID_URL`` matched against *url*.

    Raises ExtractorError when *url* does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1840
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a list of chapters from the player-bar data in *data*.

    Every chapter is a dict with ``start_time``, ``end_time`` and ``title``.
    A chapter ends where the next one starts; the last one ends at
    *duration*. Chapters without a start or end time are dropped. Returns
    None when *data* holds no chapter list.
    """
    chapters_list = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not chapters_list:
        return

    def chapter_time(chapter):
        # Start offset is given in milliseconds; convert to seconds
        start_millis = try_get(
            chapter,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(start_millis, scale=1000)

    last_index = len(chapters_list) - 1
    chapters = []
    for index, chapter in enumerate(chapters_list):
        start_time = chapter_time(chapter)
        if start_time is None:
            continue
        if index < last_index:
            end_time = chapter_time(chapters_list[index + 1])
        else:
            end_time = duration
        if end_time is None:
            continue
        title = try_get(
            chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
            compat_str)
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': title,
        })
    return chapters
1880
545cc85d 1881 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1882 return self._parse_json(self._search_regex(
1883 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1884 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 1885
d92f5d5a 1886 @staticmethod
1887 def parse_time_text(time_text):
1888 """
1889 Parse the comment time text
1890 time_text is in the format 'X units ago (edited)'
1891 """
1892 time_text_split = time_text.split(' ')
1893 if len(time_text_split) >= 3:
1894 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1895
a1c5d2ca
M
1896 @staticmethod
1897 def _join_text_entries(runs):
1898 text = None
1899 for run in runs:
1900 if not isinstance(run, dict):
1901 continue
1902 sub_text = try_get(run, lambda x: x['text'], compat_str)
1903 if sub_text:
1904 if not text:
1905 text = sub_text
1906 continue
1907 text += sub_text
1908 return text
1909
1910 def _extract_comment(self, comment_renderer, parent=None):
1911 comment_id = comment_renderer.get('commentId')
1912 if not comment_id:
1913 return
1914 comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
1915 text = self._join_text_entries(comment_text_runs) or ''
1916 comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
1917 time_text = self._join_text_entries(comment_time_text)
d92f5d5a 1918 timestamp = calendar.timegm(self.parse_time_text(time_text).timetuple())
a1c5d2ca
M
1919 author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
1920 author_id = try_get(comment_renderer,
1921 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
1922 votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
1923 lambda x: x['likeCount']), compat_str)) or 0
1924 author_thumbnail = try_get(comment_renderer,
1925 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
1926
1927 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
1928 is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
a1c5d2ca
M
1929 return {
1930 'id': comment_id,
1931 'text': text,
d92f5d5a 1932 'timestamp': timestamp,
a1c5d2ca
M
1933 'time_text': time_text,
1934 'like_count': votes,
1935 'is_favorited': is_liked,
1936 'author': author,
1937 'author_id': author_id,
1938 'author_thumbnail': author_thumbnail,
1939 'author_is_uploader': author_is_uploader,
1940 'parent': parent or 'root'
1941 }
1942
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, session_token_list, parent=None, comment_counts=None):
    """Generate the comments reachable from *root_continuation_data*.

    Yields comment dicts (as built by ``_extract_comment``). On the first
    page of a top-level call it may also yield one int: the expected total
    number of comments, which callers must tell apart from the dicts.

    *session_token_list* is a one-element list holding the XSRF token; it
    is updated in place when a response carries a new token.
    *parent* is the id of the comment whose replies are being fetched
    (None for top-level comments). *comment_counts* is the shared progress
    list ``[comments so far, estimated total, current thread #]``; it is
    created on the top-level call and handed down to reply calls.
    """

    def extract_thread(parent_renderer):
        # Yields every comment in the renderer's ``contents``; each comment
        # is followed immediately by its replies (fetched recursively).
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # NOTE(review): `dict` sits inside the getter tuple here instead of
            # being passed as a separate argument - confirm this is intended
            comment_renderer = try_get(
                comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                content, (lambda x: x['commentRenderer'], dict))

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    # TODO: Generalize the download code with TabIE
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    # Only a top-level call (no parent) handles the header of the first page
    first_continuation = False
    if parent is None:
        first_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        retries = self.get_param('extractor_retries', 3)
        count = -1
        last_error = None

        # Download one page, retrying up to `retries` times on known
        # transient failures
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if 'itct' in continuation:
                    query['itct'] = continuation['itct']
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = 'Downloading initial comment continuation page'
                    else:
                        note_prefix = ' Downloading comment reply thread %d %s' % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = '%sDownloading comment%s page %d %s' % (
                        ' ' if parent else '',
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        self.report_warning('Assumed end of comments (received HTTP Error 413)')
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + ' (this API is probably deprecated)'
                    if count < retries:
                        continue
                # Not retryable, or retries are used up
                raise
            else:
                # Keep the XSRF token current for the following requests
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response']), dict) or {}

                if response.get('continuationContents'):
                    break

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if isinstance(browse, dict):
                    if browse.get('reload'):
                        raise ExtractorError('Invalid or missing params in continuation request', expected=False)

                    # TODO: not tested, merged from old extractor
                    err_msg = browse.get('externalErrorMessage')
                    if err_msg:
                        last_error = err_msg
                        continue

                response_error = try_get(response, lambda x: x['responseContext']['errors']['error'][0], dict) or {}
                err_msg = response_error.get('externalErrorMessage')
                if err_msg:
                    last_error = err_msg
                    continue

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    raise ExtractorError(last_error)

        if not response:
            break
        # Prefer the visitor data of the latest response for later headers
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        known_continuation_renderers = {
            'itemSectionContinuation': extract_thread,
            'commentRepliesContinuation': extract_thread
        }

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        # Only the first known renderer is processed (every path below
        # ends in `break`)
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value

            if first_continuation:
                first_continuation = False
                expected_comment_count = try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str)

                if expected_comment_count:
                    comment_counts[1] = str_to_int(expected_comment_count)
                    self.to_screen('Downloading ~%d comments' % str_to_int(expected_comment_count))
                    # The estimated total is passed to the consumer as an int
                    yield comment_counts[1]

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
                    [comment_sort_index]['continuation']['reloadContinuationData'], dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen('Sorting comments by %s' % ('popular' if comment_sort_index == 0 else 'newest'))
                    # Restart from the sorted continuation without
                    # extracting this page's comments
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            break
2137
2138 def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
2139 """Entry for comment extraction"""
2140 comments = []
2141 known_entry_comment_renderers = (
2142 'itemSectionRenderer',
2143 )
2144 estimated_total = 0
2145 for entry in contents:
2146 for key, renderer in entry.items():
2147 if key not in known_entry_comment_renderers:
2148 continue
2149
2150 comment_iter = self._comment_entries(
2151 renderer,
2152 identity_token=self._extract_identity_token(webpage, item_id=video_id),
2153 account_syncid=self._extract_account_syncid(ytcfg),
f4f751af 2154 ytcfg=ytcfg,
a1c5d2ca
M
2155 session_token_list=[xsrf_token])
2156
2157 for comment in comment_iter:
2158 if isinstance(comment, int):
2159 estimated_total = comment
2160 continue
2161 comments.append(comment)
2162 break
d92f5d5a 2163 self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
a1c5d2ca
M
2164 return {
2165 'comments': comments,
2166 'comment_count': len(comments),
2167 }
2168
109dd3b2 2169 @staticmethod
2170 def _generate_player_context(sts=None):
2171 context = {
2172 'html5Preference': 'HTML5_PREF_WANTS',
2173 }
2174 if sts is not None:
2175 context['signatureTimestamp'] = sts
2176 return {
2177 'playbackContext': {
2178 'contentPlaybackContext': context
2179 }
2180 }
2181
4e6767b5 2182 @staticmethod
2183 def _get_video_info_params(video_id):
2184 return {
2185 'video_id': video_id,
2186 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
2187 'html5': '1',
2188 'c': 'TVHTML5',
2189 'cver': '6.20180913',
2190 }
2191
c5e8d7af 2192 def _real_extract(self, url):
cf7e015f 2193 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 2194 video_id = self._match_id(url)
9297939e 2195
2196 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
2197
545cc85d 2198 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 2199 webpage_url = base_url + 'watch?v=' + video_id
2200 webpage = self._download_webpage(
cce889b9 2201 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 2202
109dd3b2 2203 ytcfg = self._extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2204 identity_token = self._extract_identity_token(webpage, video_id)
2205 syncid = self._extract_account_syncid(ytcfg)
2206 headers = self._generate_api_headers(ytcfg, identity_token, syncid)
2207
2208 player_url = self._extract_player_url(ytcfg, webpage)
2209
4bb6b02f 2210 player_client = (self._configuration_arg('player_client') or [''])[0]
2211 if player_client not in ('web', 'android', ''):
2212 self.report_warning(f'Invalid player_client {player_client} given. Falling back to WEB')
2213 force_mobile_client = player_client == 'android'
2214 player_skip = self._configuration_arg('player_skip')
109dd3b2 2215
9297939e 2216 def get_text(x):
2217 if not x:
2218 return
2219 text = x.get('simpleText')
2220 if text and isinstance(text, compat_str):
2221 return text
2222 runs = x.get('runs')
2223 if not isinstance(runs, list):
2224 return
2225 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
2226
2227 ytm_streaming_data = {}
2228 if is_music_url:
109dd3b2 2229 ytm_webpage = None
2230 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2231 if sts and not force_mobile_client and 'configs' not in player_skip:
2232 ytm_webpage = self._download_webpage(
2233 'https://music.youtube.com',
2234 video_id, fatal=False, note="Downloading remix client config")
2235
2236 ytm_cfg = self._extract_ytcfg(video_id, ytm_webpage) or {}
2237 ytm_client = 'WEB_REMIX'
2238 if not sts or force_mobile_client:
2239 # Android client already has signature descrambled
2240 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2241 if not sts:
2242 self.report_warning('Falling back to mobile remix client for player API.')
2243 ytm_client = 'ANDROID_MUSIC'
2244 ytm_cfg = {}
2245
2246 ytm_headers = self._generate_api_headers(
2247 ytm_cfg, identity_token, syncid,
2248 client=ytm_client)
2249 ytm_query = {'videoId': video_id}
2250 ytm_query.update(self._generate_player_context(sts))
2251
2252 ytm_player_response = self._extract_response(
2253 item_id=video_id, ep='player', query=ytm_query,
2254 ytcfg=ytm_cfg, headers=ytm_headers, fatal=False,
2255 default_client=ytm_client,
2256 note='Downloading %sremix player API JSON' % ('mobile ' if force_mobile_client else ''))
2257
2258 ytm_streaming_data = try_get(ytm_player_response, lambda x: x['streamingData']) or {}
545cc85d 2259 player_response = None
2260 if webpage:
2261 player_response = self._extract_yt_initial_variable(
2262 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2263 video_id, 'initial player response')
f4f751af 2264
109dd3b2 2265 if not player_response or force_mobile_client:
2266 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2267 yt_client = 'WEB'
2268 ytpcfg = ytcfg
2269 ytp_headers = headers
2270 if not sts or force_mobile_client:
2271 # Android client already has signature descrambled
2272 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2273 if not sts:
2274 self.report_warning('Falling back to mobile client for player API.')
2275 yt_client = 'ANDROID'
2276 ytpcfg = {}
2277 ytp_headers = self._generate_api_headers(ytpcfg, identity_token, syncid, yt_client)
2278
2279 yt_query = {'videoId': video_id}
2280 yt_query.update(self._generate_player_context(sts))
2281 player_response = self._extract_response(
2282 item_id=video_id, ep='player', query=yt_query,
2283 ytcfg=ytpcfg, headers=ytp_headers, fatal=False,
2284 default_client=yt_client,
2285 note='Downloading %splayer API JSON' % ('mobile ' if force_mobile_client else '')
2286 )
545cc85d 2287
109dd3b2 2288 # Age-gate workarounds
545cc85d 2289 playability_status = player_response.get('playabilityStatus') or {}
109dd3b2 2290 if playability_status.get('reason') in self._AGE_GATE_REASONS:
545cc85d 2291 pr = self._parse_json(try_get(compat_parse_qs(
2292 self._download_webpage(
2293 base_url + 'get_video_info', video_id,
4e6767b5 2294 'Refetching age-gated info webpage', 'unable to download video info webpage',
2295 query=self._get_video_info_params(video_id), fatal=False)),
545cc85d 2296 lambda x: x['player_response'][0],
2297 compat_str) or '{}', video_id)
109dd3b2 2298 if not pr:
2299 self.report_warning('Falling back to embedded-only age-gate workaround.')
2300 embed_webpage = None
2301 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2302 if sts and not force_mobile_client and 'configs' not in player_skip:
2303 embed_webpage = self._download_webpage(
2304 'https://www.youtube.com/embed/%s?html5=1' % video_id,
2305 video_id=video_id, note='Downloading age-gated embed config')
2306
2307 ytcfg_age = self._extract_ytcfg(video_id, embed_webpage) or {}
2308 # If we extracted the embed webpage, it'll tell us if we can view the video
2309 embedded_pr = self._parse_json(
2310 try_get(ytcfg_age, lambda x: x['PLAYER_VARS']['embedded_player_response'], str) or '{}',
2311 video_id=video_id)
2312 embedded_ps_reason = try_get(embedded_pr, lambda x: x['playabilityStatus']['reason'], str) or ''
2313 if embedded_ps_reason not in self._AGE_GATE_REASONS:
2314 yt_client = 'WEB_EMBEDDED_PLAYER'
2315 if not sts or force_mobile_client:
2316 # Android client already has signature descrambled
2317 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2318 if not sts:
2319 self.report_warning(
2320 'Falling back to mobile embedded client for player API (note: some formats may be missing).')
2321 yt_client = 'ANDROID_EMBEDDED_PLAYER'
2322 ytcfg_age = {}
2323
2324 ytage_headers = self._generate_api_headers(
2325 ytcfg_age, identity_token, syncid, client=yt_client)
2326 yt_age_query = {'videoId': video_id}
2327 yt_age_query.update(self._generate_player_context(sts))
2328 pr = self._extract_response(
2329 item_id=video_id, ep='player', query=yt_age_query,
2330 ytcfg=ytcfg_age, headers=ytage_headers, fatal=False,
2331 default_client=yt_client,
2332 note='Downloading %sage-gated player API JSON' % ('mobile ' if force_mobile_client else '')
2333 ) or {}
2334
545cc85d 2335 if pr:
2336 player_response = pr
2337
2338 trailer_video_id = try_get(
2339 playability_status,
2340 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
2341 compat_str)
2342 if trailer_video_id:
2343 return self.url_result(
2344 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 2345
545cc85d 2346 search_meta = (
2347 lambda x: self._html_search_meta(x, webpage, default=None)) \
2348 if webpage else lambda x: None
dbdaaa23 2349
545cc85d 2350 video_details = player_response.get('videoDetails') or {}
37357d21 2351 microformat = try_get(
545cc85d 2352 player_response,
2353 lambda x: x['microformat']['playerMicroformatRenderer'],
2354 dict) or {}
2355 video_title = video_details.get('title') \
2356 or get_text(microformat.get('title')) \
2357 or search_meta(['og:title', 'twitter:title', 'title'])
2358 video_description = video_details.get('shortDescription')
cf7e015f 2359
8fe10494 2360 if not smuggled_data.get('force_singlefeed', False):
a06916d9 2361 if not self.get_param('noplaylist'):
8fe10494
S
2362 multifeed_metadata_list = try_get(
2363 player_response,
2364 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 2365 compat_str)
8fe10494
S
2366 if multifeed_metadata_list:
2367 entries = []
2368 feed_ids = []
2369 for feed in multifeed_metadata_list.split(','):
2370 # Unquote should take place before split on comma (,) since textual
2371 # fields may contain comma as well (see
067aa17e 2372 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 2373 feed_data = compat_parse_qs(
2374 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
2375
def feed_entry(name):
    """Look up the first value stored under ``name`` for the current feed.

    Reads ``feed_data`` from the enclosing scope (the query-string dict
    parsed from one multifeed metadata item). The lookup goes through
    ``try_get`` with ``compat_str`` as the expected type, so callers
    treat a falsy result as "field not available".
    """
    def first_value(parsed):
        # Parsed query strings map each key to a list of values
        return parsed[name][0]

    return try_get(feed_data, first_value, compat_str)
6b09401b
S
2379
2380 feed_id = feed_entry('id')
2381 if not feed_id:
2382 continue
2383 feed_title = feed_entry('title')
2384 title = video_title
2385 if feed_title:
2386 title += ' (%s)' % feed_title
8fe10494
S
2387 entries.append({
2388 '_type': 'url_transparent',
2389 'ie_key': 'Youtube',
2390 'url': smuggle_url(
545cc85d 2391 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 2392 {'force_singlefeed': True}),
6b09401b 2393 'title': title,
8fe10494 2394 })
6b09401b 2395 feed_ids.append(feed_id)
8fe10494
S
2396 self.to_screen(
2397 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2398 % (', '.join(feed_ids), video_id))
545cc85d 2399 return self.playlist_result(
2400 entries, video_id, video_title, video_description)
8fe10494
S
2401 else:
2402 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 2403
9297939e 2404 formats, itags, stream_ids = [], [], []
cc2db878 2405 itag_qualities = {}
d3fc8074 2406 q = qualities([
2407 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2408 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2409 ])
9297939e 2410
545cc85d 2411 streaming_data = player_response.get('streamingData') or {}
2412 streaming_formats = streaming_data.get('formats') or []
2413 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
9297939e 2414 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2415 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2416
545cc85d 2417 for fmt in streaming_formats:
2418 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2419 continue
321bf820 2420
cc2db878 2421 itag = str_or_none(fmt.get('itag'))
9297939e 2422 audio_track = fmt.get('audioTrack') or {}
2423 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2424 if stream_id in stream_ids:
2425 continue
2426
cc2db878 2427 quality = fmt.get('quality')
d3fc8074 2428 if quality == 'tiny' or not quality:
2429 quality = fmt.get('audioQuality', '').lower() or quality
cc2db878 2430 if itag and quality:
2431 itag_qualities[itag] = quality
2432 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2433 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2434 # number of fragment that would subsequently requested with (`&sq=N`)
2435 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2436 continue
2437
545cc85d 2438 fmt_url = fmt.get('url')
2439 if not fmt_url:
2440 sc = compat_parse_qs(fmt.get('signatureCipher'))
2441 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2442 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2443 if not (sc and fmt_url and encrypted_sig):
2444 continue
545cc85d 2445 if not player_url:
201e9eaa 2446 continue
545cc85d 2447 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2448 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2449 fmt_url += '&' + sp + '=' + signature
2450
545cc85d 2451 if itag:
2452 itags.append(itag)
9297939e 2453 stream_ids.append(stream_id)
2454
cc2db878 2455 tbr = float_or_none(
2456 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 2457 dct = {
2458 'asr': int_or_none(fmt.get('audioSampleRate')),
2459 'filesize': int_or_none(fmt.get('contentLength')),
2460 'format_id': itag,
0fb983f6 2461 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
545cc85d 2462 'fps': int_or_none(fmt.get('fps')),
2463 'height': int_or_none(fmt.get('height')),
dca3ff4a 2464 'quality': q(quality),
cc2db878 2465 'tbr': tbr,
545cc85d 2466 'url': fmt_url,
2467 'width': fmt.get('width'),
0fb983f6 2468 'language': audio_track.get('id', '').split('.')[0],
545cc85d 2469 }
2470 mimetype = fmt.get('mimeType')
2471 if mimetype:
2472 mobj = re.match(
2473 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
2474 if mobj:
2475 dct['ext'] = mimetype2ext(mobj.group(1))
2476 dct.update(parse_codecs(mobj.group(2)))
cc2db878 2477 no_audio = dct.get('acodec') == 'none'
2478 no_video = dct.get('vcodec') == 'none'
2479 if no_audio:
2480 dct['vbr'] = tbr
2481 if no_video:
2482 dct['abr'] = tbr
2483 if no_audio or no_video:
545cc85d 2484 dct['downloader_options'] = {
2485 # Youtube throttles chunks >~10M
2486 'http_chunk_size': 10485760,
bf1317d2 2487 }
7c60c33e 2488 if dct.get('ext'):
2489 dct['container'] = dct['ext'] + '_dash'
545cc85d 2490 formats.append(dct)
2491
4bb6b02f 2492 skip_manifests = self._configuration_arg('skip')
5d3a0e79 2493 get_dash = 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
2494 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2495
9297939e 2496 for sd in (streaming_data, ytm_streaming_data):
5d3a0e79 2497 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
9297939e 2498 if hls_manifest_url:
2499 for f in self._extract_m3u8_formats(
2500 hls_manifest_url, video_id, 'mp4', fatal=False):
2501 itag = self._search_regex(
2502 r'/itag/(\d+)', f['url'], 'itag', default=None)
2503 if itag:
2504 f['format_id'] = itag
8d68ab98 2505 formats.append(f)
545cc85d 2506
5d3a0e79 2507 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2508 if dash_manifest_url:
2509 for f in self._extract_mpd_formats(
2510 dash_manifest_url, video_id, fatal=False):
2511 itag = f['format_id']
2512 if itag in itags:
2513 continue
2514 if itag in itag_qualities:
2515 f['quality'] = q(itag_qualities[itag])
2516 filesize = int_or_none(self._search_regex(
2517 r'/clen/(\d+)', f.get('fragment_base_url')
2518 or f['url'], 'file size', default=None))
2519 if filesize:
2520 f['filesize'] = filesize
2521 formats.append(f)
bf1317d2 2522
545cc85d 2523 if not formats:
a06916d9 2524 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
b7da73eb 2525 self.raise_no_formats(
545cc85d 2526 'This video is DRM protected.', expected=True)
2527 pemr = try_get(
2528 playability_status,
2529 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2530 dict) or {}
2531 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2532 subreason = pemr.get('subreason')
2533 if subreason:
2534 subreason = clean_html(get_text(subreason))
2535 if subreason == 'The uploader has not made this video available in your country.':
2536 countries = microformat.get('availableCountries')
2537 if not countries:
2538 regions_allowed = search_meta('regionsAllowed')
2539 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2540 self.raise_geo_restricted(subreason, countries, metadata_available=True)
545cc85d 2541 reason += '\n' + subreason
2542 if reason:
b7da73eb 2543 self.raise_no_formats(reason, expected=True)
bf1317d2 2544
545cc85d 2545 self._sort_formats(formats)
bf1317d2 2546
545cc85d 2547 keywords = video_details.get('keywords') or []
2548 if not keywords and webpage:
2549 keywords = [
2550 unescapeHTML(m.group('content'))
2551 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2552 for keyword in keywords:
2553 if keyword.startswith('yt:stretch='):
201c1459 2554 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2555 if mobj:
2556 # NB: float is intentional for forcing float division
2557 w, h = (float(v) for v in mobj.groups())
2558 if w > 0 and h > 0:
2559 ratio = w / h
2560 for f in formats:
2561 if f.get('vcodec') != 'none':
2562 f['stretched_ratio'] = ratio
2563 break
6449cd80 2564
545cc85d 2565 thumbnails = []
2566 for container in (video_details, microformat):
2567 for thumbnail in (try_get(
2568 container,
2569 lambda x: x['thumbnail']['thumbnails'], list) or []):
2570 thumbnail_url = thumbnail.get('url')
2571 if not thumbnail_url:
bf1317d2 2572 continue
1988fab7 2573 # Sometimes youtube gives a wrong thumbnail URL. See:
2574 # https://github.com/yt-dlp/yt-dlp/issues/233
2575 # https://github.com/ytdl-org/youtube-dl/issues/28023
2576 if 'maxresdefault' in thumbnail_url:
2577 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2578 thumbnails.append({
545cc85d 2579 'url': thumbnail_url,
ff2751ac 2580 'height': int_or_none(thumbnail.get('height')),
545cc85d 2581 'width': int_or_none(thumbnail.get('width')),
ff2751ac 2582 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
545cc85d 2583 })
ff2751ac 2584 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2585 if thumbnail_url:
2586 thumbnails.append({
2587 'url': thumbnail_url,
2588 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2589 })
2590 # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
2591 # See: https://github.com/ytdl-org/youtube-dl/issues/29049
2592 thumbnails.append({
2593 'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
2594 'preference': 1,
2595 })
2596 self._remove_duplicate_formats(thumbnails)
545cc85d 2597
2598 category = microformat.get('category') or search_meta('genre')
2599 channel_id = video_details.get('channelId') \
2600 or microformat.get('externalChannelId') \
2601 or search_meta('channelId')
2602 duration = int_or_none(
2603 video_details.get('lengthSeconds')
2604 or microformat.get('lengthSeconds')) \
2605 or parse_duration(search_meta('duration'))
2606 is_live = video_details.get('isLive')
f6745c49 2607 is_upcoming = video_details.get('isUpcoming')
545cc85d 2608 owner_profile_url = microformat.get('ownerProfileUrl')
2609
2610 info = {
2611 'id': video_id,
2612 'title': self._live_title(video_title) if is_live else video_title,
2613 'formats': formats,
2614 'thumbnails': thumbnails,
2615 'description': video_description,
2616 'upload_date': unified_strdate(
2617 microformat.get('uploadDate')
2618 or search_meta('uploadDate')),
2619 'uploader': video_details['author'],
2620 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2621 'uploader_url': owner_profile_url,
2622 'channel_id': channel_id,
2623 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2624 'duration': duration,
2625 'view_count': int_or_none(
2626 video_details.get('viewCount')
2627 or microformat.get('viewCount')
2628 or search_meta('interactionCount')),
2629 'average_rating': float_or_none(video_details.get('averageRating')),
2630 'age_limit': 18 if (
2631 microformat.get('isFamilySafe') is False
2632 or search_meta('isFamilyFriendly') == 'false'
2633 or search_meta('og:restrictions:age') == '18+') else 0,
2634 'webpage_url': webpage_url,
2635 'categories': [category] if category else None,
2636 'tags': keywords,
2637 'is_live': is_live,
2638 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2639 'was_live': video_details.get('isLiveContent'),
545cc85d 2640 }
b477fc13 2641
545cc85d 2642 pctr = try_get(
2643 player_response,
2644 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2645 subtitles = {}
2646 if pctr:
def process_language(container, base_url, lang_code, sub_name, query):
    """Register one subtitle entry per supported format for ``lang_code``.

    container -- dict of language code -> list of subtitle dicts; the
                 list for ``lang_code`` is created when missing
    base_url  -- caption URL that the query parameters are merged into
    lang_code -- key under which the entries are stored
    sub_name  -- value stored as each entry's 'name'
    query     -- dict of extra URL parameters; NOTE: mutated in place,
                 its 'fmt' key is overwritten for every format
    """
    def build_entry(sub_format):
        # Set the format first: the URL is derived from the updated query
        query['fmt'] = sub_format
        return {
            'ext': sub_format,
            'url': update_url_query(base_url, query),
            'name': sub_name,
        }

    container.setdefault(lang_code, []).extend(
        build_entry(sub_format) for sub_format in self._SUBTITLE_FORMATS)
7e72694b 2658
545cc85d 2659 for caption_track in (pctr.get('captionTracks') or []):
2660 base_url = caption_track.get('baseUrl')
2661 if not base_url:
2662 continue
2663 if caption_track.get('kind') != 'asr':
120916da 2664 lang_code = (
2665 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2666 or caption_track.get('languageCode'))
545cc85d 2667 if not lang_code:
2668 continue
2669 process_language(
774d79cc 2670 subtitles, base_url, lang_code,
2671 try_get(caption_track, lambda x: x.get('name').get('simpleText')),
2672 {})
545cc85d 2673 continue
2674 automatic_captions = {}
2675 for translation_language in (pctr.get('translationLanguages') or []):
2676 translation_language_code = translation_language.get('languageCode')
2677 if not translation_language_code:
2678 continue
2679 process_language(
2680 automatic_captions, base_url, translation_language_code,
49c258e1 2681 try_get(translation_language, (
2682 lambda x: x['languageName']['simpleText'],
2683 lambda x: x['languageName']['runs'][0]['text'])),
545cc85d 2684 {'tlang': translation_language_code})
2685 info['automatic_captions'] = automatic_captions
2686 info['subtitles'] = subtitles
7e72694b 2687
545cc85d 2688 parsed_url = compat_urllib_parse_urlparse(url)
2689 for component in [parsed_url.fragment, parsed_url.query]:
2690 query = compat_parse_qs(component)
2691 for k, v in query.items():
2692 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2693 d_k += '_time'
2694 if d_k not in info and k in s_ks:
2695 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2696
2697 # Youtube Music Auto-generated description
822b9d9c 2698 if video_description:
38d70284 2699 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2700 if mobj:
822b9d9c
RA
2701 release_year = mobj.group('release_year')
2702 release_date = mobj.group('release_date')
2703 if release_date:
2704 release_date = release_date.replace('-', '')
2705 if not release_year:
545cc85d 2706 release_year = release_date[:4]
2707 info.update({
2708 'album': mobj.group('album'.strip()),
2709 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2710 'track': mobj.group('track').strip(),
2711 'release_date': release_date,
cc2db878 2712 'release_year': int_or_none(release_year),
545cc85d 2713 })
7e72694b 2714
545cc85d 2715 initial_data = None
2716 if webpage:
2717 initial_data = self._extract_yt_initial_variable(
2718 webpage, self._YT_INITIAL_DATA_RE, video_id,
2719 'yt initial data')
2720 if not initial_data:
109dd3b2 2721 initial_data = self._extract_response(
2722 item_id=video_id, ep='next', fatal=False,
2723 ytcfg=ytcfg, headers=headers, query={'videoId': video_id},
2724 note='Downloading initial data API JSON')
545cc85d 2725
c60ee3a2 2726 try:
2727 # This will error if there is no livechat
2728 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2729 info['subtitles']['live_chat'] = [{
2730 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2731 'video_id': video_id,
2732 'ext': 'json',
f6745c49 2733 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 2734 }]
2735 except (KeyError, IndexError, TypeError):
2736 pass
545cc85d 2737
2738 if initial_data:
2739 chapters = self._extract_chapters_from_json(
2740 initial_data, video_id, duration)
2741 if not chapters:
2742 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2743 contents = try_get(
2744 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2745 list)
2746 if not contents:
2747 continue
2748
def chapter_time(mmlir):
    """Return the time parsed from a marker item's 'timeDescription'.

    mmlir -- a 'macroMarkersListItemRenderer' dict. The caller also
             passes the result of a ``try_get`` lookup for the *next*
             item, which may not be a dict when that item lacks the
             renderer.

    Returns None for non-dict input so the caller's
    ``end_time is None`` check skips the chapter instead of the whole
    extraction failing with AttributeError on ``mmlir.get``.
    """
    if not isinstance(mmlir, dict):
        return None
    return parse_duration(
        get_text(mmlir.get('timeDescription')))
2752
2753 chapters = []
2754 for next_num, content in enumerate(contents, start=1):
2755 mmlir = content.get('macroMarkersListItemRenderer') or {}
2756 start_time = chapter_time(mmlir)
2757 end_time = chapter_time(try_get(
2758 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2759 if next_num < len(contents) else duration
2760 if start_time is None or end_time is None:
2761 continue
2762 chapters.append({
2763 'start_time': start_time,
2764 'end_time': end_time,
2765 'title': get_text(mmlir.get('title')),
2766 })
2767 if chapters:
2768 break
2769 if chapters:
2770 info['chapters'] = chapters
2771
2772 contents = try_get(
2773 initial_data,
2774 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2775 list) or []
2776 for content in contents:
2777 vpir = content.get('videoPrimaryInfoRenderer')
2778 if vpir:
2779 stl = vpir.get('superTitleLink')
2780 if stl:
2781 stl = get_text(stl)
2782 if try_get(
2783 vpir,
2784 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2785 info['location'] = stl
2786 else:
2787 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2788 if mobj:
2789 info.update({
2790 'series': mobj.group(1),
2791 'season_number': int(mobj.group(2)),
2792 'episode_number': int(mobj.group(3)),
2793 })
2794 for tlb in (try_get(
2795 vpir,
2796 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2797 list) or []):
2798 tbr = tlb.get('toggleButtonRenderer') or {}
2799 for getter, regex in [(
2800 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2801 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2802 lambda x: x['accessibility'],
2803 lambda x: x['accessibilityData']['accessibilityData'],
2804 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2805 label = (try_get(tbr, getter, dict) or {}).get('label')
2806 if label:
2807 mobj = re.match(regex, label)
2808 if mobj:
2809 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2810 break
2811 sbr_tooltip = try_get(
2812 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2813 if sbr_tooltip:
2814 like_count, dislike_count = sbr_tooltip.split(' / ')
2815 info.update({
2816 'like_count': str_to_int(like_count),
2817 'dislike_count': str_to_int(dislike_count),
2818 })
2819 vsir = content.get('videoSecondaryInfoRenderer')
2820 if vsir:
2821 info['channel'] = get_text(try_get(
2822 vsir,
2823 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2824 dict))
545cc85d 2825 rows = try_get(
2826 vsir,
2827 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2828 list) or []
2829 multiple_songs = False
2830 for row in rows:
2831 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2832 multiple_songs = True
2833 break
2834 for row in rows:
2835 mrr = row.get('metadataRowRenderer') or {}
2836 mrr_title = mrr.get('title')
2837 if not mrr_title:
2838 continue
2839 mrr_title = get_text(mrr['title'])
2840 mrr_contents_text = get_text(mrr['contents'][0])
2841 if mrr_title == 'License':
2842 info['license'] = mrr_contents_text
2843 elif not multiple_songs:
2844 if mrr_title == 'Album':
2845 info['album'] = mrr_contents_text
2846 elif mrr_title == 'Artist':
2847 info['artist'] = mrr_contents_text
2848 elif mrr_title == 'Song':
2849 info['track'] = mrr_contents_text
2850
2851 fallbacks = {
2852 'channel': 'uploader',
2853 'channel_id': 'uploader_id',
2854 'channel_url': 'uploader_url',
2855 }
2856 for to, frm in fallbacks.items():
2857 if not info.get(to):
2858 info[to] = info.get(frm)
2859
2860 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2861 v = info.get(s_k)
2862 if v:
2863 info[d_k] = v
b84071c0 2864
c224251a
M
2865 is_private = bool_or_none(video_details.get('isPrivate'))
2866 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2867 is_membersonly = None
b28f8d24 2868 is_premium = None
c224251a
M
2869 if initial_data and is_private is not None:
2870 is_membersonly = False
b28f8d24 2871 is_premium = False
c224251a
M
2872 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2873 for content in contents or []:
2874 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2875 for badge in badges or []:
2876 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2877 if label.lower() == 'members only':
2878 is_membersonly = True
2879 break
b28f8d24
M
2880 elif label.lower() == 'premium':
2881 is_premium = True
2882 break
2883 if is_membersonly or is_premium:
c224251a
M
2884 break
2885
2886 # TODO: Add this for playlists
2887 info['availability'] = self._availability(
2888 is_private=is_private,
b28f8d24 2889 needs_premium=is_premium,
c224251a
M
2890 needs_subscription=is_membersonly,
2891 needs_auth=info['age_limit'] >= 18,
2892 is_unlisted=None if is_private is None else is_unlisted)
2893
06167fbb 2894 # get xsrf for annotations or comments
a06916d9 2895 get_annotations = self.get_param('writeannotations', False)
2896 get_comments = self.get_param('getcomments', False)
06167fbb 2897 if get_annotations or get_comments:
29f7c58a 2898 xsrf_token = None
545cc85d 2899 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2900 if ytcfg:
2901 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2902 if not xsrf_token:
2903 xsrf_token = self._search_regex(
2904 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2905 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2906
2907 # annotations
06167fbb 2908 if get_annotations:
64b6a4e9
RA
2909 invideo_url = try_get(
2910 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2911 if xsrf_token and invideo_url:
29f7c58a 2912 xsrf_field_name = None
2913 if ytcfg:
2914 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2915 if not xsrf_field_name:
2916 xsrf_field_name = self._search_regex(
2917 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2918 webpage, 'xsrf field name',
29f7c58a 2919 group='xsrf_field_name', default='session_token')
8a784c74 2920 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2921 self._proto_relative_url(invideo_url),
2922 video_id, note='Downloading annotations',
2923 errnote='Unable to download video annotations', fatal=False,
2924 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2925
277d6ff5 2926 if get_comments:
a1c5d2ca 2927 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage, xsrf_token)
4ea3be0a 2928
545cc85d 2929 self.mark_watched(video_id, player_response)
d77ab8e2 2930
545cc85d 2931 return info
c5e8d7af 2932
5f6a1245 2933
8bdd16b4 2934class YoutubeTabIE(YoutubeBaseInfoExtractor):
2935 IE_DESC = 'YouTube.com tab'
70d5c17b 2936 _VALID_URL = r'''(?x)
2937 https?://
2938 (?:\w+\.)?
2939 (?:
2940 youtube(?:kids)?\.com|
2941 invidio\.us
2942 )/
2943 (?:
fe03a6cd 2944 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 2945 (?P<not_channel>
9ba5705a 2946 feed/|hashtag/|
70d5c17b 2947 (?:playlist|watch)\?.*?\blist=
2948 )|
29f7c58a 2949 (?!(?:%s)\b) # Direct URLs
70d5c17b 2950 )
2951 (?P<id>[^/?\#&]+)
2952 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2953 IE_NAME = 'youtube:tab'
2954
81127aa5 2955 _TESTS = [{
da692b79 2956 'note': 'playlists, multipage',
8bdd16b4 2957 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2958 'playlist_mincount': 94,
2959 'info_dict': {
2960 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2961 'title': 'Игорь Клейнер - Playlists',
2962 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2963 'uploader': 'Игорь Клейнер',
2964 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2965 },
2966 }, {
da692b79 2967 'note': 'playlists, multipage, different order',
8bdd16b4 2968 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2969 'playlist_mincount': 94,
2970 'info_dict': {
2971 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2972 'title': 'Игорь Клейнер - Playlists',
2973 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2974 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2975 'uploader': 'Игорь Клейнер',
8bdd16b4 2976 },
201c1459 2977 }, {
da692b79 2978 'note': 'playlists, series',
201c1459 2979 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
2980 'playlist_mincount': 5,
2981 'info_dict': {
2982 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2983 'title': '3Blue1Brown - Playlists',
2984 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 2985 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
2986 'uploader': '3Blue1Brown',
201c1459 2987 },
8bdd16b4 2988 }, {
da692b79 2989 'note': 'playlists, singlepage',
8bdd16b4 2990 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2991 'playlist_mincount': 4,
2992 'info_dict': {
2993 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2994 'title': 'ThirstForScience - Playlists',
2995 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2996 'uploader': 'ThirstForScience',
2997 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2998 }
2999 }, {
3000 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3001 'only_matching': True,
3002 }, {
da692b79 3003 'note': 'basic, single video playlist',
0e30a7b9 3004 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3005 'info_dict': {
0e30a7b9 3006 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3007 'uploader': 'Sergey M.',
3008 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3009 'title': 'youtube-dl public playlist',
81127aa5 3010 },
0e30a7b9 3011 'playlist_count': 1,
9291475f 3012 }, {
da692b79 3013 'note': 'empty playlist',
0e30a7b9 3014 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3015 'info_dict': {
0e30a7b9 3016 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3017 'uploader': 'Sergey M.',
3018 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3019 'title': 'youtube-dl empty playlist',
9291475f
PH
3020 },
3021 'playlist_count': 0,
3022 }, {
da692b79 3023 'note': 'Home tab',
8bdd16b4 3024 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3025 'info_dict': {
8bdd16b4 3026 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3027 'title': 'lex will - Home',
3028 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3029 'uploader': 'lex will',
3030 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3031 },
8bdd16b4 3032 'playlist_mincount': 2,
9291475f 3033 }, {
da692b79 3034 'note': 'Videos tab',
8bdd16b4 3035 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3036 'info_dict': {
8bdd16b4 3037 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3038 'title': 'lex will - Videos',
3039 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3040 'uploader': 'lex will',
3041 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3042 },
8bdd16b4 3043 'playlist_mincount': 975,
9291475f 3044 }, {
da692b79 3045 'note': 'Videos tab, sorted by popular',
8bdd16b4 3046 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3047 'info_dict': {
8bdd16b4 3048 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3049 'title': 'lex will - Videos',
3050 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3051 'uploader': 'lex will',
3052 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3053 },
8bdd16b4 3054 'playlist_mincount': 199,
9291475f 3055 }, {
da692b79 3056 'note': 'Playlists tab',
8bdd16b4 3057 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3058 'info_dict': {
8bdd16b4 3059 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3060 'title': 'lex will - Playlists',
3061 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3062 'uploader': 'lex will',
3063 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3064 },
8bdd16b4 3065 'playlist_mincount': 17,
ac7553d0 3066 }, {
da692b79 3067 'note': 'Community tab',
8bdd16b4 3068 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3069 'info_dict': {
8bdd16b4 3070 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3071 'title': 'lex will - Community',
3072 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3073 'uploader': 'lex will',
3074 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3075 },
3076 'playlist_mincount': 18,
87dadd45 3077 }, {
da692b79 3078 'note': 'Channels tab',
8bdd16b4 3079 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3080 'info_dict': {
8bdd16b4 3081 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3082 'title': 'lex will - Channels',
3083 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3084 'uploader': 'lex will',
3085 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3086 },
deaec5af 3087 'playlist_mincount': 12,
cd684175 3088 }, {
3089 'note': 'Search tab',
3090 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3091 'playlist_mincount': 40,
3092 'info_dict': {
3093 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3094 'title': '3Blue1Brown - Search - linear algebra',
3095 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3096 'uploader': '3Blue1Brown',
3097 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3098 },
6b08cdf6 3099 }, {
a0566bbf 3100 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3101 'only_matching': True,
3102 }, {
a0566bbf 3103 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3104 'only_matching': True,
3105 }, {
a0566bbf 3106 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3107 'only_matching': True,
3108 }, {
3109 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3110 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3111 'info_dict': {
3112 'title': '29C3: Not my department',
3113 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3114 'uploader': 'Christiaan008',
3115 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3116 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3117 },
3118 'playlist_count': 96,
3119 }, {
3120 'note': 'Large playlist',
3121 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3122 'info_dict': {
8bdd16b4 3123 'title': 'Uploads from Cauchemar',
3124 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3125 'uploader': 'Cauchemar',
3126 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3127 },
8bdd16b4 3128 'playlist_mincount': 1123,
3129 }, {
da692b79 3130 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3131 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3132 'only_matching': True,
4b7df0d3
JMF
3133 }, {
3134 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3135 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3136 'info_dict': {
acf757f4
PH
3137 'title': 'Uploads from Interstellar Movie',
3138 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3139 'uploader': 'Interstellar Movie',
8bdd16b4 3140 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3141 },
481cc733 3142 'playlist_mincount': 21,
358de58c 3143 }, {
3144 'note': 'Playlist with "show unavailable videos" button',
3145 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3146 'info_dict': {
3147 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3148 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3149 'uploader': 'Phim Siêu Nhân Nhật Bản',
3150 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3151 },
da692b79 3152 'playlist_mincount': 200,
5d342002 3153 }, {
da692b79 3154 'note': 'Playlist with unavailable videos in page 7',
5d342002 3155 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3156 'info_dict': {
3157 'title': 'Uploads from BlankTV',
3158 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3159 'uploader': 'BlankTV',
3160 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3161 },
da692b79 3162 'playlist_mincount': 1000,
8bdd16b4 3163 }, {
da692b79 3164 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3165 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3166 'info_dict': {
3167 'title': 'Data Analysis with Dr Mike Pound',
3168 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3169 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3170 'uploader': 'Computerphile',
deaec5af 3171 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3172 },
3173 'playlist_mincount': 11,
3174 }, {
a0566bbf 3175 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3176 'only_matching': True,
dacb3a86 3177 }, {
da692b79 3178 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3179 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3180 'info_dict': {
3181 'id': 'FqZTN594JQw',
3182 'ext': 'webm',
3183 'title': "Smiley's People 01 detective, Adventure Series, Action",
3184 'uploader': 'STREEM',
3185 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3186 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3187 'upload_date': '20150526',
3188 'license': 'Standard YouTube License',
3189 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3190 'categories': ['People & Blogs'],
3191 'tags': list,
dbdaaa23 3192 'view_count': int,
dacb3a86
S
3193 'like_count': int,
3194 'dislike_count': int,
3195 },
3196 'params': {
3197 'skip_download': True,
3198 },
13a75688 3199 'skip': 'This video is not available.',
dacb3a86 3200 'add_ie': [YoutubeIE.ie_key()],
481cc733 3201 }, {
8bdd16b4 3202 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3203 'only_matching': True,
66b48727 3204 }, {
8bdd16b4 3205 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3206 'only_matching': True,
a0566bbf 3207 }, {
3208 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3209 'info_dict': {
da692b79 3210 'id': 'X1whbWASnNQ', # This will keep changing
a0566bbf 3211 'ext': 'mp4',
deaec5af 3212 'title': compat_str,
a0566bbf 3213 'uploader': 'Sky News',
3214 'uploader_id': 'skynews',
3215 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3216 'upload_date': r're:\d{8}',
3217 'description': compat_str,
a0566bbf 3218 'categories': ['News & Politics'],
3219 'tags': list,
3220 'like_count': int,
3221 'dislike_count': int,
3222 },
3223 'params': {
3224 'skip_download': True,
3225 },
da692b79 3226 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3227 }, {
3228 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3229 'info_dict': {
3230 'id': 'a48o2S1cPoo',
3231 'ext': 'mp4',
3232 'title': 'The Young Turks - Live Main Show',
3233 'uploader': 'The Young Turks',
3234 'uploader_id': 'TheYoungTurks',
3235 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3236 'upload_date': '20150715',
3237 'license': 'Standard YouTube License',
3238 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3239 'categories': ['News & Politics'],
3240 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3241 'like_count': int,
3242 'dislike_count': int,
3243 },
3244 'params': {
3245 'skip_download': True,
3246 },
3247 'only_matching': True,
3248 }, {
3249 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3250 'only_matching': True,
3251 }, {
3252 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3253 'only_matching': True,
09f1580e 3254 }, {
3255 'note': 'A channel that is not live. Should raise error',
3256 'url': 'https://www.youtube.com/user/numberphile/live',
3257 'only_matching': True,
3d3dddc9 3258 }, {
3259 'url': 'https://www.youtube.com/feed/trending',
3260 'only_matching': True,
3261 }, {
3d3dddc9 3262 'url': 'https://www.youtube.com/feed/library',
3263 'only_matching': True,
3264 }, {
3d3dddc9 3265 'url': 'https://www.youtube.com/feed/history',
3266 'only_matching': True,
3267 }, {
3d3dddc9 3268 'url': 'https://www.youtube.com/feed/subscriptions',
3269 'only_matching': True,
3270 }, {
3d3dddc9 3271 'url': 'https://www.youtube.com/feed/watch_later',
3272 'only_matching': True,
3273 }, {
da692b79 3274 'note': 'Recommended - redirects to home page',
3d3dddc9 3275 'url': 'https://www.youtube.com/feed/recommended',
3276 'only_matching': True,
29f7c58a 3277 }, {
da692b79 3278 'note': 'inline playlist with not always working continuations',
29f7c58a 3279 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3280 'only_matching': True,
3281 }, {
3282 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3283 'only_matching': True,
3284 }, {
3285 'url': 'https://www.youtube.com/course',
3286 'only_matching': True,
3287 }, {
3288 'url': 'https://www.youtube.com/zsecurity',
3289 'only_matching': True,
3290 }, {
3291 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3292 'only_matching': True,
3293 }, {
3294 'url': 'https://www.youtube.com/TheYoungTurks/live',
3295 'only_matching': True,
39ed931e 3296 }, {
3297 'url': 'https://www.youtube.com/hashtag/cctv9',
3298 'info_dict': {
3299 'id': 'cctv9',
3300 'title': '#cctv9',
3301 },
3302 'playlist_mincount': 350,
201c1459 3303 }, {
3304 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3305 'only_matching': True,
9297939e 3306 }, {
da692b79 3307 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3308 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3309 'only_matching': True
fe03a6cd 3310 }, {
3311 'note': '/browse/ should redirect to /channel/',
3312 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3313 'only_matching': True
3314 }, {
3315 'note': 'VLPL, should redirect to playlist?list=PL...',
3316 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3317 'info_dict': {
3318 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3319 'uploader': 'NoCopyrightSounds',
3320 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3321 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3322 'title': 'NCS Releases',
3323 },
3324 'playlist_mincount': 166,
18db7548 3325 }, {
3326 'note': 'Topic, should redirect to playlist?list=UU...',
3327 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3328 'info_dict': {
3329 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3330 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3331 'title': 'Uploads from Royalty Free Music - Topic',
3332 'uploader': 'Royalty Free Music - Topic',
3333 },
3334 'expected_warnings': [
3335 'A channel/user page was given',
3336 'The URL does not have a videos tab',
3337 ],
3338 'playlist_mincount': 101,
3339 }, {
3340 'note': 'Topic without a UU playlist',
3341 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3342 'info_dict': {
3343 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3344 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3345 },
3346 'expected_warnings': [
3347 'A channel/user page was given',
3348 'The URL does not have a videos tab',
3349 'Falling back to channel URL',
3350 ],
3351 'playlist_mincount': 9,
abcdd12b 3352 }, {
3353 'note': 'Youtube music Album',
3354 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3355 'info_dict': {
3356 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3357 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3358 },
3359 'playlist_count': 50,
29f7c58a 3360 }]
3361
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    URLs accepted by YoutubeIE are always declined here; every other URL
    is judged by the parent class implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 3366
def _extract_channel_id(self, webpage):
    """Return the channel id found in ``webpage``.

    The ``channelId`` meta tag is used when present. Otherwise the id is
    parsed out of a channel URL taken from one of several URL meta tags
    (``og:url``, ``al:*:url``, ``twitter:*``), which is expected to look
    like ``https://www.youtube.com/channel/<id>``.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to sit inside the capturing group: with `(...)+`
    # the group only retains its last repetition, i.e. the final character
    # of the id instead of the whole id.
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
15f6397c 3379
8bdd16b4 3380 @staticmethod
cd7c66cf 3381 def _extract_basic_item_renderer(item):
3382 # Modified from _extract_grid_item_renderer
201c1459 3383 known_basic_renderers = (
3384 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3385 )
3386 for key, renderer in item.items():
201c1459 3387 if not isinstance(renderer, dict):
cd7c66cf 3388 continue
201c1459 3389 elif key in known_basic_renderers:
3390 return renderer
3391 elif key.startswith('grid') and key.endswith('Renderer'):
3392 return renderer
8bdd16b4 3393
8bdd16b4 3394 def _grid_entries(self, grid_renderer):
3395 for item in grid_renderer['items']:
3396 if not isinstance(item, dict):
39b62db1 3397 continue
cd7c66cf 3398 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 3399 if not isinstance(renderer, dict):
3400 continue
3401 title = try_get(
201c1459 3402 renderer, (lambda x: x['title']['runs'][0]['text'],
3403 lambda x: x['title']['simpleText']), compat_str)
8bdd16b4 3404 # playlist
3405 playlist_id = renderer.get('playlistId')
3406 if playlist_id:
3407 yield self.url_result(
3408 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3409 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3410 video_title=title)
201c1459 3411 continue
8bdd16b4 3412 # video
3413 video_id = renderer.get('videoId')
3414 if video_id:
3415 yield self._extract_video(renderer)
201c1459 3416 continue
8bdd16b4 3417 # channel
3418 channel_id = renderer.get('channelId')
3419 if channel_id:
3420 title = try_get(
3421 renderer, lambda x: x['title']['simpleText'], compat_str)
3422 yield self.url_result(
3423 'https://www.youtube.com/channel/%s' % channel_id,
3424 ie=YoutubeTabIE.ie_key(), video_title=title)
201c1459 3425 continue
3426 # generic endpoint URL support
3427 ep_url = urljoin('https://www.youtube.com/', try_get(
3428 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3429 compat_str))
3430 if ep_url:
3431 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3432 if ie.suitable(ep_url):
3433 yield self.url_result(
3434 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3435 break
8bdd16b4 3436
3d3dddc9 3437 def _shelf_entries_from_content(self, shelf_renderer):
3438 content = shelf_renderer.get('content')
3439 if not isinstance(content, dict):
8bdd16b4 3440 return
cd7c66cf 3441 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3442 if renderer:
3443 # TODO: add support for nested playlists so each shelf is processed
3444 # as separate playlist
3445 # TODO: this includes only first N items
3446 for entry in self._grid_entries(renderer):
3447 yield entry
3448 renderer = content.get('horizontalListRenderer')
3449 if renderer:
3450 # TODO
3451 pass
8bdd16b4 3452
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield results for a shelf.

    When the shelf exposes an endpoint URL, a URL result for it is
    yielded first. The entries found in the shelf content are yielded
    afterwards in every case, since a shelf may come without a URL.
    With ``skip_channels`` set, a shelf whose URL contains ``/channels?``
    yields nothing at all.
    """
    shelf_url = urljoin('https://www.youtube.com', try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str))
    if shelf_url:
        # Links to other channels are skipped by looking at the URL;
        # checking endpoint.commandMetadata.webCommandMetadata.webPageType
        # == WEB_PAGE_TYPE_CHANNEL will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    for item in self._shelf_entries_from_content(shelf_renderer):
        yield item
c5e8d7af 3470
8bdd16b4 3471 def _playlist_entries(self, video_list_renderer):
3472 for content in video_list_renderer['contents']:
3473 if not isinstance(content, dict):
3474 continue
3475 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3476 if not isinstance(renderer, dict):
3477 continue
3478 video_id = renderer.get('videoId')
3479 if not video_id:
3480 continue
3481 yield self._extract_video(renderer)
07aeced6 3482
def _rich_entries(self, rich_grid_renderer):
    """Yield the video entry nested under ``content.videoRenderer``, if any."""
    video = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video.get('videoId'):
        yield self._extract_video(video)
3490
8bdd16b4 3491 def _video_entry(self, video_renderer):
3492 video_id = video_renderer.get('videoId')
3493 if video_id:
3494 return self._extract_video(video_renderer)
dacb3a86 3495
def _post_thread_entries(self, post_thread_renderer):
    """Yield the entries referenced by a community post.

    In order: the attached video, the attached playlist, then every
    YouTube video linked in the post text, except a link to the attached
    video itself.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not (link and YoutubeIE.suitable(link)):
            continue
        linked_id = YoutubeIE._match_id(link)
        # The attached video was already yielded above
        if linked_id != video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)
dacb3a86 3531
8bdd16b4 3532 def _post_thread_continuation_entries(self, post_thread_continuation):
3533 contents = post_thread_continuation.get('contents')
3534 if not isinstance(contents, list):
3535 return
3536 for content in contents:
3537 renderer = content.get('backstagePostThreadRenderer')
3538 if not isinstance(renderer, dict):
3539 continue
3540 for entry in self._post_thread_entries(renderer):
3541 yield entry
07aeced6 3542
39ed931e 3543 r''' # unused
3544 def _rich_grid_entries(self, contents):
3545 for content in contents:
3546 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3547 if video_renderer:
3548 entry = self._video_entry(video_renderer)
3549 if entry:
3550 yield entry
3551 '''
3552
29f7c58a 3553 @staticmethod
3554 def _build_continuation_query(continuation, ctp=None):
3555 query = {
3556 'ctoken': continuation,
3557 'continuation': continuation,
3558 }
3559 if ctp:
3560 query['itct'] = ctp
3561 return query
3562
@staticmethod
def _extract_next_continuation_data(renderer):
    """Return a continuation query built from ``nextContinuationData``.

    Reads ``renderer['continuations'][0]['nextContinuationData']`` and
    returns None when it, or its ``continuation`` token, is missing.
    """
    next_data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
    token = next_data.get('continuation') if next_data else None
    if not token:
        return None
    return YoutubeTabIE._build_continuation_query(
        token, next_data.get('clickTrackingParams'))
c5e8d7af 3574
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for ``renderer``, or None.

    ``nextContinuationData`` is preferred. Failing that, the elements of
    ``contents`` and then ``items`` are scanned for the first
    ``continuationItemRenderer`` that carries a continuation token.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query
    candidates = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str
        ) if endpoint else None
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
    return None
448830ce 3597
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield all entries of ``tab``, following continuations page by page.

    The entries embedded in the tab content are yielded first. After
    that, continuation pages are requested through the API until no
    continuation is left, a request returns nothing, or a response holds
    no recognised renderer.

    ``item_id`` is only used to label the page requests;
    ``identity_token``, ``account_syncid`` and ``ytcfg`` are passed on
    to build the API headers and requests.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        # Yields the entries below `parent_renderer` and records the
        # continuation it finds in `continuation_list[0]`.
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                # Only the first recognised renderer of each item is used
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list shared with extract_entries: Python 2 does not
    # support nonlocal
    continuation_list = [None]
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

    for page_num in itertools.count(1):
        if not continuation:
            break
        query = {'continuation': continuation['continuation']}
        # `itct` is only present when click-tracking params were found
        # together with the token, so it must not be indexed blindly
        click_tracking_params = continuation.get('itct')
        if click_tracking_params:
            query['clickTracking'] = {'clickTrackingParams': click_tracking_params}
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=query, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep using the previous visitor data when the response has none
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        # First response shape: `continuationContents`
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Reset so that a continuation recorded by extract_entries
            # for this page can be told apart from an older one
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response shape: `appendContinuationItemsAction`. Each
        # value is (handler, key under which the handler expects the items).
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The type of the first item decides how the whole page is handled
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        break
9558dcec 3717
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the tab whose ``selected`` flag is True.

    Both ``tabRenderer`` and ``expandableTabRenderer`` tabs are
    considered. Raises ExtractorError when no tab is selected.
    """
    for tab in tabs:
        tab_renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    raise ExtractorError('Unable to find selected tab')
b82f815f 3726
@staticmethod
def _extract_uploader(data):
    """Return uploader name, id and URL read from the playlist sidebar.

    Only the fields that could be determined are present in the result;
    an empty dict is returned when the sidebar has no owner info.
    """
    found = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    for sidebar_item in sidebar_items or []:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        found['uploader'] = owner.get('text')
        found['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        found['uploader_url'] = urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
    return dict((field, value) for field, value in found.items() if value is not None)
8bdd16b4 3749
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a tabbed (channel or playlist) page.

    Metadata is read from the channel metadata renderer or, failing
    that, from the playlist metadata renderer. The entries come from the
    currently selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    meta = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if meta:
        channel_name = meta.get('title')
        channel_url = meta.get('channelUrl')
        channel_id = meta.get('externalId')
    else:
        meta = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    raw_thumbnails = []
    if meta:
        title = meta.get('title')
        description = meta.get('description', '')
        playlist_id = channel_id
        tags = meta.get('keywords', '').split()
        raw_thumbnails = (
            try_get(meta, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    # Keep only thumbnails that carry a valid URL
    thumbnails = []
    for thumb in raw_thumbnails:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        title = (
            try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
            or playlist_id)
    # Append the selected tab's title and expanded text to the title
    title = (
        title
        + format_field(selected_tab, 'title', ' - %s')
        + format_field(selected_tab, 'expandedText', ' - %s'))

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        # No channel metadata: fall back to the playlist sidebar owner
        metadata.update(self._extract_uploader(data))
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    identity_token = self._extract_identity_token(webpage, item_id)
    account_syncid = self._extract_account_syncid(data)
    ytcfg = self._extract_ytcfg(item_id, webpage)
    entries = self._entries(
        selected_tab, playlist_id, identity_token, account_syncid, ytcfg)
    return self.playlist_result(entries, **metadata)
73c4ac2c 3822
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a mix playlist, requesting further pages as needed.

    Each page repeats videos that were already seen, so only the videos
    following the last yielded one are emitted. Iteration stops when a
    page has no videos or no new videos, when the first video shows up
    again, or when the response does not contain a playlist.
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
        visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item may not be a playlistPanelVideoRenderer or may lack
        # a watch endpoint; fall back to the defaults below in that case
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query,
            ep='next',
            headers=headers,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # Nothing to iterate on the next round; end instead of
            # failing inside _playlist_entries
            return
cd7c66cf 3861
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Return the result for an inline playlist.

    When the playlist points at a URL different from ``url``, extraction
    is delegated to that URL. Otherwise the playlist is extracted here
    as a mix playlist.
    """
    playlist_title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    target_url = urljoin(url, try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str))
    if not target_url or target_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, webpage),
            playlist_id=playlist_id, playlist_title=playlist_title)

    return self.url_result(
        target_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=playlist_title)
c5e8d7af 3879
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Request the playlist again, this time including unavailable videos.

    The browse id and params are taken from the 'show unavailable videos'
    menu button when it exists; otherwise default values are used.
    Returns None when the page has no playlist sidebar.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    if not sidebar_items:
        return
    browse_id = params = None
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        primary_info = sidebar_item.get('playlistSidebarPrimaryInfoRenderer')
        menu_entries = try_get(
            primary_info, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_entry in menu_entries:
            if not isinstance(menu_entry, dict):
                continue
            nav_item = menu_entry.get('menuNavigationItemRenderer')
            label = try_get(
                nav_item, lambda x: x['text']['simpleText'], compat_str)
            if label and label.lower() == 'show unavailable videos':
                endpoint = try_get(
                    nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
                browse_id = endpoint.get('browseId')
                params = endpoint.get('params')
                break

    ytcfg = self._extract_ytcfg(item_id, webpage)
    account_syncid = self._extract_account_syncid(ytcfg)
    identity_token = self._extract_identity_token(webpage, item_id=item_id)
    visitor_data = try_get(
        self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=account_syncid,
        identity_token=identity_token, visitor_data=visitor_data)
    # Fall back to default values when the button was not found
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False,
        note='Downloading API JSON with unavailable videos')
358de58c 3923
cd7c66cf 3924 def _extract_webpage(self, url, item_id):
a06916d9 3925 retries = self.get_param('extractor_retries', 3)
62bff2c1 3926 count = -1
c705177d 3927 last_error = 'Incomplete yt initial data recieved'
14fdfea9 3928 while count < retries:
62bff2c1 3929 count += 1
14fdfea9 3930 # Sometimes youtube returns a webpage with incomplete ytInitialData
62bff2c1 3931 # See: https://github.com/yt-dlp/yt-dlp/issues/116
3932 if count:
c705177d 3933 self.report_warning('%s. Retrying ...' % last_error)
5ef7d9bd 3934 webpage = self._download_webpage(
3935 url, item_id,
cd7c66cf 3936 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
14fdfea9 3937 data = self._extract_yt_initial_data(item_id, webpage)
14fdfea9 3938 if data.get('contents') or data.get('currentVideoEndpoint'):
3939 break
95c01b6c 3940 # Extract alerts here only when there is error
3941 self._extract_and_report_alerts(data)
c705177d 3942 if count >= retries:
6a39ee13 3943 raise ExtractorError(last_error)
cd7c66cf 3944 return webpage, data
3945
9297939e 3946 @staticmethod
3947 def _smuggle_data(entries, data):
3948 for entry in entries:
3949 if data:
3950 entry['url'] = smuggle_url(entry['url'], data)
3951 yield entry
3952
def _real_extract(self, url):
    """
    Entry point: unsmuggle the URL, run the actual extraction, and re-smuggle
    the data into every resulting entry URL.
    """
    url, extra = unsmuggle_url(url, {})
    if self.is_music_url(url):
        extra['is_music_url'] = True

    result = self.__real_extract(url, extra)
    entries = result.get('entries')
    if entries:
        # Propagate the smuggled data to the entries' own URLs
        result['entries'] = self._smuggle_data(entries, extra)
    return result
3961
# Splits a URL into three named parts: `pre` (whatever _VALID_URL matches),
# an optional `tab` path segment (only attempted when the `channel_type` group
# took part in the match) and `post` (the remainder of the string).
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
3963
def __real_extract(self, url, smuggled_data):
    """
    Resolve a channel/tab/playlist/watch URL into an extraction result.

    Steps, in order:
      1. Rewrite the URL's host to www.youtube.com and split it with _url_re.
      2. For channel URLs, apply the YouTube Music rewrites and, unless the
         'no-youtube-channel-redirect' compat option is set, send tab-less
         URLs to the '/videos' tab.
      3. Handle watch URLs with a missing video id and the 'noplaylist' param.
      4. Download the page and dispatch on the shape of its initial data
         (tabs, watch-page playlist, or a single video).

    @param url            URL to extract
    @param smuggled_data  dict; only the 'is_music_url' key is read here
    @returns              result of url_result / _extract_from_tabs /
                          _extract_from_playlist
    @raises               ExtractorError when the page cannot be recognized or
                          a '/live' tab did not redirect to a video
    """
    item_id = self._match_id(url)
    # Replace whatever host was given with www.youtube.com
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Match against _url_re and turn unmatched (None) groups into ''
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                item_id = self._search_regex(
                    r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                    self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                    'playlist id')
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Reassemble the (possibly rewritten) URL and re-split it
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            # The page that came back is not the '/videos' tab that was requested
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            if alert_type == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        # Only switch over once the playlist page is known to be usable
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)

    # `data` may have been replaced above, so look the tabs up again
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    # Prefer the video id reported by the page over the one from the query string
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 4079
c5e8d7af 4080
class YoutubePlaylistIE(InfoExtractor):
    """Matches bare playlist ids and `list=` URLs and hands them to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE accepts or that carry a `v` parameter."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return False if has_video_id else super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rebuild the URL as a /playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query if there is one, else synthesize `list=<id>`
        query = parse_qs(url) or {'list': playlist_id}
        url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4163
4164
class YoutubeYtBeIE(InfoExtractor):
    """Turns youtu.be short links that carry a `list=` parameter into watch URLs."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4203
4204
class YoutubeYtUserIE(InfoExtractor):
    """Expands the `ytuser:<name>` keyword into a /user/ URL."""

    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the user's page URL."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4218
b05654f0 4219
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Expands the `:ytfav` keyword family into the `LL` playlist URL."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed `list=LL` playlist URL."""
        target = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(target, ie=YoutubeTabIE.ie_key())
4237
4238
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor behind the `ytsearch` keyword."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """
        Yield extracted videos for `query`, page by page.

        Stops when a page is empty, when no continuation token is found, or
        right after the `n`-th video has been yielded.
        """
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS

        yielded = 0
        page_num = 0
        while True:
            page_num += 1
            response = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not response:
                return

            # The first page and continuation pages keep their sections in different places
            sections = try_get(
                response,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            next_token = None
            for section in sections:
                if next_token is None:
                    next_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for item in items or ():
                    renderer = item.get('videoRenderer') if isinstance(item, dict) else None
                    if not isinstance(renderer, dict) or not renderer.get('videoId'):
                        continue

                    yield self._extract_video(renderer)
                    yielded += 1
                    if yielded == n:
                        return

            if not next_token:
                return
            request['continuation'] = next_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 4308
c9ae7b95 4309
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE behind the `ytsearchdate` keyword."""

    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the 'params' field of every search request
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4315
c9ae7b95 4316
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Runs a search taken from a youtube.com/results URL."""

    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own _VALID_URL unchanged."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the query and the `sp` filter from the URL, then run the search."""
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = params.get('search_query') or params.get('q')
        query = terms[0]
        # An absent `sp` parameter becomes '' (falsy, so no params are sent)
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4342
4343
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed extractors.
    A subclass has to provide _FEED_NAME; it is used for both IE_NAME and the feed URL.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass's _FEED_NAME."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /feed/<name> URL."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4360
4361
class YoutubeWatchLaterIE(InfoExtractor):
    """Expands the `:ytwatchlater` keyword into the `WL` playlist URL."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed `list=WL` playlist URL."""
        target = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(target, ie=YoutubeTabIE.ie_key())
3462ffa8 4374
4375
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'recommended' feed; also matches the bare home page URL."""

    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    # Overrides the base class, which sets this to True
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4391
1ed5b5c9 4392
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'subscriptions' feed (`:ytsubs` keyword family)."""

    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4404
1ed5b5c9 4405
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'history' feed (`:ythis` / `:ythistory` keywords)."""

    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4414
4415
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """
    Catches watch/attribution URLs that end right after their first query
    parameter and turns them into a helpful error instead of an extraction.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raises an expected ExtractorError explaining how to quote the URL."""
        # The example commands name this project's executable (yt-dlp), so
        # that copying them from the message actually works.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4463
4464
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catches watch URLs whose `v` id is only 1-10 characters long and reports them."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raises an expected ExtractorError naming the short id and the URL."""
        short_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (short_id, url)
        raise ExtractorError(message, expected=True)