]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[BravoTV] Improve metadata extraction (#483)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
109dd3b2 6import copy
a5c56234 7import hashlib
0ca96d48 8import itertools
c5e8d7af 9import json
c4417ddb 10import os.path
d77ab8e2 11import random
c5e8d7af 12import re
8a784c74 13import time
e0df6211 14import traceback
c5e8d7af 15
b05654f0 16from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 17from ..compat import (
edf3e38e 18 compat_chr,
29f7c58a 19 compat_HTTPError,
c5e8d7af 20 compat_parse_qs,
545cc85d 21 compat_str,
7fd002c0 22 compat_urllib_parse_unquote_plus,
15707c7e 23 compat_urllib_parse_urlencode,
7c80519c 24 compat_urllib_parse_urlparse,
7c61bd36 25 compat_urlparse,
4bb4a188 26)
545cc85d 27from ..jsinterp import JSInterpreter
4bb4a188 28from ..utils import (
c224251a 29 bool_or_none,
c5e8d7af 30 clean_html,
26fe8ffe 31 dict_get,
d92f5d5a 32 datetime_from_str,
358de58c 33 error_to_compat_str,
c5e8d7af 34 ExtractorError,
b60419c5 35 format_field,
2d30521a 36 float_or_none,
dd27fd17 37 int_or_none,
94278f72 38 mimetype2ext,
6310acf5 39 parse_codecs,
7c80519c 40 parse_duration,
dca3ff4a 41 qualities,
3995d37d 42 remove_start,
cf7e015f 43 smuggle_url,
dbdaaa23 44 str_or_none,
c93d53f5 45 str_to_int,
556dbe7f 46 try_get,
c5e8d7af
PH
47 unescapeHTML,
48 unified_strdate,
cf7e015f 49 unsmuggle_url,
8bdd16b4 50 update_url_query,
21c340b8 51 url_or_none,
6e6bc8da 52 urlencode_postdata,
d92f5d5a 53 urljoin
c5e8d7af
PH
54)
55
5f6a1245 56
def parse_qs(url):
    """Return the query-string parameters of *url* as a dict of name -> list of values."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
59
60
de7f3446 61class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b
JMF
62 """Provide base functions for Youtube extractors"""
# Google account endpoints referenced by the sign-in flow in _login()
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

_LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
_CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
# '{0}' is substituted with the TL value taken from the challenge response
_TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

# Regex alternation of reserved names
# NOTE(review): presumably top-level URL path segments that are not channel names - usage is outside this view
_RESERVED_NAMES = '|'.join((
    'channel', 'c', 'user', 'browse', 'playlist', 'watch', 'w', 'v', 'embed', 'e', 'watch_popup', 'shorts',
    'movies', 'results', 'shared', 'hashtag', 'trending', 'feed', 'feeds', 'oembed', 'get_video_info',
    'storefront', 'oops', 'index', 'account', 'reporthistory', 't/terms', 'about', 'upload', 'signin', 'logout',
))

_NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False

# Prefixed playlist IDs (at least 10 trailing characters) or one of the fixed short IDs
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 80
def _login(self):
    """
    Attempt to log in to YouTube.
    True is returned if successful or skipped.
    False is returned if login failed.

    If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.

    Username/password login is currently disabled: after the cookie check and the
    warning below, the method returns without running the legacy sign-in flow.
    """

    def warn(message):
        self.report_warning(message)

    # username+password login is broken
    if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
        self.raise_login_required(
            'Login details are needed to download this content', method='cookies')
    username, password = self._get_login_info()
    if username:
        warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
    # NOTE(review): assumed to sit at function level (the legacy flow below is then unreachable) - TODO confirm
    return
    # Everything below this is broken!

    # No authentication to be performed
    if username is None:
        if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
            raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
        # if self.get_param('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
        #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
        return True

    login_page = self._download_webpage(
        self._LOGIN_URL, None,
        note='Downloading login page',
        errnote='unable to fetch login page', fatal=False)
    if login_page is False:
        return

    login_form = self._hidden_inputs(login_page)

    def req(url, f_req, note, errnote):
        # POST the hidden login-form fields plus the serialized f.req payload
        data = login_form.copy()
        data.update({
            'pstMsg': 1,
            'checkConnection': 'youtube',
            'checkedDomains': 'youtube',
            'hl': 'en',
            'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
            'f.req': json.dumps(f_req),
            'flowName': 'GlifWebSignIn',
            'flowEntry': 'ServiceLogin',
            # TODO: reverse actual botguard identifier generation algo
            'bgRequest': '["identifier",""]',
        })
        return self._download_json(
            url, None, note=note, errnote=errnote,
            # Drop everything before the first '[' so the remainder parses as JSON
            transform_source=lambda s: re.sub(r'^[^[]*', '', s),
            fatal=False,
            data=urlencode_postdata(data), headers={
                'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                'Google-Accounts-XSRF': 1,
            })

    # Shared by the lookup and the challenge request payloads
    service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

    lookup_req = [
        username,
        None, [], None, 'US', None, None, 2, False, True,
        [
            None, None,
            [2, 1, None, 1,
             service_login_url,
             None, [], 4],
            1, [None, None, []], None, None, None, True
        ],
        username,
    ]

    lookup_results = req(
        self._LOOKUP_URL, lookup_req,
        'Looking up account info', 'Unable to look up account info')

    if lookup_results is False:
        return False

    user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
    if not user_hash:
        warn('Unable to extract user hash')
        return False

    challenge_req = [
        user_hash,
        None, 1, None, [1, None, None, None, [password, None, True]],
        [
            None, None, [2, 1, None, 1, service_login_url, None, [], 4],
            1, [None, None, []], None, None, None, True
        ]]

    challenge_results = req(
        self._CHALLENGE_URL, challenge_req,
        'Logging in', 'Unable to log in')

    if challenge_results is False:
        return

    login_res = try_get(challenge_results, lambda x: x[0][5], list)
    if login_res:
        login_msg = try_get(login_res, lambda x: x[5], compat_str)
        # Parenthesized so the prefix is kept for every message, not only for an invalid password
        warn('Unable to login: %s' % (
            'Invalid password' if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
        return False

    res = try_get(challenge_results, lambda x: x[0][-1], list)
    if not res:
        warn('Unable to extract result entry')
        return False

    login_challenge = try_get(res, lambda x: x[0][0], list)
    if login_challenge:
        challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
        if challenge_str == 'TWO_STEP_VERIFICATION':
            # SEND_SUCCESS - TFA code has been successfully sent to phone
            # QUOTA_EXCEEDED - reached the limit of TFA codes
            status = try_get(login_challenge, lambda x: x[5], compat_str)
            if status == 'QUOTA_EXCEEDED':
                warn('Exceeded the limit of TFA codes, try later')
                return False

            tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
            if not tl:
                warn('Unable to extract TL')
                return False

            tfa_code = self._get_tfa_info('2-step verification code')

            if not tfa_code:
                warn(
                    'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                    '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                return False

            tfa_code = remove_start(tfa_code, 'G-')

            tfa_req = [
                user_hash, None, 2, None,
                [
                    9, None, None, None, None, None, None, None,
                    [None, tfa_code, True, 2]
                ]]

            tfa_results = req(
                self._TFA_URL.format(tl), tfa_req,
                'Submitting TFA code', 'Unable to submit TFA code')

            if tfa_results is False:
                return False

            tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
            if tfa_res:
                tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                # Parenthesized for the same reason as the password warning above
                warn('Unable to finish TFA: %s' % (
                    'Invalid TFA code' if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                return False

            check_cookie_url = try_get(
                tfa_results, lambda x: x[0][-1][2], compat_str)
        else:
            CHALLENGES = {
                'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
            }
            challenge = CHALLENGES.get(
                challenge_str,
                '%s returned error %s.' % (self.IE_NAME, challenge_str))
            warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
            return False
    else:
        check_cookie_url = try_get(res, lambda x: x[2], compat_str)

    if not check_cookie_url:
        warn('Unable to extract CheckCookie URL')
        return False

    check_cookie_results = self._download_webpage(
        check_cookie_url, None, 'Checking cookie', fatal=False)

    if check_cookie_results is False:
        return False

    if 'https://myaccount.google.com/' not in check_cookie_results:
        warn('Unable to log in')
        return False

    return True
275
def _initialize_consent(self):
    """Set an accepted CONSENT cookie for youtube.com unless a session or consent cookie already covers it."""
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return

    pending_id = None
    consent_cookie = jar.get('CONSENT')
    if consent_cookie:
        current_value = consent_cookie.value
        if 'YES' in current_value:
            return
        # Reuse the numeric ID of a pending consent when there is one
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', current_value, 'consent', default=None)

    consent_id = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 290
def _real_initialize(self):
    """Initialize the consent cookie, then attempt login when a downloader is attached."""
    self._initialize_consent()
    if self._downloader is not None:
        # The login result is not acted upon here
        self._login()
c5e8d7af 297
# Patterns locating the JSON blobs embedded in a page; group 1 is the JSON object
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
# Text expected right after such a blob; appended to the patterns above for a stricter match
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

# Fallback ytcfg per Innertube client, generated from one row per client:
# (client name, client version, API key, INNERTUBE_CONTEXT_CLIENT_NAME)
_YT_DEFAULT_YTCFGS = {
    client_name: {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': client_name,
        'INNERTUBE_CLIENT_VERSION': client_version,
        'INNERTUBE_API_KEY': api_key,
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': client_name,
                'clientVersion': client_version,
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': context_client_name,
    }
    for client_name, client_version, api_key, context_client_name in (
        ('WEB', '2.20210622.10.00', 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', 1),
        ('WEB_REMIX', '1.20210621.00.00', 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30', 67),
        ('WEB_EMBEDDED_PLAYER', '1.20210620.0.1', 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', 56),
        ('ANDROID', '16.20', 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', 'ANDROID'),
        ('ANDROID_EMBEDDED_PLAYER', '16.20', 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', 'ANDROID_EMBEDDED_PLAYER'),
        ('ANDROID_MUSIC', '4.32', 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30', 'ANDROID_MUSIC'),
    )
}

# API hostname per client; _get_innertube_host() falls back to the 'WEB' entry
_YT_DEFAULT_INNERTUBE_HOSTS = {
    'DIRECT': 'youtubei.googleapis.com',
    'WEB': 'www.youtube.com',
    'WEB_REMIX': 'music.youtube.com',
    'ANDROID_MUSIC': 'music.youtube.com'
}
395
def _get_default_ytcfg(self, client='WEB'):
    """Return a deep copy of the default ytcfg for *client*, falling back to the WEB client."""
    defaults = self._YT_DEFAULT_YTCFGS
    chosen = client
    if client not in defaults:
        self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
        chosen = 'WEB'
    # Deep copy so that callers can modify the result without touching the class-level table
    return copy.deepcopy(defaults[chosen])
401
def _get_innertube_host(self, client='WEB'):
    """Return the API hostname configured for *client*, or the WEB hostname otherwise."""
    lookup_order = (client, 'WEB')
    return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, lookup_order)
404
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
    """try_get on *ytcfg*, falling back to the default ytcfg of *default_client* when the result is falsy."""
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    fallback_cfg = self._get_default_ytcfg(default_client)
    return try_get(fallback_cfg, getter, expected_type)
409
def _extract_client_name(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_NAME from *ytcfg*, or from the defaults of *default_client*."""
    def client_name(cfg):
        return cfg['INNERTUBE_CLIENT_NAME']

    return self._ytcfg_get_safe(
        ytcfg, client_name, expected_type=compat_str, default_client=default_client)
412
def _extract_client_version(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_VERSION from *ytcfg*, or from the defaults of *default_client*."""
    def client_version(cfg):
        return cfg['INNERTUBE_CLIENT_VERSION']

    return self._ytcfg_get_safe(
        ytcfg, client_version, expected_type=compat_str, default_client=default_client)
415
def _extract_api_key(self, ytcfg=None, default_client='WEB'):
    """Return INNERTUBE_API_KEY from *ytcfg*, or from the defaults of *default_client*."""
    def api_key(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(
        ytcfg, api_key, expected_type=compat_str, default_client=default_client)
418
def _extract_context(self, ytcfg=None, default_client='WEB'):
    """
    Return the Innertube request context.

    The context found in ytcfg is returned as-is when present. Otherwise the default
    context of default_client is used and, if a ytcfg was given, its client
    name/version and visitor data are copied into it.
    """
    def context_of(cfg):
        return try_get(cfg, lambda x: x['INNERTUBE_CONTEXT'], dict)

    found = context_of(ytcfg)
    if found:
        return found

    context = context_of(self._get_default_ytcfg(default_client))
    if not ytcfg:
        return context

    # Recreate the client context (required)
    client_version = self._extract_client_version(ytcfg, default_client)
    client_name = self._extract_client_name(ytcfg, default_client)
    client_context = context['client']
    client_context['clientVersion'] = client_version
    client_context['clientName'] = client_name

    visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
    if visitor_data:
        client_context['visitorData'] = visitor_data
    return context
438
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """Return the 'SAPISIDHASH <time>_<sha1>' Authorization value, or None when no SAPISID cookie exists."""
    # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
    # See: https://github.com/yt-dlp/yt-dlp/issues/393
    jar = self._get_cookies('https://www.youtube.com')
    sapisid = dict_get(jar, ('__Secure-3PAPISID', 'SAPISID'))
    if sapisid is None:
        return None

    timestamp = round(time.time())
    # SAPISID cookie is required if not already present
    if not jar.get('SAPISID'):
        self._set_cookie(
            '.youtube.com', 'SAPISID', sapisid.value, secure=True, expire_time=timestamp + 3600)

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    payload = f'{timestamp} {sapisid.value} {origin}'.encode('utf-8')
    digest = hashlib.sha1(payload).hexdigest()
    return f'SAPISIDHASH {timestamp}_{digest}'
a5c56234
M
456
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='WEB'):
    """
    POST *query* to the Innertube endpoint *ep* and return the decoded JSON response.

    @param ep              endpoint name appended to https://<host>/youtubei/v1/
    @param query           dict merged into the request body next to 'context'
    @param video_id        ID passed to _download_json for reporting
    @param headers         extra headers; they override the generated ones
    @param context         Innertube context; defaults to the one of default_client
    @param api_key         API key; defaults to the one of default_client
    @param api_hostname    host to call; defaults to the host of default_client
    @param default_client  client whose defaults fill in anything not supplied
    """
    data = {'context': context or self._extract_context(default_client=default_client)}
    data.update(query)

    real_headers = self._generate_api_headers(client=default_client)
    real_headers.update({'content-type': 'application/json'})
    if headers:
        real_headers.update(headers)

    return self._download_json(
        'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(data).encode('utf8'), headers=real_headers,
        # The fallback key must come from the same client as the context, headers and host
        query={'key': api_key or self._extract_api_key(default_client=default_client)})
472
def _extract_yt_initial_data(self, video_id, webpage):
    """Find the ytInitialData JSON in *webpage* and return it parsed."""
    # Try the boundary-anchored pattern first, then the bare one
    patterns = (
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE)
    raw_json = self._search_regex(patterns, webpage, 'yt initial data')
    return self._parse_json(raw_json, video_id)
0c148415 479
a1c5d2ca
M
def _extract_identity_token(self, webpage, item_id):
    """Return ID_TOKEN from the page's ytcfg, or from a regex search of *webpage*; None if absent."""
    ytcfg = self._extract_ytcfg(item_id, webpage)
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
489
@staticmethod
def _extract_account_syncid(data):
    """
    Extract syncId required to download private playlists of secondary channels
    @param data Either response or ytcfg
    @returns    the channel syncid, the DELEGATED_SESSION_ID value, or None
                (also None when data is not a dict)
    """
    if not isinstance(data, dict):
        return None

    response_context = data.get('responseContext')
    web_context = (
        response_context.get('mainAppWebResponseContext')
        if isinstance(response_context, dict) else None)
    candidates = (
        web_context.get('datasyncId') if isinstance(web_context, dict) else None,
        data.get('DATASYNC_ID'),
    )
    # First candidate that is a string wins, as with the original getter tuple
    datasync_id = next((value for value in candidates if isinstance(value, str)), None) or ''

    sync_ids = datasync_id.split('||')
    if len(sync_ids) >= 2 and sync_ids[1]:
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        return sync_ids[0]
    # ytcfg includes channel_syncid if on secondary channel
    return data.get('DELEGATED_SESSION_ID')
a1c5d2ca 505
def _extract_ytcfg(self, video_id, webpage):
    """Return the dict passed to ytcfg.set(...) in *webpage*, or {} when missing or unparsable."""
    if not webpage:
        return {}
    raw_cfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    parsed = self._parse_json(raw_cfg, video_id, fatal=False)
    return parsed or {}
513
def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None,
                          visitor_data=None, api_hostname=None, client='WEB'):
    """Build the HTTP headers for an Innertube request; optional values are added only when given."""
    origin = 'https://' + (api_hostname or self._get_innertube_host(client))
    context_client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=client)
    headers = {
        'X-YouTube-Client-Name': compat_str(context_client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, client),
        'Origin': origin
    }

    if identity_token:
        headers['X-Youtube-Identity-Token'] = identity_token
    if account_syncid:
        headers.update({
            'X-Goog-PageId': account_syncid,
            'X-Goog-AuthUser': 0,
        })
    if visitor_data:
        headers['X-Goog-Visitor-Id'] = visitor_data

    # Only present when a SAPISID cookie is available
    authorization = self._generate_sapisidhash_header(origin)
    if authorization is not None:
        headers.update({
            'Authorization': authorization,
            'X-Origin': origin,
        })
    return headers
29f7c58a 535
@staticmethod
def _extract_alerts(data):
    """
    Yield (alert_type, message) for every alert in data['alerts'].

    The message is the alert's text.simpleText or, failing that, the concatenated
    text of text.runs. Each alert is yielded at most once; malformed entries
    (non-dict alerts, runs without text) are skipped instead of raising.
    """
    alerts = data.get('alerts') if isinstance(data, dict) else None
    if not isinstance(alerts, list):
        return
    for alert_dict in alerts:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            text = alert.get('text')
            if not isinstance(text, dict):
                continue
            message = text.get('simpleText')
            if not isinstance(message, str) or not message:
                runs = text.get('runs')
                message = ''.join(
                    run['text'] for run in (runs if isinstance(runs, list) else [])
                    if isinstance(run, dict) and isinstance(run.get('text'), str))
            if message:
                yield alert_type, message
552
def _report_alerts(self, alerts, expected=True):
    """
    Report (alert_type, message) pairs.

    Non-error alerts and all errors but the last are reported as warnings
    (non-errors first); the last error is raised as ExtractorError.
    """
    errors, warnings = [], []
    for alert_type, alert_message in alerts:
        bucket = errors if alert_type.lower() == 'error' else warnings
        bucket.append([alert_type, alert_message])

    to_warn = warnings + errors[:-1]
    for alert_type, alert_message in to_warn:
        self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))

    if not errors:
        return
    last_error_message = errors[-1][1]
    raise ExtractorError('YouTube said: %s' % last_error_message, expected=expected)
566
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from *data* and pass them, with any extra arguments, to _report_alerts."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
569
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='WEB'):
    """
    Call the Innertube API with retries and return the response.

    A request is retried on HTTP 500/503/404 and when none of check_get_keys
    yields a value in the response, up to the 'extractor_retries' param
    (default 3) retries after the first attempt. Alerts contained in a
    response are reported via _extract_and_report_alerts.

    With fatal=False, failures are reported as warnings and None is returned
    instead of raising.
    """
    response = None
    last_error = None
    # Starts at -1 so that the first attempt is numbered 0 and retries count from 1
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            # fatal=True regardless of the caller's flag, so errors reach the handler below
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                last_error = 'HTTP Error %s' % e.cause.code
                if count < retries:
                    continue
            # Not retryable, or retries exhausted
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            # Youtube may send alerts if there was an issue with the continuation page
            try:
                self._extract_and_report_alerts(response, expected=False)
            except ExtractorError as e:
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            # Accept the response when no keys are required or at least one has a value
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response
625
@staticmethod
def is_music_url(url):
    """Return True if *url* starts with http(s)://music.youtube.com/."""
    return bool(re.match(r'https?://music\.youtube\.com/', url))
629
def _extract_video(self, renderer):
    """Build a 'url' result dict for YoutubeIE from a video renderer dict."""
    def first_text(*getters):
        # First getter that yields a string wins; None when none does
        return try_get(renderer, getters, compat_str)

    video_id = renderer.get('videoId')

    views_text = first_text(lambda x: x['viewCountText']['simpleText']) or ''
    # Strip whitespace, then read the leading digit/comma group
    views_match = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', views_text),
        'view count', default=None)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
        'title': first_text(
            lambda x: x['title']['runs'][0]['text'],
            lambda x: x['title']['simpleText']),
        'description': first_text(lambda x: x['descriptionSnippet']['runs'][0]['text']),
        'duration': parse_duration(first_text(lambda x: x['lengthText']['simpleText'])),
        'view_count': str_to_int(views_match),
        'uploader': first_text(
            lambda x: x['ownerText']['runs'][0]['text'],
            lambda x: x['shortBylineText']['runs'][0]['text']),
    }
661
0c148415 662
360e1ca5 663class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 664 IE_DESC = 'YouTube.com'
bc2ca1bb 665 _INVIDIOUS_SITES = (
666 # invidious-redirect websites
667 r'(?:www\.)?redirect\.invidious\.io',
668 r'(?:(?:www|dev)\.)?invidio\.us',
669 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
670 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 671 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 672 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 673 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
bc2ca1bb 674 # youtube-dl invidious instances list
675 r'(?:(?:www|no)\.)?invidiou\.sh',
676 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
677 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 678 r'(?:www\.)?invidious\.mastodon\.host',
679 r'(?:www\.)?invidious\.zapashcanon\.fr',
ed807c18 680 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
201c1459 681 r'(?:www\.)?invidious\.tinfoil-hat\.net',
682 r'(?:www\.)?invidious\.himiko\.cloud',
683 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 684 r'(?:www\.)?invidious\.tube',
685 r'(?:www\.)?invidiou\.site',
686 r'(?:www\.)?invidious\.site',
687 r'(?:www\.)?invidious\.xyz',
688 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 689 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 690 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 691 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 692 r'(?:www\.)?tube\.poal\.co',
693 r'(?:www\.)?tube\.connect\.cafe',
694 r'(?:www\.)?vid\.wxzm\.sx',
695 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 696 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 697 r'(?:www\.)?yewtu\.be',
698 r'(?:www\.)?yt\.elukerio\.org',
699 r'(?:www\.)?yt\.lelux\.fi',
700 r'(?:www\.)?invidious\.ggc-project\.de',
701 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 702 r'(?:www\.)?ytprivate\.com',
703 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 704 r'(?:www\.)?invidious\.toot\.koeln',
705 r'(?:www\.)?invidious\.fdn\.fr',
706 r'(?:www\.)?watch\.nettohikari\.com',
ed807c18 707 r'(?:www\.)?invidious\.namazso\.eu',
708 r'(?:www\.)?invidious\.silkky\.cloud',
709 r'(?:www\.)?invidious\.exonip\.de',
710 r'(?:www\.)?invidious\.riverside\.rocks',
711 r'(?:www\.)?invidious\.blamefran\.net',
712 r'(?:www\.)?invidious\.moomoo\.de',
713 r'(?:www\.)?ytb\.trom\.tf',
714 r'(?:www\.)?yt\.cyberhost\.uk',
bc2ca1bb 715 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
716 r'(?:www\.)?qklhadlycap4cnod\.onion',
717 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
718 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
719 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
720 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
721 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
722 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
ed807c18 723 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
724 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
725 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
726 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
bc2ca1bb 727 )
cb7dfeea 728 _VALID_URL = r"""(?x)^
c5e8d7af 729 (
edb53e2d 730 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 731 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
732 (?:www\.)?deturl\.com/www\.youtube\.com|
733 (?:www\.)?pwnyoutube\.com|
734 (?:www\.)?hooktube\.com|
735 (?:www\.)?yourepeat\.com|
736 tube\.majestyc\.net|
737 %(invidious)s|
738 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
739 (?:.*?\#/)? # handle anchor (#/) redirect urls
740 (?: # the various things that can precede the ID:
ac7553d0 741 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 742 |(?: # or the v= param in all its forms
f7000f3a 743 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 744 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 745 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
746 v=
747 )
f4b05232 748 ))
cbaed4bb
S
749 |(?:
750 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
751 vid\.plus| # or vid.plus/xxxx
752 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 753 %(invidious)s
cbaed4bb 754 )/
edb53e2d 755 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 756 )
c5e8d7af 757 )? # all until now is optional -> you can pass the naked ID
201c1459 758 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 759 (?(1).+)? # if we found the ID, everything can follow
9297939e 760 (?:\#|$)""" % {
bc2ca1bb 761 'invidious': '|'.join(_INVIDIOUS_SITES),
762 }
e40c758c 763 _PLAYER_INFO_RE = (
cc2db878 764 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
765 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 766 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 767 )
# Static table of known format properties, keyed by itag (as a string).
# Each value lists whichever attributes are fixed for that itag: container
# extension ('ext'), frame size ('width'/'height'), codecs
# ('acodec'/'vcodec'), audio bitrate ('abr'), frame rate ('fps'), plus
# optional 'format_note', 'container', 'preference' and 'protocol'.
# Attributes that vary between videos are deliberately left out (see the
# notes on itags 36, 138 and 272).
# NOTE(review): units are not stated here; 'abr' is presumably kbit/s and
# 'preference' a sorting bias -- confirm against the code that reads this.
_formats = {
    '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
    '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
    '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
    '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
    '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
    '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},


    # 3D videos
    '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
    '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
    '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

    # Apple HTTP Live Streaming
    '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

    # DASH mp4 video
    '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
    '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

    # DASH mp4 audio
    '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
    '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
    '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
    '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
    '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

    # DASH webm video
    '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
    '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
    '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

    # DASH webm audio (vorbis)
    '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
    '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

    # DASH webm audio with opus inside
    '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
    '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
    '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

    # RTMP (unnamed)
    '_rtmp': {'protocol': 'rtmp'},

    # av01 video only formats sometimes served with "unknown" codecs
    '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
}
# Subtitle format identifiers known to this extractor.
# NOTE(review): presumably the formats requested when building subtitle
# URLs; the consuming code is outside this chunk -- confirm.
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 874
# Human-readable messages associated with age-gated videos.
# NOTE(review): presumably compared against the reason text returned for an
# unplayable video; the matching logic is outside this chunk -- confirm.
_AGE_GATE_REASONS = (
    'Sign in to confirm your age',
    'This video may be inappropriate for some users.',
    'Sorry, this content is age-restricted.')
879
# NOTE(review): presumably switches off the base extractor's geo-bypass
# handling for this extractor; the flag is read outside this chunk -- confirm.
_GEO_BYPASS = False

# Name under which this extractor is identified.
IE_NAME = 'youtube'
# Test fixtures for this extractor. Every entry has a 'url'. Entries either
# carry an 'info_dict' of expected metadata or are marked
# 'only_matching': True. Optional keys seen below: 'note', 'params', 'skip',
# 'expected_warnings', 'md5', 'playlist' and 'playlist_count'.
# NOTE(review): expected values given as a type (e.g. int) or as strings
# prefixed with 're:', 'md5:', 'mincount:' or 'maxcount:' are presumably
# interpreted by the test harness rather than compared literally -- confirm
# against the test helper.
_TESTS = [
    {
        'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
        'info_dict': {
            'id': 'BaW_jenozKc',
            'ext': 'mp4',
            'title': 'youtube-dl test video "\'/\\ä↭𝕐',
            'uploader': 'Philipp Hagemeister',
            'uploader_id': 'phihag',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
            'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
            'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
            'upload_date': '20121002',
            'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
            'categories': ['Science & Technology'],
            'tags': ['youtube-dl'],
            'duration': 10,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'start_time': 1,
            'end_time': 9,
        }
    },
    {
        'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
        'note': 'Embed-only video (#1746)',
        'info_dict': {
            'id': 'yZIXLfi8CZQ',
            'ext': 'mp4',
            'upload_date': '20120608',
            'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
            'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
            'uploader': 'SET India',
            'uploader_id': 'setindia',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
            'age_limit': 18,
        },
        'skip': 'Private video',
    },
    {
        'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
        'note': 'Use the first video ID in the URL',
        'info_dict': {
            'id': 'BaW_jenozKc',
            'ext': 'mp4',
            'title': 'youtube-dl test video "\'/\\ä↭𝕐',
            'uploader': 'Philipp Hagemeister',
            'uploader_id': 'phihag',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
            'upload_date': '20121002',
            'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
            'categories': ['Science & Technology'],
            'tags': ['youtube-dl'],
            'duration': 10,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
    },
    {
        'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
        'note': '256k DASH audio (format 141) via DASH manifest',
        'info_dict': {
            'id': 'a9LDPn-MO4I',
            'ext': 'm4a',
            'upload_date': '20121002',
            'uploader_id': '8KVIDEO',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
            'description': '',
            'uploader': '8KVIDEO',
            'title': 'UHDTV TEST 8K VIDEO.mp4'
        },
        'params': {
            'youtube_include_dash_manifest': True,
            'format': '141',
        },
        'skip': 'format 141 not served anymore',
    },
    # DASH manifest with encrypted signature
    {
        'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
        'info_dict': {
            'id': 'IB3lcPjvWLA',
            'ext': 'm4a',
            'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
            'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
            'duration': 244,
            'uploader': 'AfrojackVEVO',
            'uploader_id': 'AfrojackVEVO',
            'upload_date': '20131011',
            'abr': 129.495,
        },
        'params': {
            'youtube_include_dash_manifest': True,
            'format': '141/bestaudio[ext=m4a]',
        },
    },
    # Controversy video
    {
        'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
        'info_dict': {
            'id': 'T4XJQO3qol8',
            'ext': 'mp4',
            'duration': 219,
            'upload_date': '20100909',
            'uploader': 'Amazing Atheist',
            'uploader_id': 'TheAmazingAtheist',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
            'title': 'Burning Everyone\'s Koran',
            'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
        }
    },
    # Normal age-gate video (embed allowed)
    {
        'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
        'info_dict': {
            'id': 'HtVdAasjOgU',
            'ext': 'mp4',
            'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
            'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
            'duration': 142,
            'uploader': 'The Witcher',
            'uploader_id': 'WitcherGame',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
            'upload_date': '20140605',
            'age_limit': 18,
        },
    },
    # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
    # YouTube Red ad is not captured for creator
    {
        'url': '__2ABJjxzNo',
        'info_dict': {
            'id': '__2ABJjxzNo',
            'ext': 'mp4',
            'duration': 266,
            'upload_date': '20100430',
            'uploader_id': 'deadmau5',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
            'creator': 'deadmau5',
            'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
            'uploader': 'deadmau5',
            'title': 'Deadmau5 - Some Chords (HD)',
            'alt_title': 'Some Chords',
        },
        'expected_warnings': [
            'DASH manifest missing',
        ]
    },
    # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
    {
        'url': 'lqQg6PlCWgI',
        'info_dict': {
            'id': 'lqQg6PlCWgI',
            'ext': 'mp4',
            'duration': 6085,
            'upload_date': '20150827',
            'uploader_id': 'olympic',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
            'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
            'uploader': 'Olympic',
            'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
        },
        'params': {
            'skip_download': 'requires avconv',
        }
    },
    # Non-square pixels
    {
        'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
        'info_dict': {
            'id': '_b-2C3KPAM0',
            'ext': 'mp4',
            'stretched_ratio': 16 / 9.,
            'duration': 85,
            'upload_date': '20110310',
            'uploader_id': 'AllenMeow',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
            'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
            'uploader': '孫ᄋᄅ',
            'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
        },
    },
    # url_encoded_fmt_stream_map is empty string
    {
        'url': 'qEJwOuvDf7I',
        'info_dict': {
            'id': 'qEJwOuvDf7I',
            'ext': 'webm',
            'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
            'description': '',
            'upload_date': '20150404',
            'uploader_id': 'spbelect',
            'uploader': 'Наблюдатели Петербурга',
        },
        'params': {
            'skip_download': 'requires avconv',
        },
        'skip': 'This live event has ended.',
    },
    # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
    {
        'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
        'info_dict': {
            'id': 'FIl7x6_3R5Y',
            'ext': 'webm',
            'title': 'md5:7b81415841e02ecd4313668cde88737a',
            'description': 'md5:116377fd2963b81ec4ce64b542173306',
            'duration': 220,
            'upload_date': '20150625',
            'uploader_id': 'dorappi2000',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
            'uploader': 'dorappi2000',
            'formats': 'mincount:31',
        },
        'skip': 'not actual anymore',
    },
    # DASH manifest with segment_list
    {
        'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
        'md5': '8ce563a1d667b599d21064e982ab9e31',
        'info_dict': {
            'id': 'CsmdDsKjzN8',
            'ext': 'mp4',
            'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
            'uploader': 'Airtek',
            'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
            'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
            'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
        },
        'params': {
            'youtube_include_dash_manifest': True,
            'format': '135',  # bestvideo
        },
        'skip': 'This live event has ended.',
    },
    {
        # Multifeed videos (multiple cameras), URL is for Main Camera
        'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
        'info_dict': {
            'id': 'jvGDaLqkpTg',
            'title': 'Tom Clancy Free Weekend Rainbow Whatever',
            'description': 'md5:e03b909557865076822aa169218d6a5d',
        },
        'playlist': [{
            'info_dict': {
                'id': 'jvGDaLqkpTg',
                'ext': 'mp4',
                'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
                'description': 'md5:e03b909557865076822aa169218d6a5d',
                'duration': 10643,
                'upload_date': '20161111',
                'uploader': 'Team PGP',
                'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
            },
        }, {
            'info_dict': {
                'id': '3AKt1R1aDnw',
                'ext': 'mp4',
                'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
                'description': 'md5:e03b909557865076822aa169218d6a5d',
                'duration': 10991,
                'upload_date': '20161111',
                'uploader': 'Team PGP',
                'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
            },
        }, {
            'info_dict': {
                'id': 'RtAMM00gpVc',
                'ext': 'mp4',
                'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
                'description': 'md5:e03b909557865076822aa169218d6a5d',
                'duration': 10995,
                'upload_date': '20161111',
                'uploader': 'Team PGP',
                'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
            },
        }, {
            'info_dict': {
                'id': '6N2fdlP3C5U',
                'ext': 'mp4',
                'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
                'description': 'md5:e03b909557865076822aa169218d6a5d',
                'duration': 10990,
                'upload_date': '20161111',
                'uploader': 'Team PGP',
                'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
            },
        }],
        'params': {
            'skip_download': True,
        },
    },
    {
        # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
        'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
        'info_dict': {
            'id': 'gVfLd0zydlo',
            'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
        },
        'playlist_count': 2,
        'skip': 'Not multifeed anymore',
    },
    {
        'url': 'https://vid.plus/FlRa-iH7PGw',
        'only_matching': True,
    },
    {
        'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
        'only_matching': True,
    },
    {
        # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
        # Also tests cut-off URL expansion in video description (see
        # https://github.com/ytdl-org/youtube-dl/issues/1892,
        # https://github.com/ytdl-org/youtube-dl/issues/8164)
        'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
        'info_dict': {
            'id': 'lsguqyKfVQg',
            'ext': 'mp4',
            'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
            'alt_title': 'Dark Walk - Position Music',
            'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
            'duration': 133,
            'upload_date': '20151119',
            'uploader_id': 'IronSoulElf',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
            'uploader': 'IronSoulElf',
            'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
            'track': 'Dark Walk - Position Music',
            'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
            'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
        },
        'params': {
            'skip_download': True,
        },
    },
    {
        # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
        'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
        'only_matching': True,
    },
    {
        # Video with yt:stretch=17:0
        'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
        'info_dict': {
            'id': 'Q39EVAstoRM',
            'ext': 'mp4',
            'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
            'description': 'md5:ee18a25c350637c8faff806845bddee9',
            'upload_date': '20151107',
            'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
            'uploader': 'CH GAMER DROID',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video does not exist.',
    },
    {
        # Video with incomplete 'yt:stretch=16:'
        'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
        'only_matching': True,
    },
    {
        # Video licensed under Creative Commons
        'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
        'info_dict': {
            'id': 'M4gD1WSo5mA',
            'ext': 'mp4',
            'title': 'md5:e41008789470fc2533a3252216f1c1d1',
            'description': 'md5:a677553cf0840649b731a3024aeff4cc',
            'duration': 721,
            'upload_date': '20150127',
            'uploader_id': 'BerkmanCenter',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
            'uploader': 'The Berkman Klein Center for Internet & Society',
            'license': 'Creative Commons Attribution license (reuse allowed)',
        },
        'params': {
            'skip_download': True,
        },
    },
    {
        # Channel-like uploader_url
        'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
        'info_dict': {
            'id': 'eQcmzGIKrzg',
            'ext': 'mp4',
            'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
            'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
            'duration': 4060,
            'upload_date': '20151119',
            'uploader': 'Bernie Sanders',
            'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
            'license': 'Creative Commons Attribution license (reuse allowed)',
        },
        'params': {
            'skip_download': True,
        },
    },
    {
        'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
        'only_matching': True,
    },
    {
        # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
        'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
        'only_matching': True,
    },
    {
        # Rental video preview
        'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
        'info_dict': {
            'id': 'uGpuVWrhIzE',
            'ext': 'mp4',
            'title': 'Piku - Trailer',
            'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
            'upload_date': '20150811',
            'uploader': 'FlixMatrix',
            'uploader_id': 'FlixMatrixKaravan',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
            'license': 'Standard YouTube License',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is not available.',
    },
    {
        # YouTube Red video with episode data
        'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
        'info_dict': {
            'id': 'iqKdEhx-dD4',
            'ext': 'mp4',
            'title': 'Isolation - Mind Field (Ep 1)',
            'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
            'duration': 2085,
            'upload_date': '20170118',
            'uploader': 'Vsauce',
            'uploader_id': 'Vsauce',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
            'series': 'Mind Field',
            'season_number': 1,
            'episode_number': 1,
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': [
            'Skipping DASH manifest',
        ],
    },
    {
        # The following content has been identified by the YouTube community
        # as inappropriate or offensive to some audiences.
        'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
        'info_dict': {
            'id': '6SJNVb0GnPI',
            'ext': 'mp4',
            'title': 'Race Differences in Intelligence',
            'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
            'duration': 965,
            'upload_date': '20140124',
            'uploader': 'New Century Foundation',
            'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
    },
    {
        # itag 212
        'url': '1t24XAntNCY',
        'only_matching': True,
    },
    {
        # geo restricted to JP
        'url': 'sJL6WA-aGkQ',
        'only_matching': True,
    },
    {
        'url': 'https://invidio.us/watch?v=BaW_jenozKc',
        'only_matching': True,
    },
    {
        'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
        'only_matching': True,
    },
    {
        # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
        'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
        'only_matching': True,
    },
    {
        # DRM protected
        'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
        'only_matching': True,
    },
    {
        # Video with unsupported adaptive stream type formats
        'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
        'info_dict': {
            'id': 'Z4Vy8R84T1U',
            'ext': 'mp4',
            'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'duration': 433,
            'upload_date': '20130923',
            'uploader': 'Amelia Putri Harwita',
            'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
            'formats': 'maxcount:10',
        },
        'params': {
            'skip_download': True,
            'youtube_include_dash_manifest': False,
        },
        'skip': 'not actual anymore',
    },
    {
        # YouTube Music auto-generated description
        'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
        'info_dict': {
            'id': 'MgNrAu2pzNs',
            'ext': 'mp4',
            'title': 'Voyeur Girl',
            'description': 'md5:7ae382a65843d6df2685993e90a8628f',
            'upload_date': '20190312',
            'uploader': 'Stephen - Topic',
            'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
            'artist': 'Stephen',
            'track': 'Voyeur Girl',
            'album': 'it\'s too much love to know my dear',
            'release_date': '20190313',
            'release_year': 2019,
        },
        'params': {
            'skip_download': True,
        },
    },
    {
        'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
        'only_matching': True,
    },
    {
        # invalid -> valid video id redirection
        'url': 'DJztXj2GPfl',
        'info_dict': {
            'id': 'DJztXj2GPfk',
            'ext': 'mp4',
            'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
            'description': 'md5:bf577a41da97918e94fa9798d9228825',
            'upload_date': '20090125',
            'uploader': 'Prochorowka',
            'uploader_id': 'Prochorowka',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
            'artist': 'Panjabi MC',
            'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
            'album': 'Beware of the Boys (Mundian To Bach Ke)',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Video unavailable',
    },
    {
        # empty description results in an empty string
        'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
        'info_dict': {
            'id': 'x41yOUIvK2k',
            'ext': 'mp4',
            'title': 'IMG 3456',
            'description': '',
            'upload_date': '20170613',
            'uploader_id': 'ElevageOrVert',
            'uploader': 'ElevageOrVert',
        },
        'params': {
            'skip_download': True,
        },
    },
    {
        # with '};' inside yt initial data (see [1])
        # see [2] for an example with '};' inside ytInitialPlayerResponse
        # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
        # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
        'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
        'info_dict': {
            'id': 'CHqg6qOn4no',
            'ext': 'mp4',
            'title': 'Part 77 Sort a list of simple types in c#',
            'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
            'upload_date': '20130831',
            'uploader_id': 'kudvenkat',
            'uploader': 'kudvenkat',
        },
        'params': {
            'skip_download': True,
        },
    },
    {
        # another example of '};' in ytInitialData
        'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
        'only_matching': True,
    },
    {
        'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
        'only_matching': True,
    },
    {
        # https://github.com/ytdl-org/youtube-dl/pull/28094
        'url': 'OtqTfy26tG0',
        'info_dict': {
            'id': 'OtqTfy26tG0',
            'ext': 'mp4',
            'title': 'Burn Out',
            'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
            'upload_date': '20141120',
            'uploader': 'The Cinematic Orchestra - Topic',
            'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
            'artist': 'The Cinematic Orchestra',
            'track': 'Burn Out',
            'album': 'Every Day',
            'release_date': None,
            'release_year': None,
        },
        'params': {
            'skip_download': True,
        },
    },
    {
        # controversial video, only works with bpctr when authenticated with cookies
        'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
        'only_matching': True,
    },
    {
        # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
        'url': 'cBvYw8_A0vQ',
        'info_dict': {
            'id': 'cBvYw8_A0vQ',
            'ext': 'mp4',
            'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
            'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
            'upload_date': '20201120',
            'uploader': 'Walk around Japan',
            'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # Has multiple audio streams
        'url': 'WaOKSUlf4TM',
        'only_matching': True
    }, {
        # Requires Premium: has format 141 when requested using YTM url
        'url': 'https://music.youtube.com/watch?v=XclachpHxis',
        'only_matching': True
    }, {
        # multiple subtitles with same lang_code
        'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
        'only_matching': True,
    }, {
        # Force use android client fallback
        'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
        'info_dict': {
            'id': 'YOelRv7fMxY',
            'title': 'Digging a Secret Tunnel from my Workshop',
            'ext': '3gp',
            'upload_date': '20210624',
            'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
            'uploader': 'colinfurze',
            'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
            'description': 'md5:ecb672623246d98c6c562eed6ae798c3'
        },
        'params': {
            'format': '17',  # 3gp format available on android
            'extractor_args': {'youtube': {'player_client': ['android']}},
        },
    },
    {
        # Skip download of additional client configs (remix client config in this case)
        'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
        'only_matching': True,
        'params': {
            'extractor_args': {'youtube': {'player_skip': ['configs']}},
        },
    }
]
1586
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    A URL carrying a non-empty ``list`` query parameter is rejected here;
    any other URL is judged by the parent class.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
1596
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor together with its per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # (player URL, signature cache id) -> signature function
    self._player_cache = dict()
    # player id -> downloaded player JS source
    self._code_cache = dict()
e0df6211 1601
def _extract_player_url(self, ytcfg=None, webpage=None):
    """Return the absolute URL of the JS player, or None if it is unknown.

    The URL is taken from ``ytcfg['PLAYER_JS_URL']`` when present; otherwise
    it is searched for in *webpage* (when a webpage is available).
    Protocol-relative and site-relative URLs are made absolute.
    """
    player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
    if not player_url and webpage:
        # Only search when there is a page to search in; the webpage may be
        # missing because it is downloaded non-fatally by the caller.
        player_url = self._search_regex(
            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
            webpage, 'player URL', fatal=False)
    if not player_url:
        # The non-fatal search found nothing: report "unknown" instead of
        # crashing on ``None.startswith``.
        return None
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)
    return player_url
1614
60064c53
PH
def _signature_cache_id(self, example_sig):
    """ Return a string representation of a signature """
    # The id is the length of every dot-separated part, joined by dots.
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1618
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the player id found in *player_url*.

    Each pattern in ``cls._PLAYER_INFO_RE`` is tried in order and the ``id``
    group of the first one that matches is returned.

    Raises ExtractorError when no pattern matches.
    """
    for pattern in cls._PLAYER_INFO_RE:
        match = re.search(pattern, player_url)
        if match:
            return match.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)
e40c758c 1628
109dd3b2 1629 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1630 player_id = self._extract_player_info(player_url)
1631 if player_id not in self._code_cache:
1632 self._code_cache[player_id] = self._download_webpage(
1633 player_url, video_id, fatal=fatal,
1634 note='Downloading player ' + player_id,
1635 errnote='Download of %s failed' % player_url)
1636 return player_id in self._code_cache
1637
e40c758c 1638 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 1639 player_id = self._extract_player_info(player_url)
e0df6211 1640
c4417ddb 1641 # Read from filesystem cache
545cc85d 1642 func_id = 'js_%s_%s' % (
1643 player_id, self._signature_cache_id(example_sig))
c4417ddb 1644 assert os.path.basename(func_id) == func_id
a0e07d31 1645
69ea8ca4 1646 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1647 if cache_spec is not None:
78caa52a 1648 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1649
109dd3b2 1650 if self._load_player(video_id, player_url):
1651 code = self._code_cache[player_id]
1652 res = self._parse_sig_js(code)
e0df6211 1653
109dd3b2 1654 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1655 cache_res = res(test_string)
1656 cache_spec = [ord(c) for c in cache_res]
83799698 1657
109dd3b2 1658 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1659 return res
83799698 1660
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function *func*.

    *func* is applied to a probe string whose characters encode their own
    positions; the resulting index sequence is rendered as a sum of
    ``s[i]`` items and ``s[a:b:c]`` slices and written via ``to_screen``.
    """
    def _slice_expr(first, last, stride):
        lower = str(first) if first != 0 else ''
        stop = last + stride
        upper = ':' if stop < 0 else ':%d' % stop
        tail = (':%d' % stride) if stride != 1 else ''
        return 's[%s%s%s]' % (lower, upper, tail)

    def _sig_terms(indices):
        stride = None
        # Squelch pyflakes warnings - run_start will be set when stride is set
        run_start = '(Never used)'
        for cur, before in zip(indices[1:], indices[:-1]):
            delta = cur - before
            if stride is None:
                if delta in (-1, 1):
                    # A run of consecutive indices starts here
                    stride, run_start = delta, before
                else:
                    yield 's[%d]' % before
            elif delta != stride:
                # The current run is over: emit it as one slice
                yield _slice_expr(run_start, before, stride)
                stride = None
        if stride is None:
            yield 's[%d]' % cur
        else:
            yield _slice_expr(run_start, cur, stride)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(c) for c in func(probe)]
    expr_code = ' + '.join(_sig_terms(index_spec))
    part_lengths = ', '.join(
        compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1699
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Locate the signature function in *jscode* and wrap it as a callable.

    The function name is found with the first matching pattern below; the
    function is then extracted with JSInterpreter. The returned callable
    takes the signature string and passes it as the sole JS argument.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature_function(s):
        return initial_function([s])

    return run_signature_function
1723
545cc85d 1724 def _decrypt_signature(self, s, video_id, player_url):
257a2501 1725 """Turn the encrypted s field into a working signature"""
6b37f0be 1726
c8bf86d5 1727 if player_url is None:
69ea8ca4 1728 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1729
c8bf86d5 1730 try:
62af3a0e 1731 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1732 if player_id not in self._player_cache:
1733 func = self._extract_signature_function(
60064c53 1734 video_id, player_url, s
c8bf86d5
PH
1735 )
1736 self._player_cache[player_id] = func
1737 func = self._player_cache[player_id]
a06916d9 1738 if self.get_param('youtube_print_sig_code'):
60064c53 1739 self._print_sig_code(func, s)
c8bf86d5
PH
1740 return func(s)
1741 except Exception as e:
1742 tb = traceback.format_exc()
1743 raise ExtractorError(
78caa52a 1744 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1745
109dd3b2 1746 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1747 """
1748 Extract signatureTimestamp (sts)
1749 Required to tell API what sig/player version is in use.
1750 """
1751 sts = None
1752 if isinstance(ytcfg, dict):
1753 sts = int_or_none(ytcfg.get('STS'))
1754
1755 if not sts:
1756 # Attempt to extract from player
1757 if player_url is None:
1758 error_msg = 'Cannot extract signature timestamp without player_url.'
1759 if fatal:
1760 raise ExtractorError(error_msg)
1761 self.report_warning(error_msg)
1762 return
1763 if self._load_player(video_id, player_url, fatal=fatal):
1764 player_id = self._extract_player_info(player_url)
1765 code = self._code_cache[player_id]
1766 sts = int_or_none(self._search_regex(
1767 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
1768 'JS player signature timestamp', group='sts', fatal=fatal))
1769 return sts
1770
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL found in *player_response*.

    Does nothing when the response carries no valid tracking URL. The
    request is non-fatal.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return
    url_parts = compat_urlparse.urlparse(playback_url)
    params = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = []
    for _ in range(16):
        cpn_chars.append(CPN_ALPHABET[random.randint(0, 256) & 63])

    params['ver'] = ['2']
    params['cpn'] = [''.join(cpn_chars)]

    tracking_url = compat_urlparse.urlunparse(url_parts._replace(
        query=compat_urllib_parse_urlencode(params, True)))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1795
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return every YouTube embed reference found in *webpage*.

    Three sources are scanned, in this order: embedded players
    (iframe/embed/object/SWFObject style, yielding URLs), lazyYT elements
    and the Wordpress "YouTube Video Importer" plugin (both yielding the
    raw attribute value).
    """
    # Embedded YouTube player
    # NOTE: the embedSWF alternative used to read ``embedSWF\(?:\s*``,
    # i.e. an optional "(" followed by a literal ":", which cannot match a
    # call such as embedSWF("//www.youtube.com/v/...").
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(lazy_id)
        for lazy_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall yields (q1, q2, video_id) tuples; keep the id only
    entries.extend(m[-1] for m in matches)

    return entries
1827
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by ``_extract_urls``, or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1832
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id, taken from group 2 of ``cls._VALID_URL``.

    Raises ExtractorError when *url* does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1840
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a list of chapter dicts from the chaptered player bar in *data*.

    Each chapter runs from its own start time to the start of the following
    chapter; the last one ends at *duration*. Chapters whose start or end
    time is unknown are skipped. Returns None when *data* has no chapters.
    """
    raw_chapters = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not raw_chapters:
        return

    def start_seconds(item):
        millis = try_get(
            item,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    total = len(raw_chapters)
    result = []
    for index, item in enumerate(raw_chapters):
        begin = start_seconds(item)
        if begin is None:
            continue
        if index + 1 < total:
            finish = start_seconds(raw_chapters[index + 1])
        else:
            finish = duration
        if finish is None:
            continue
        result.append({
            'start_time': begin,
            'end_time': finish,
            'title': try_get(
                item, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return result
1880
545cc85d 1881 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1882 return self._parse_json(self._search_regex(
1883 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1884 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 1885
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns the result of ``datetime_from_str`` for 'now-<X><units>', or
    None when time_text is empty/None or has fewer than three
    space-separated parts.
    """
    if not time_text:
        # No time text at all (e.g. no text runs were found)
        return None
    time_text_split = time_text.split(' ')
    if len(time_text_split) < 3:
        return None
    return datetime_from_str(
        'now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1895
a1c5d2ca
M
@staticmethod
def _join_text_entries(runs):
    """Concatenate the non-empty ``text`` strings of the dicts in *runs*.

    Entries that are not dicts are ignored. Returns None when no text was
    found.
    """
    pieces = []
    for run in runs:
        if isinstance(run, dict):
            piece = try_get(run, lambda x: x['text'], compat_str)
            if piece:
                pieces.append(piece)
    return ''.join(pieces) if pieces else None
1909
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no ``commentId``. ``parent`` is the
    id of the parent comment; it is reported as 'root' when not given.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return
    comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
    text = self._join_text_entries(comment_text_runs) or ''
    comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
    time_text = self._join_text_entries(comment_time_text)

    # The timestamp is derived from the relative time text ("X units ago"),
    # so it is relative to the time of extraction. The time text may be
    # missing, or may not be parseable; leave the timestamp unset then
    # instead of failing the whole comment.
    timestamp = None
    if time_text:
        parsed_time = self.parse_time_text(time_text)
        if parsed_time is not None:
            timestamp = calendar.timegm(parsed_time.timetuple())

    author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
    votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                  lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_liked,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
1942
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, session_token_list, parent=None, comment_counts=None):
    """Generate the comments reachable from *root_continuation_data*.

    Yields comment dicts (as built by ``_extract_comment``); replies are
    yielded right after their parent via a recursive call. For the root
    call (``parent is None``) the estimated total number of comments is
    additionally yielded once, as a bare number, when the first page
    announces it.

    ``session_token_list`` is a one-element list holding the current
    session token; it is updated in place when a response carries a new
    one. ``comment_counts`` is the shared progress list
    [comments so far, estimated total, current thread number].
    """

    def extract_thread(parent_renderer):
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # ``dict`` is the expected type here. It used to be passed
            # inside the getter tuple, where it acted as a second getter
            # and produced a copy of the parent dict on a missing key.
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    # TODO: Generalize the download code with TabIE
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    first_continuation = False
    if parent is None:
        first_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        retries = self.get_param('extractor_retries', 3)
        count = -1
        last_error = None

        # Download one page, retrying on known transient failures
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if 'itct' in continuation:
                    query['itct'] = continuation['itct']
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = 'Downloading initial comment continuation page'
                    else:
                        note_prefix = ' Downloading comment reply thread %d %s' % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = '%sDownloading comment%s page %d %s' % (
                        ' ' if parent else '',
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        self.report_warning('Assumed end of comments (received HTTP Error 413)')
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + ' (this API is probably deprecated)'
                    if count < retries:
                        continue
                raise
            else:
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response']), dict) or {}

                if response.get('continuationContents'):
                    break

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if isinstance(browse, dict):
                    if browse.get('reload'):
                        raise ExtractorError('Invalid or missing params in continuation request', expected=False)

                    # TODO: not tested, merged from old extractor
                    err_msg = browse.get('externalErrorMessage')
                    if err_msg:
                        last_error = err_msg
                        continue

                response_error = try_get(response, lambda x: x['responseContext']['errors']['error'][0], dict) or {}
                err_msg = response_error.get('externalErrorMessage')
                if err_msg:
                    last_error = err_msg
                    continue

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    raise ExtractorError(last_error)

        if not response:
            break
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        known_continuation_renderers = {
            'itemSectionContinuation': extract_thread,
            'commentRepliesContinuation': extract_thread
        }

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value

            if first_continuation:
                first_continuation = False
                expected_comment_count = try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str)

                if expected_comment_count:
                    comment_counts[1] = str_to_int(expected_comment_count)
                    self.to_screen('Downloading ~%d comments' % str_to_int(expected_comment_count))
                    yield comment_counts[1]

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
                    [comment_sort_index]['continuation']['reloadContinuationData'], dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    # Restart from the sorted listing instead of reading
                    # the comments of this first page.
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen('Sorting comments by %s' % ('popular' if comment_sort_index == 0 else 'newest'))
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            break
2137
2138 def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
2139 """Entry for comment extraction"""
2140 comments = []
2141 known_entry_comment_renderers = (
2142 'itemSectionRenderer',
2143 )
2144 estimated_total = 0
2145 for entry in contents:
2146 for key, renderer in entry.items():
2147 if key not in known_entry_comment_renderers:
2148 continue
2149
2150 comment_iter = self._comment_entries(
2151 renderer,
2152 identity_token=self._extract_identity_token(webpage, item_id=video_id),
2153 account_syncid=self._extract_account_syncid(ytcfg),
f4f751af 2154 ytcfg=ytcfg,
a1c5d2ca
M
2155 session_token_list=[xsrf_token])
2156
2157 for comment in comment_iter:
2158 if isinstance(comment, int):
2159 estimated_total = comment
2160 continue
2161 comments.append(comment)
2162 break
d92f5d5a 2163 self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
a1c5d2ca
M
2164 return {
2165 'comments': comments,
2166 'comment_count': len(comments),
2167 }
2168
109dd3b2 2169 @staticmethod
2170 def _generate_player_context(sts=None):
2171 context = {
2172 'html5Preference': 'HTML5_PREF_WANTS',
2173 }
2174 if sts is not None:
2175 context['signatureTimestamp'] = sts
2176 return {
2177 'playbackContext': {
2178 'contentPlaybackContext': context
2179 }
2180 }
2181
4e6767b5 2182 @staticmethod
2183 def _get_video_info_params(video_id):
2184 return {
2185 'video_id': video_id,
2186 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
2187 'html5': '1',
2188 'c': 'TVHTML5',
2189 'cver': '6.20180913',
2190 }
2191
c5e8d7af 2192 def _real_extract(self, url):
cf7e015f 2193 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 2194 video_id = self._match_id(url)
9297939e 2195
2196 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
2197
545cc85d 2198 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 2199 webpage_url = base_url + 'watch?v=' + video_id
2200 webpage = self._download_webpage(
cce889b9 2201 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 2202
109dd3b2 2203 ytcfg = self._extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2204 identity_token = self._extract_identity_token(webpage, video_id)
2205 syncid = self._extract_account_syncid(ytcfg)
2206 headers = self._generate_api_headers(ytcfg, identity_token, syncid)
2207
2208 player_url = self._extract_player_url(ytcfg, webpage)
2209
4bb6b02f 2210 player_client = (self._configuration_arg('player_client') or [''])[0]
2211 if player_client not in ('web', 'android', ''):
2212 self.report_warning(f'Invalid player_client {player_client} given. Falling back to WEB')
2213 force_mobile_client = player_client == 'android'
2214 player_skip = self._configuration_arg('player_skip')
109dd3b2 2215
9297939e 2216 def get_text(x):
2217 if not x:
2218 return
2219 text = x.get('simpleText')
2220 if text and isinstance(text, compat_str):
2221 return text
2222 runs = x.get('runs')
2223 if not isinstance(runs, list):
2224 return
2225 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
2226
2227 ytm_streaming_data = {}
2228 if is_music_url:
109dd3b2 2229 ytm_webpage = None
2230 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2231 if sts and not force_mobile_client and 'configs' not in player_skip:
2232 ytm_webpage = self._download_webpage(
2233 'https://music.youtube.com',
2234 video_id, fatal=False, note="Downloading remix client config")
2235
2236 ytm_cfg = self._extract_ytcfg(video_id, ytm_webpage) or {}
2237 ytm_client = 'WEB_REMIX'
2238 if not sts or force_mobile_client:
2239 # Android client already has signature descrambled
2240 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2241 if not sts:
2242 self.report_warning('Falling back to mobile remix client for player API.')
2243 ytm_client = 'ANDROID_MUSIC'
2244 ytm_cfg = {}
2245
2246 ytm_headers = self._generate_api_headers(
2247 ytm_cfg, identity_token, syncid,
2248 client=ytm_client)
2249 ytm_query = {'videoId': video_id}
2250 ytm_query.update(self._generate_player_context(sts))
2251
2252 ytm_player_response = self._extract_response(
2253 item_id=video_id, ep='player', query=ytm_query,
2254 ytcfg=ytm_cfg, headers=ytm_headers, fatal=False,
2255 default_client=ytm_client,
2256 note='Downloading %sremix player API JSON' % ('mobile ' if force_mobile_client else ''))
2257
2258 ytm_streaming_data = try_get(ytm_player_response, lambda x: x['streamingData']) or {}
545cc85d 2259 player_response = None
2260 if webpage:
2261 player_response = self._extract_yt_initial_variable(
2262 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2263 video_id, 'initial player response')
f4f751af 2264
109dd3b2 2265 if not player_response or force_mobile_client:
2266 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2267 yt_client = 'WEB'
2268 ytpcfg = ytcfg
2269 ytp_headers = headers
2270 if not sts or force_mobile_client:
2271 # Android client already has signature descrambled
2272 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2273 if not sts:
2274 self.report_warning('Falling back to mobile client for player API.')
2275 yt_client = 'ANDROID'
2276 ytpcfg = {}
2277 ytp_headers = self._generate_api_headers(ytpcfg, identity_token, syncid, yt_client)
2278
2279 yt_query = {'videoId': video_id}
2280 yt_query.update(self._generate_player_context(sts))
2281 player_response = self._extract_response(
2282 item_id=video_id, ep='player', query=yt_query,
2283 ytcfg=ytpcfg, headers=ytp_headers, fatal=False,
2284 default_client=yt_client,
2285 note='Downloading %splayer API JSON' % ('mobile ' if force_mobile_client else '')
2286 )
545cc85d 2287
109dd3b2 2288 # Age-gate workarounds
545cc85d 2289 playability_status = player_response.get('playabilityStatus') or {}
109dd3b2 2290 if playability_status.get('reason') in self._AGE_GATE_REASONS:
545cc85d 2291 pr = self._parse_json(try_get(compat_parse_qs(
2292 self._download_webpage(
2293 base_url + 'get_video_info', video_id,
4e6767b5 2294 'Refetching age-gated info webpage', 'unable to download video info webpage',
2295 query=self._get_video_info_params(video_id), fatal=False)),
545cc85d 2296 lambda x: x['player_response'][0],
2297 compat_str) or '{}', video_id)
109dd3b2 2298 if not pr:
2299 self.report_warning('Falling back to embedded-only age-gate workaround.')
2300 embed_webpage = None
2301 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2302 if sts and not force_mobile_client and 'configs' not in player_skip:
2303 embed_webpage = self._download_webpage(
2304 'https://www.youtube.com/embed/%s?html5=1' % video_id,
2305 video_id=video_id, note='Downloading age-gated embed config')
2306
2307 ytcfg_age = self._extract_ytcfg(video_id, embed_webpage) or {}
2308 # If we extracted the embed webpage, it'll tell us if we can view the video
2309 embedded_pr = self._parse_json(
2310 try_get(ytcfg_age, lambda x: x['PLAYER_VARS']['embedded_player_response'], str) or '{}',
2311 video_id=video_id)
2312 embedded_ps_reason = try_get(embedded_pr, lambda x: x['playabilityStatus']['reason'], str) or ''
2313 if embedded_ps_reason not in self._AGE_GATE_REASONS:
2314 yt_client = 'WEB_EMBEDDED_PLAYER'
2315 if not sts or force_mobile_client:
2316 # Android client already has signature descrambled
2317 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2318 if not sts:
2319 self.report_warning(
2320 'Falling back to mobile embedded client for player API (note: some formats may be missing).')
2321 yt_client = 'ANDROID_EMBEDDED_PLAYER'
2322 ytcfg_age = {}
2323
2324 ytage_headers = self._generate_api_headers(
2325 ytcfg_age, identity_token, syncid, client=yt_client)
2326 yt_age_query = {'videoId': video_id}
2327 yt_age_query.update(self._generate_player_context(sts))
2328 pr = self._extract_response(
2329 item_id=video_id, ep='player', query=yt_age_query,
2330 ytcfg=ytcfg_age, headers=ytage_headers, fatal=False,
2331 default_client=yt_client,
2332 note='Downloading %sage-gated player API JSON' % ('mobile ' if force_mobile_client else '')
2333 ) or {}
2334
545cc85d 2335 if pr:
2336 player_response = pr
2337
2338 trailer_video_id = try_get(
2339 playability_status,
2340 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
2341 compat_str)
2342 if trailer_video_id:
2343 return self.url_result(
2344 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 2345
545cc85d 2346 search_meta = (
2347 lambda x: self._html_search_meta(x, webpage, default=None)) \
2348 if webpage else lambda x: None
dbdaaa23 2349
545cc85d 2350 video_details = player_response.get('videoDetails') or {}
37357d21 2351 microformat = try_get(
545cc85d 2352 player_response,
2353 lambda x: x['microformat']['playerMicroformatRenderer'],
2354 dict) or {}
2355 video_title = video_details.get('title') \
2356 or get_text(microformat.get('title')) \
2357 or search_meta(['og:title', 'twitter:title', 'title'])
2358 video_description = video_details.get('shortDescription')
cf7e015f 2359
8fe10494 2360 if not smuggled_data.get('force_singlefeed', False):
a06916d9 2361 if not self.get_param('noplaylist'):
8fe10494
S
2362 multifeed_metadata_list = try_get(
2363 player_response,
2364 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 2365 compat_str)
8fe10494
S
2366 if multifeed_metadata_list:
2367 entries = []
2368 feed_ids = []
2369 for feed in multifeed_metadata_list.split(','):
2370 # Unquote should take place before split on comma (,) since textual
2371 # fields may contain comma as well (see
067aa17e 2372 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 2373 feed_data = compat_parse_qs(
2374 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
2375
def feed_entry(name):
    """Look up the first value stored under *name* in the enclosing feed_data.

    feed_data is the parsed query-string mapping built from the current
    multifeed entry, so each key maps to a list of values.  The lookup is
    delegated to try_get with compat_str as the expected type; callers in
    this loop treat a falsy result as "field missing".
    """
    first_value = try_get(
        feed_data,
        lambda fields: fields[name][0],
        compat_str)
    return first_value
6b09401b
S
2379
2380 feed_id = feed_entry('id')
2381 if not feed_id:
2382 continue
2383 feed_title = feed_entry('title')
2384 title = video_title
2385 if feed_title:
2386 title += ' (%s)' % feed_title
8fe10494
S
2387 entries.append({
2388 '_type': 'url_transparent',
2389 'ie_key': 'Youtube',
2390 'url': smuggle_url(
545cc85d 2391 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 2392 {'force_singlefeed': True}),
6b09401b 2393 'title': title,
8fe10494 2394 })
6b09401b 2395 feed_ids.append(feed_id)
8fe10494
S
2396 self.to_screen(
2397 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2398 % (', '.join(feed_ids), video_id))
545cc85d 2399 return self.playlist_result(
2400 entries, video_id, video_title, video_description)
8fe10494
S
2401 else:
2402 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 2403
9297939e 2404 formats, itags, stream_ids = [], [], []
cc2db878 2405 itag_qualities = {}
d3fc8074 2406 q = qualities([
60bdb7bd 2407 # "tiny" is the smallest video-only format. But some audio-only formats
2408 # were also labeled "tiny". It is not clear if such formats still exist
d3fc8074 2409 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2410 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2411 ])
9297939e 2412
545cc85d 2413 streaming_data = player_response.get('streamingData') or {}
2414 streaming_formats = streaming_data.get('formats') or []
2415 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
9297939e 2416 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2417 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2418
545cc85d 2419 for fmt in streaming_formats:
2420 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2421 continue
321bf820 2422
cc2db878 2423 itag = str_or_none(fmt.get('itag'))
9297939e 2424 audio_track = fmt.get('audioTrack') or {}
2425 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2426 if stream_id in stream_ids:
2427 continue
2428
cc2db878 2429 quality = fmt.get('quality')
d3fc8074 2430 if quality == 'tiny' or not quality:
2431 quality = fmt.get('audioQuality', '').lower() or quality
cc2db878 2432 if itag and quality:
2433 itag_qualities[itag] = quality
2434 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2435 # (adding `&sq=0` to the URL) and parsing the emsg box to determine the
2436 # number of fragments that would subsequently be requested (with `&sq=N`)
2437 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2438 continue
2439
545cc85d 2440 fmt_url = fmt.get('url')
2441 if not fmt_url:
2442 sc = compat_parse_qs(fmt.get('signatureCipher'))
2443 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2444 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2445 if not (sc and fmt_url and encrypted_sig):
2446 continue
545cc85d 2447 if not player_url:
201e9eaa 2448 continue
545cc85d 2449 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2450 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2451 fmt_url += '&' + sp + '=' + signature
2452
545cc85d 2453 if itag:
2454 itags.append(itag)
9297939e 2455 stream_ids.append(stream_id)
2456
cc2db878 2457 tbr = float_or_none(
2458 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 2459 dct = {
2460 'asr': int_or_none(fmt.get('audioSampleRate')),
2461 'filesize': int_or_none(fmt.get('contentLength')),
2462 'format_id': itag,
0fb983f6 2463 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
545cc85d 2464 'fps': int_or_none(fmt.get('fps')),
2465 'height': int_or_none(fmt.get('height')),
dca3ff4a 2466 'quality': q(quality),
cc2db878 2467 'tbr': tbr,
545cc85d 2468 'url': fmt_url,
2469 'width': fmt.get('width'),
0fb983f6 2470 'language': audio_track.get('id', '').split('.')[0],
545cc85d 2471 }
60bdb7bd 2472 mime_mobj = re.match(
2473 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2474 if mime_mobj:
2475 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2476 dct.update(parse_codecs(mime_mobj.group(2)))
2477 # The 3gp format in android client has a quality of "small",
2478 # but is actually worse than all other formats
2479 if dct['ext'] == '3gp':
2480 dct['quality'] = q('tiny')
cc2db878 2481 no_audio = dct.get('acodec') == 'none'
2482 no_video = dct.get('vcodec') == 'none'
2483 if no_audio:
2484 dct['vbr'] = tbr
2485 if no_video:
2486 dct['abr'] = tbr
2487 if no_audio or no_video:
545cc85d 2488 dct['downloader_options'] = {
2489 # Youtube throttles chunks >~10M
2490 'http_chunk_size': 10485760,
bf1317d2 2491 }
7c60c33e 2492 if dct.get('ext'):
2493 dct['container'] = dct['ext'] + '_dash'
545cc85d 2494 formats.append(dct)
2495
4bb6b02f 2496 skip_manifests = self._configuration_arg('skip')
5d3a0e79 2497 get_dash = 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
2498 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2499
9297939e 2500 for sd in (streaming_data, ytm_streaming_data):
5d3a0e79 2501 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
9297939e 2502 if hls_manifest_url:
2503 for f in self._extract_m3u8_formats(
2504 hls_manifest_url, video_id, 'mp4', fatal=False):
2505 itag = self._search_regex(
2506 r'/itag/(\d+)', f['url'], 'itag', default=None)
2507 if itag:
2508 f['format_id'] = itag
8d68ab98 2509 formats.append(f)
545cc85d 2510
5d3a0e79 2511 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2512 if dash_manifest_url:
2513 for f in self._extract_mpd_formats(
2514 dash_manifest_url, video_id, fatal=False):
2515 itag = f['format_id']
2516 if itag in itags:
2517 continue
2518 if itag in itag_qualities:
2519 f['quality'] = q(itag_qualities[itag])
2520 filesize = int_or_none(self._search_regex(
2521 r'/clen/(\d+)', f.get('fragment_base_url')
2522 or f['url'], 'file size', default=None))
2523 if filesize:
2524 f['filesize'] = filesize
2525 formats.append(f)
bf1317d2 2526
545cc85d 2527 if not formats:
a06916d9 2528 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
b7da73eb 2529 self.raise_no_formats(
545cc85d 2530 'This video is DRM protected.', expected=True)
2531 pemr = try_get(
2532 playability_status,
2533 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2534 dict) or {}
2535 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2536 subreason = pemr.get('subreason')
2537 if subreason:
2538 subreason = clean_html(get_text(subreason))
2539 if subreason == 'The uploader has not made this video available in your country.':
2540 countries = microformat.get('availableCountries')
2541 if not countries:
2542 regions_allowed = search_meta('regionsAllowed')
2543 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2544 self.raise_geo_restricted(subreason, countries, metadata_available=True)
545cc85d 2545 reason += '\n' + subreason
2546 if reason:
b7da73eb 2547 self.raise_no_formats(reason, expected=True)
bf1317d2 2548
545cc85d 2549 self._sort_formats(formats)
bf1317d2 2550
545cc85d 2551 keywords = video_details.get('keywords') or []
2552 if not keywords and webpage:
2553 keywords = [
2554 unescapeHTML(m.group('content'))
2555 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2556 for keyword in keywords:
2557 if keyword.startswith('yt:stretch='):
201c1459 2558 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2559 if mobj:
2560 # NB: float is intentional for forcing float division
2561 w, h = (float(v) for v in mobj.groups())
2562 if w > 0 and h > 0:
2563 ratio = w / h
2564 for f in formats:
2565 if f.get('vcodec') != 'none':
2566 f['stretched_ratio'] = ratio
2567 break
6449cd80 2568
545cc85d 2569 thumbnails = []
2570 for container in (video_details, microformat):
2571 for thumbnail in (try_get(
2572 container,
2573 lambda x: x['thumbnail']['thumbnails'], list) or []):
2574 thumbnail_url = thumbnail.get('url')
2575 if not thumbnail_url:
bf1317d2 2576 continue
1988fab7 2577 # Sometimes youtube gives a wrong thumbnail URL. See:
2578 # https://github.com/yt-dlp/yt-dlp/issues/233
2579 # https://github.com/ytdl-org/youtube-dl/issues/28023
2580 if 'maxresdefault' in thumbnail_url:
2581 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2582 thumbnails.append({
545cc85d 2583 'url': thumbnail_url,
ff2751ac 2584 'height': int_or_none(thumbnail.get('height')),
545cc85d 2585 'width': int_or_none(thumbnail.get('width')),
ff2751ac 2586 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
545cc85d 2587 })
ff2751ac 2588 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2589 if thumbnail_url:
2590 thumbnails.append({
2591 'url': thumbnail_url,
2592 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2593 })
2594 # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
2595 # See: https://github.com/ytdl-org/youtube-dl/issues/29049
2596 thumbnails.append({
2597 'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
2598 'preference': 1,
2599 })
2600 self._remove_duplicate_formats(thumbnails)
545cc85d 2601
2602 category = microformat.get('category') or search_meta('genre')
2603 channel_id = video_details.get('channelId') \
2604 or microformat.get('externalChannelId') \
2605 or search_meta('channelId')
2606 duration = int_or_none(
2607 video_details.get('lengthSeconds')
2608 or microformat.get('lengthSeconds')) \
2609 or parse_duration(search_meta('duration'))
2610 is_live = video_details.get('isLive')
f6745c49 2611 is_upcoming = video_details.get('isUpcoming')
545cc85d 2612 owner_profile_url = microformat.get('ownerProfileUrl')
2613
2614 info = {
2615 'id': video_id,
2616 'title': self._live_title(video_title) if is_live else video_title,
2617 'formats': formats,
2618 'thumbnails': thumbnails,
2619 'description': video_description,
2620 'upload_date': unified_strdate(
2621 microformat.get('uploadDate')
2622 or search_meta('uploadDate')),
2623 'uploader': video_details['author'],
2624 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2625 'uploader_url': owner_profile_url,
2626 'channel_id': channel_id,
2627 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2628 'duration': duration,
2629 'view_count': int_or_none(
2630 video_details.get('viewCount')
2631 or microformat.get('viewCount')
2632 or search_meta('interactionCount')),
2633 'average_rating': float_or_none(video_details.get('averageRating')),
2634 'age_limit': 18 if (
2635 microformat.get('isFamilySafe') is False
2636 or search_meta('isFamilyFriendly') == 'false'
2637 or search_meta('og:restrictions:age') == '18+') else 0,
2638 'webpage_url': webpage_url,
2639 'categories': [category] if category else None,
2640 'tags': keywords,
2641 'is_live': is_live,
2642 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2643 'was_live': video_details.get('isLiveContent'),
545cc85d 2644 }
b477fc13 2645
545cc85d 2646 pctr = try_get(
2647 player_response,
2648 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2649 subtitles = {}
2650 if pctr:
def process_language(container, base_url, lang_code, sub_name, query):
    """Append one subtitle entry per supported format for *lang_code*.

    container -- dict mapping a language code to its list of subtitle
                 entries; the list is created on first use.
    base_url  -- caption track URL that the query parameters are applied to.
    lang_code -- key under which the entries are stored in *container*.
    sub_name  -- value stored as each entry's 'name'.
    query     -- dict of extra URL query parameters.  NOTE: it is updated
                 in place ('fmt' is overwritten for every format), so
                 callers should pass a dict they do not reuse.
    """
    tracks = container.setdefault(lang_code, [])
    for sub_ext in self._SUBTITLE_FORMATS:
        # Same dict is reused for every format; only 'fmt' changes.
        query['fmt'] = sub_ext
        track_url = update_url_query(base_url, query)
        tracks.append({
            'ext': sub_ext,
            'url': track_url,
            'name': sub_name,
        })
7e72694b 2662
545cc85d 2663 for caption_track in (pctr.get('captionTracks') or []):
2664 base_url = caption_track.get('baseUrl')
2665 if not base_url:
2666 continue
2667 if caption_track.get('kind') != 'asr':
120916da 2668 lang_code = (
2669 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2670 or caption_track.get('languageCode'))
545cc85d 2671 if not lang_code:
2672 continue
2673 process_language(
774d79cc 2674 subtitles, base_url, lang_code,
2675 try_get(caption_track, lambda x: x.get('name').get('simpleText')),
2676 {})
545cc85d 2677 continue
2678 automatic_captions = {}
2679 for translation_language in (pctr.get('translationLanguages') or []):
2680 translation_language_code = translation_language.get('languageCode')
2681 if not translation_language_code:
2682 continue
2683 process_language(
2684 automatic_captions, base_url, translation_language_code,
49c258e1 2685 try_get(translation_language, (
2686 lambda x: x['languageName']['simpleText'],
2687 lambda x: x['languageName']['runs'][0]['text'])),
545cc85d 2688 {'tlang': translation_language_code})
2689 info['automatic_captions'] = automatic_captions
2690 info['subtitles'] = subtitles
7e72694b 2691
545cc85d 2692 parsed_url = compat_urllib_parse_urlparse(url)
2693 for component in [parsed_url.fragment, parsed_url.query]:
2694 query = compat_parse_qs(component)
2695 for k, v in query.items():
2696 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2697 d_k += '_time'
2698 if d_k not in info and k in s_ks:
2699 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2700
2701 # Youtube Music Auto-generated description
822b9d9c 2702 if video_description:
38d70284 2703 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2704 if mobj:
822b9d9c
RA
2705 release_year = mobj.group('release_year')
2706 release_date = mobj.group('release_date')
2707 if release_date:
2708 release_date = release_date.replace('-', '')
2709 if not release_year:
545cc85d 2710 release_year = release_date[:4]
2711 info.update({
2712 'album': mobj.group('album'.strip()),
2713 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2714 'track': mobj.group('track').strip(),
2715 'release_date': release_date,
cc2db878 2716 'release_year': int_or_none(release_year),
545cc85d 2717 })
7e72694b 2718
545cc85d 2719 initial_data = None
2720 if webpage:
2721 initial_data = self._extract_yt_initial_variable(
2722 webpage, self._YT_INITIAL_DATA_RE, video_id,
2723 'yt initial data')
2724 if not initial_data:
109dd3b2 2725 initial_data = self._extract_response(
2726 item_id=video_id, ep='next', fatal=False,
2727 ytcfg=ytcfg, headers=headers, query={'videoId': video_id},
2728 note='Downloading initial data API JSON')
545cc85d 2729
c60ee3a2 2730 try:
2731 # This will error if there is no livechat
2732 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2733 info['subtitles']['live_chat'] = [{
2734 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2735 'video_id': video_id,
2736 'ext': 'json',
f6745c49 2737 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 2738 }]
2739 except (KeyError, IndexError, TypeError):
2740 pass
545cc85d 2741
2742 if initial_data:
2743 chapters = self._extract_chapters_from_json(
2744 initial_data, video_id, duration)
2745 if not chapters:
2746 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2747 contents = try_get(
2748 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2749 list)
2750 if not contents:
2751 continue
2752
def chapter_time(mmlir):
    """Parse the 'timeDescription' of a macroMarkersListItemRenderer dict.

    The text of mmlir['timeDescription'] is extracted with get_text and
    handed to parse_duration; that result is returned unchanged.

    mmlir may be None (or otherwise not a dict) when the caller looks up the
    *next* list entry and that entry has no 'macroMarkersListItemRenderer'.
    In that case None is returned, so the caller's `is None` check skips the
    chapter instead of the extraction failing with AttributeError on `.get`.
    """
    if not isinstance(mmlir, dict):
        return None
    return parse_duration(get_text(mmlir.get('timeDescription')))
2756
2757 chapters = []
2758 for next_num, content in enumerate(contents, start=1):
2759 mmlir = content.get('macroMarkersListItemRenderer') or {}
2760 start_time = chapter_time(mmlir)
2761 end_time = chapter_time(try_get(
2762 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2763 if next_num < len(contents) else duration
2764 if start_time is None or end_time is None:
2765 continue
2766 chapters.append({
2767 'start_time': start_time,
2768 'end_time': end_time,
2769 'title': get_text(mmlir.get('title')),
2770 })
2771 if chapters:
2772 break
2773 if chapters:
2774 info['chapters'] = chapters
2775
2776 contents = try_get(
2777 initial_data,
2778 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2779 list) or []
2780 for content in contents:
2781 vpir = content.get('videoPrimaryInfoRenderer')
2782 if vpir:
2783 stl = vpir.get('superTitleLink')
2784 if stl:
2785 stl = get_text(stl)
2786 if try_get(
2787 vpir,
2788 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2789 info['location'] = stl
2790 else:
2791 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2792 if mobj:
2793 info.update({
2794 'series': mobj.group(1),
2795 'season_number': int(mobj.group(2)),
2796 'episode_number': int(mobj.group(3)),
2797 })
2798 for tlb in (try_get(
2799 vpir,
2800 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2801 list) or []):
2802 tbr = tlb.get('toggleButtonRenderer') or {}
2803 for getter, regex in [(
2804 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2805 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2806 lambda x: x['accessibility'],
2807 lambda x: x['accessibilityData']['accessibilityData'],
2808 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2809 label = (try_get(tbr, getter, dict) or {}).get('label')
2810 if label:
2811 mobj = re.match(regex, label)
2812 if mobj:
2813 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2814 break
2815 sbr_tooltip = try_get(
2816 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2817 if sbr_tooltip:
2818 like_count, dislike_count = sbr_tooltip.split(' / ')
2819 info.update({
2820 'like_count': str_to_int(like_count),
2821 'dislike_count': str_to_int(dislike_count),
2822 })
2823 vsir = content.get('videoSecondaryInfoRenderer')
2824 if vsir:
2825 info['channel'] = get_text(try_get(
2826 vsir,
2827 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2828 dict))
545cc85d 2829 rows = try_get(
2830 vsir,
2831 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2832 list) or []
2833 multiple_songs = False
2834 for row in rows:
2835 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2836 multiple_songs = True
2837 break
2838 for row in rows:
2839 mrr = row.get('metadataRowRenderer') or {}
2840 mrr_title = mrr.get('title')
2841 if not mrr_title:
2842 continue
2843 mrr_title = get_text(mrr['title'])
2844 mrr_contents_text = get_text(mrr['contents'][0])
2845 if mrr_title == 'License':
2846 info['license'] = mrr_contents_text
2847 elif not multiple_songs:
2848 if mrr_title == 'Album':
2849 info['album'] = mrr_contents_text
2850 elif mrr_title == 'Artist':
2851 info['artist'] = mrr_contents_text
2852 elif mrr_title == 'Song':
2853 info['track'] = mrr_contents_text
2854
2855 fallbacks = {
2856 'channel': 'uploader',
2857 'channel_id': 'uploader_id',
2858 'channel_url': 'uploader_url',
2859 }
2860 for to, frm in fallbacks.items():
2861 if not info.get(to):
2862 info[to] = info.get(frm)
2863
2864 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2865 v = info.get(s_k)
2866 if v:
2867 info[d_k] = v
b84071c0 2868
c224251a
M
2869 is_private = bool_or_none(video_details.get('isPrivate'))
2870 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2871 is_membersonly = None
b28f8d24 2872 is_premium = None
c224251a
M
2873 if initial_data and is_private is not None:
2874 is_membersonly = False
b28f8d24 2875 is_premium = False
c224251a
M
2876 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2877 for content in contents or []:
2878 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2879 for badge in badges or []:
2880 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2881 if label.lower() == 'members only':
2882 is_membersonly = True
2883 break
b28f8d24
M
2884 elif label.lower() == 'premium':
2885 is_premium = True
2886 break
2887 if is_membersonly or is_premium:
c224251a
M
2888 break
2889
2890 # TODO: Add this for playlists
2891 info['availability'] = self._availability(
2892 is_private=is_private,
b28f8d24 2893 needs_premium=is_premium,
c224251a
M
2894 needs_subscription=is_membersonly,
2895 needs_auth=info['age_limit'] >= 18,
2896 is_unlisted=None if is_private is None else is_unlisted)
2897
06167fbb 2898 # get xsrf for annotations or comments
a06916d9 2899 get_annotations = self.get_param('writeannotations', False)
2900 get_comments = self.get_param('getcomments', False)
06167fbb 2901 if get_annotations or get_comments:
29f7c58a 2902 xsrf_token = None
545cc85d 2903 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2904 if ytcfg:
2905 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2906 if not xsrf_token:
2907 xsrf_token = self._search_regex(
2908 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2909 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2910
2911 # annotations
06167fbb 2912 if get_annotations:
64b6a4e9
RA
2913 invideo_url = try_get(
2914 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2915 if xsrf_token and invideo_url:
29f7c58a 2916 xsrf_field_name = None
2917 if ytcfg:
2918 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2919 if not xsrf_field_name:
2920 xsrf_field_name = self._search_regex(
2921 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2922 webpage, 'xsrf field name',
29f7c58a 2923 group='xsrf_field_name', default='session_token')
8a784c74 2924 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2925 self._proto_relative_url(invideo_url),
2926 video_id, note='Downloading annotations',
2927 errnote='Unable to download video annotations', fatal=False,
2928 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2929
277d6ff5 2930 if get_comments:
a1c5d2ca 2931 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage, xsrf_token)
4ea3be0a 2932
545cc85d 2933 self.mark_watched(video_id, player_response)
d77ab8e2 2934
545cc85d 2935 return info
c5e8d7af 2936
5f6a1245 2937
8bdd16b4 2938class YoutubeTabIE(YoutubeBaseInfoExtractor):
2939 IE_DESC = 'YouTube.com tab'
70d5c17b 2940 _VALID_URL = r'''(?x)
2941 https?://
2942 (?:\w+\.)?
2943 (?:
2944 youtube(?:kids)?\.com|
2945 invidio\.us
2946 )/
2947 (?:
fe03a6cd 2948 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 2949 (?P<not_channel>
9ba5705a 2950 feed/|hashtag/|
70d5c17b 2951 (?:playlist|watch)\?.*?\blist=
2952 )|
29f7c58a 2953 (?!(?:%s)\b) # Direct URLs
70d5c17b 2954 )
2955 (?P<id>[^/?\#&]+)
2956 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2957 IE_NAME = 'youtube:tab'
2958
81127aa5 2959 _TESTS = [{
da692b79 2960 'note': 'playlists, multipage',
8bdd16b4 2961 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2962 'playlist_mincount': 94,
2963 'info_dict': {
2964 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2965 'title': 'Игорь Клейнер - Playlists',
2966 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2967 'uploader': 'Игорь Клейнер',
2968 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2969 },
2970 }, {
da692b79 2971 'note': 'playlists, multipage, different order',
8bdd16b4 2972 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2973 'playlist_mincount': 94,
2974 'info_dict': {
2975 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2976 'title': 'Игорь Клейнер - Playlists',
2977 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2978 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2979 'uploader': 'Игорь Клейнер',
8bdd16b4 2980 },
201c1459 2981 }, {
da692b79 2982 'note': 'playlists, series',
201c1459 2983 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
2984 'playlist_mincount': 5,
2985 'info_dict': {
2986 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2987 'title': '3Blue1Brown - Playlists',
2988 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 2989 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
2990 'uploader': '3Blue1Brown',
201c1459 2991 },
8bdd16b4 2992 }, {
da692b79 2993 'note': 'playlists, singlepage',
8bdd16b4 2994 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2995 'playlist_mincount': 4,
2996 'info_dict': {
2997 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2998 'title': 'ThirstForScience - Playlists',
2999 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 3000 'uploader': 'ThirstForScience',
3001 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 3002 }
3003 }, {
3004 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3005 'only_matching': True,
3006 }, {
da692b79 3007 'note': 'basic, single video playlist',
0e30a7b9 3008 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3009 'info_dict': {
0e30a7b9 3010 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3011 'uploader': 'Sergey M.',
3012 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3013 'title': 'youtube-dl public playlist',
81127aa5 3014 },
0e30a7b9 3015 'playlist_count': 1,
9291475f 3016 }, {
da692b79 3017 'note': 'empty playlist',
0e30a7b9 3018 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3019 'info_dict': {
0e30a7b9 3020 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3021 'uploader': 'Sergey M.',
3022 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3023 'title': 'youtube-dl empty playlist',
9291475f
PH
3024 },
3025 'playlist_count': 0,
3026 }, {
da692b79 3027 'note': 'Home tab',
8bdd16b4 3028 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3029 'info_dict': {
8bdd16b4 3030 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3031 'title': 'lex will - Home',
3032 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3033 'uploader': 'lex will',
3034 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3035 },
8bdd16b4 3036 'playlist_mincount': 2,
9291475f 3037 }, {
da692b79 3038 'note': 'Videos tab',
8bdd16b4 3039 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3040 'info_dict': {
8bdd16b4 3041 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3042 'title': 'lex will - Videos',
3043 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3044 'uploader': 'lex will',
3045 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3046 },
8bdd16b4 3047 'playlist_mincount': 975,
9291475f 3048 }, {
da692b79 3049 'note': 'Videos tab, sorted by popular',
8bdd16b4 3050 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3051 'info_dict': {
8bdd16b4 3052 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3053 'title': 'lex will - Videos',
3054 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3055 'uploader': 'lex will',
3056 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3057 },
8bdd16b4 3058 'playlist_mincount': 199,
9291475f 3059 }, {
da692b79 3060 'note': 'Playlists tab',
8bdd16b4 3061 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3062 'info_dict': {
8bdd16b4 3063 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3064 'title': 'lex will - Playlists',
3065 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3066 'uploader': 'lex will',
3067 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3068 },
8bdd16b4 3069 'playlist_mincount': 17,
ac7553d0 3070 }, {
da692b79 3071 'note': 'Community tab',
8bdd16b4 3072 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3073 'info_dict': {
8bdd16b4 3074 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3075 'title': 'lex will - Community',
3076 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3077 'uploader': 'lex will',
3078 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3079 },
3080 'playlist_mincount': 18,
87dadd45 3081 }, {
da692b79 3082 'note': 'Channels tab',
8bdd16b4 3083 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3084 'info_dict': {
8bdd16b4 3085 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3086 'title': 'lex will - Channels',
3087 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3088 'uploader': 'lex will',
3089 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3090 },
deaec5af 3091 'playlist_mincount': 12,
cd684175 3092 }, {
3093 'note': 'Search tab',
3094 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3095 'playlist_mincount': 40,
3096 'info_dict': {
3097 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3098 'title': '3Blue1Brown - Search - linear algebra',
3099 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3100 'uploader': '3Blue1Brown',
3101 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3102 },
6b08cdf6 3103 }, {
a0566bbf 3104 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3105 'only_matching': True,
3106 }, {
a0566bbf 3107 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3108 'only_matching': True,
3109 }, {
a0566bbf 3110 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3111 'only_matching': True,
3112 }, {
3113 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3114 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3115 'info_dict': {
3116 'title': '29C3: Not my department',
3117 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3118 'uploader': 'Christiaan008',
3119 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3120 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3121 },
3122 'playlist_count': 96,
3123 }, {
3124 'note': 'Large playlist',
3125 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3126 'info_dict': {
8bdd16b4 3127 'title': 'Uploads from Cauchemar',
3128 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3129 'uploader': 'Cauchemar',
3130 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3131 },
8bdd16b4 3132 'playlist_mincount': 1123,
3133 }, {
da692b79 3134 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3135 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3136 'only_matching': True,
4b7df0d3
JMF
3137 }, {
3138 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3139 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3140 'info_dict': {
acf757f4
PH
3141 'title': 'Uploads from Interstellar Movie',
3142 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3143 'uploader': 'Interstellar Movie',
8bdd16b4 3144 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3145 },
481cc733 3146 'playlist_mincount': 21,
358de58c 3147 }, {
3148 'note': 'Playlist with "show unavailable videos" button',
3149 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3150 'info_dict': {
3151 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3152 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3153 'uploader': 'Phim Siêu Nhân Nhật Bản',
3154 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3155 },
da692b79 3156 'playlist_mincount': 200,
5d342002 3157 }, {
da692b79 3158 'note': 'Playlist with unavailable videos in page 7',
5d342002 3159 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3160 'info_dict': {
3161 'title': 'Uploads from BlankTV',
3162 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3163 'uploader': 'BlankTV',
3164 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3165 },
da692b79 3166 'playlist_mincount': 1000,
8bdd16b4 3167 }, {
da692b79 3168 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3169 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3170 'info_dict': {
3171 'title': 'Data Analysis with Dr Mike Pound',
3172 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3173 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3174 'uploader': 'Computerphile',
deaec5af 3175 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3176 },
3177 'playlist_mincount': 11,
3178 }, {
a0566bbf 3179 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3180 'only_matching': True,
dacb3a86 3181 }, {
da692b79 3182 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3183 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3184 'info_dict': {
3185 'id': 'FqZTN594JQw',
3186 'ext': 'webm',
3187 'title': "Smiley's People 01 detective, Adventure Series, Action",
3188 'uploader': 'STREEM',
3189 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3190 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3191 'upload_date': '20150526',
3192 'license': 'Standard YouTube License',
3193 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3194 'categories': ['People & Blogs'],
3195 'tags': list,
dbdaaa23 3196 'view_count': int,
dacb3a86
S
3197 'like_count': int,
3198 'dislike_count': int,
3199 },
3200 'params': {
3201 'skip_download': True,
3202 },
13a75688 3203 'skip': 'This video is not available.',
dacb3a86 3204 'add_ie': [YoutubeIE.ie_key()],
481cc733 3205 }, {
8bdd16b4 3206 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3207 'only_matching': True,
66b48727 3208 }, {
8bdd16b4 3209 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3210 'only_matching': True,
a0566bbf 3211 }, {
3212 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3213 'info_dict': {
da692b79 3214 'id': 'X1whbWASnNQ', # This will keep changing
a0566bbf 3215 'ext': 'mp4',
deaec5af 3216 'title': compat_str,
a0566bbf 3217 'uploader': 'Sky News',
3218 'uploader_id': 'skynews',
3219 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3220 'upload_date': r're:\d{8}',
3221 'description': compat_str,
a0566bbf 3222 'categories': ['News & Politics'],
3223 'tags': list,
3224 'like_count': int,
3225 'dislike_count': int,
3226 },
3227 'params': {
3228 'skip_download': True,
3229 },
da692b79 3230 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3231 }, {
3232 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3233 'info_dict': {
3234 'id': 'a48o2S1cPoo',
3235 'ext': 'mp4',
3236 'title': 'The Young Turks - Live Main Show',
3237 'uploader': 'The Young Turks',
3238 'uploader_id': 'TheYoungTurks',
3239 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3240 'upload_date': '20150715',
3241 'license': 'Standard YouTube License',
3242 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3243 'categories': ['News & Politics'],
3244 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3245 'like_count': int,
3246 'dislike_count': int,
3247 },
3248 'params': {
3249 'skip_download': True,
3250 },
3251 'only_matching': True,
3252 }, {
3253 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3254 'only_matching': True,
3255 }, {
3256 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3257 'only_matching': True,
09f1580e 3258 }, {
3259 'note': 'A channel that is not live. Should raise error',
3260 'url': 'https://www.youtube.com/user/numberphile/live',
3261 'only_matching': True,
3d3dddc9 3262 }, {
3263 'url': 'https://www.youtube.com/feed/trending',
3264 'only_matching': True,
3265 }, {
3d3dddc9 3266 'url': 'https://www.youtube.com/feed/library',
3267 'only_matching': True,
3268 }, {
3d3dddc9 3269 'url': 'https://www.youtube.com/feed/history',
3270 'only_matching': True,
3271 }, {
3d3dddc9 3272 'url': 'https://www.youtube.com/feed/subscriptions',
3273 'only_matching': True,
3274 }, {
3d3dddc9 3275 'url': 'https://www.youtube.com/feed/watch_later',
3276 'only_matching': True,
3277 }, {
da692b79 3278 'note': 'Recommended - redirects to home page',
3d3dddc9 3279 'url': 'https://www.youtube.com/feed/recommended',
3280 'only_matching': True,
29f7c58a 3281 }, {
da692b79 3282 'note': 'inline playlist with not always working continuations',
29f7c58a 3283 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3284 'only_matching': True,
3285 }, {
3286 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3287 'only_matching': True,
3288 }, {
3289 'url': 'https://www.youtube.com/course',
3290 'only_matching': True,
3291 }, {
3292 'url': 'https://www.youtube.com/zsecurity',
3293 'only_matching': True,
3294 }, {
3295 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3296 'only_matching': True,
3297 }, {
3298 'url': 'https://www.youtube.com/TheYoungTurks/live',
3299 'only_matching': True,
39ed931e 3300 }, {
3301 'url': 'https://www.youtube.com/hashtag/cctv9',
3302 'info_dict': {
3303 'id': 'cctv9',
3304 'title': '#cctv9',
3305 },
3306 'playlist_mincount': 350,
201c1459 3307 }, {
3308 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3309 'only_matching': True,
9297939e 3310 }, {
da692b79 3311 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3312 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3313 'only_matching': True
fe03a6cd 3314 }, {
3315 'note': '/browse/ should redirect to /channel/',
3316 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3317 'only_matching': True
3318 }, {
3319 'note': 'VLPL, should redirect to playlist?list=PL...',
3320 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3321 'info_dict': {
3322 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3323 'uploader': 'NoCopyrightSounds',
3324 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3325 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3326 'title': 'NCS Releases',
3327 },
3328 'playlist_mincount': 166,
18db7548 3329 }, {
3330 'note': 'Topic, should redirect to playlist?list=UU...',
3331 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3332 'info_dict': {
3333 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3334 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3335 'title': 'Uploads from Royalty Free Music - Topic',
3336 'uploader': 'Royalty Free Music - Topic',
3337 },
3338 'expected_warnings': [
3339 'A channel/user page was given',
3340 'The URL does not have a videos tab',
3341 ],
3342 'playlist_mincount': 101,
3343 }, {
3344 'note': 'Topic without a UU playlist',
3345 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3346 'info_dict': {
3347 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3348 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3349 },
3350 'expected_warnings': [
3351 'A channel/user page was given',
3352 'The URL does not have a videos tab',
3353 'Falling back to channel URL',
3354 ],
3355 'playlist_mincount': 9,
abcdd12b 3356 }, {
3357 'note': 'Youtube music Album',
3358 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3359 'info_dict': {
3360 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3361 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3362 },
3363 'playlist_count': 50,
29f7c58a 3364 }]
3365
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL accepted by YoutubeIE.suitable() is declined here; for every
    other URL the decision is delegated to the parent class.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 3370
3371 def _extract_channel_id(self, webpage):
3372 channel_id = self._html_search_meta(
3373 'channelId', webpage, 'channel id', default=None)
3374 if channel_id:
3375 return channel_id
3376 channel_url = self._html_search_meta(
3377 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3378 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3379 'twitter:app:url:googleplay'), webpage, 'channel url')
3380 return self._search_regex(
3381 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3382 channel_url, 'channel id')
15f6397c 3383
8bdd16b4 3384 @staticmethod
cd7c66cf 3385 def _extract_basic_item_renderer(item):
3386 # Modified from _extract_grid_item_renderer
201c1459 3387 known_basic_renderers = (
3388 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3389 )
3390 for key, renderer in item.items():
201c1459 3391 if not isinstance(renderer, dict):
cd7c66cf 3392 continue
201c1459 3393 elif key in known_basic_renderers:
3394 return renderer
3395 elif key.startswith('grid') and key.endswith('Renderer'):
3396 return renderer
8bdd16b4 3397
8bdd16b4 3398 def _grid_entries(self, grid_renderer):
3399 for item in grid_renderer['items']:
3400 if not isinstance(item, dict):
39b62db1 3401 continue
cd7c66cf 3402 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 3403 if not isinstance(renderer, dict):
3404 continue
3405 title = try_get(
201c1459 3406 renderer, (lambda x: x['title']['runs'][0]['text'],
3407 lambda x: x['title']['simpleText']), compat_str)
8bdd16b4 3408 # playlist
3409 playlist_id = renderer.get('playlistId')
3410 if playlist_id:
3411 yield self.url_result(
3412 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3413 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3414 video_title=title)
201c1459 3415 continue
8bdd16b4 3416 # video
3417 video_id = renderer.get('videoId')
3418 if video_id:
3419 yield self._extract_video(renderer)
201c1459 3420 continue
8bdd16b4 3421 # channel
3422 channel_id = renderer.get('channelId')
3423 if channel_id:
3424 title = try_get(
3425 renderer, lambda x: x['title']['simpleText'], compat_str)
3426 yield self.url_result(
3427 'https://www.youtube.com/channel/%s' % channel_id,
3428 ie=YoutubeTabIE.ie_key(), video_title=title)
201c1459 3429 continue
3430 # generic endpoint URL support
3431 ep_url = urljoin('https://www.youtube.com/', try_get(
3432 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3433 compat_str))
3434 if ep_url:
3435 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3436 if ie.suitable(ep_url):
3437 yield self.url_result(
3438 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3439 break
8bdd16b4 3440
3d3dddc9 3441 def _shelf_entries_from_content(self, shelf_renderer):
3442 content = shelf_renderer.get('content')
3443 if not isinstance(content, dict):
8bdd16b4 3444 return
cd7c66cf 3445 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3446 if renderer:
3447 # TODO: add support for nested playlists so each shelf is processed
3448 # as separate playlist
3449 # TODO: this includes only first N items
3450 for entry in self._grid_entries(renderer):
3451 yield entry
3452 renderer = content.get('horizontalListRenderer')
3453 if renderer:
3454 # TODO
3455 pass
8bdd16b4 3456
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for one shelf.

    When the shelf has an endpoint URL, a url_result for that URL is
    yielded first. Afterwards the entries embedded in the shelf content are
    yielded as well. With skip_channels set, a shelf whose URL contains
    '/channels?' yields nothing at all.
    """
    shelf_url = urljoin('https://www.youtube.com', try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str))
    if shelf_url:
        # Skipping links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 3474
8bdd16b4 3475 def _playlist_entries(self, video_list_renderer):
3476 for content in video_list_renderer['contents']:
3477 if not isinstance(content, dict):
3478 continue
3479 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3480 if not isinstance(renderer, dict):
3481 continue
3482 video_id = renderer.get('videoId')
3483 if not video_id:
3484 continue
3485 yield self._extract_video(renderer)
07aeced6 3486
def _rich_entries(self, rich_grid_renderer):
    """Yield the video entry nested at ['content']['videoRenderer'], if it has a 'videoId'."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
    if video_renderer and video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3494
8bdd16b4 3495 def _video_entry(self, video_renderer):
3496 video_id = video_renderer.get('videoId')
3497 if video_id:
3498 return self._extract_video(video_renderer)
dacb3a86 3499
def _post_thread_entries(self, post_thread_renderer):
    """Yield the entries referenced by one community post.

    In order: the attached video (if any), the attached playlist (if any),
    then every link in the post text that YoutubeIE accepts, except a link
    pointing at the already-yielded attached video.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        linked_id = YoutubeIE._match_id(ep_url)
        if linked_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=linked_id)
dacb3a86 3535
8bdd16b4 3536 def _post_thread_continuation_entries(self, post_thread_continuation):
3537 contents = post_thread_continuation.get('contents')
3538 if not isinstance(contents, list):
3539 return
3540 for content in contents:
3541 renderer = content.get('backstagePostThreadRenderer')
3542 if not isinstance(renderer, dict):
3543 continue
3544 for entry in self._post_thread_entries(renderer):
3545 yield entry
07aeced6 3546
39ed931e 3547 r''' # unused
3548 def _rich_grid_entries(self, contents):
3549 for content in contents:
3550 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3551 if video_renderer:
3552 entry = self._video_entry(video_renderer)
3553 if entry:
3554 yield entry
3555 '''
3556
29f7c58a 3557 @staticmethod
3558 def _build_continuation_query(continuation, ctp=None):
3559 query = {
3560 'ctoken': continuation,
3561 'continuation': continuation,
3562 }
3563 if ctp:
3564 query['itct'] = ctp
3565 return query
3566
@staticmethod
def _extract_next_continuation_data(renderer):
    """Return a continuation query built from renderer['continuations'][0]['nextContinuationData'].

    None is returned when that dict is missing or empty, or when it has no
    'continuation' token.
    """
    next_data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
    token = next_data.get('continuation') if next_data else None
    if not token:
        return None
    return YoutubeTabIE._build_continuation_query(
        token, next_data.get('clickTrackingParams'))
c5e8d7af 3578
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for *renderer*, or None.

    'nextContinuationData' is tried first. Otherwise the renderer's
    'contents' and then 'items' lists are scanned for the first
    'continuationItemRenderer' whose endpoint carries a command token.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query
    candidates = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
    return None
448830ce 3601
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield every entry of *tab*, following continuations page by page.

    The entries embedded in the tab content are yielded first. While a
    continuation query is available, the next page is requested through
    self._extract_response and its entries are yielded. Iteration stops
    when there is no continuation, the response is empty, or the response
    contains no renderer this method knows how to handle.

    item_id is only used to label the page requests; identity_token,
    account_syncid and ytcfg are passed on to the header/request helpers.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                # Only the first known renderer of each section content is used
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list shared with extract_entries, because Python 2 does
    # not support nonlocal. It is rebound (not just cleared) before each
    # continuation page; the closure looks the name up at call time.
    continuation_list = [None]
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

    for page_num in itertools.count(1):
        if not continuation:
            break
        # _build_continuation_query() only stores 'itct' when click-tracking
        # params were present, so it must not be indexed unconditionally.
        query = {'continuation': continuation['continuation']}
        click_tracking_params = continuation.get('itct')
        if click_tracking_params:
            query['clickTracking'] = {'clickTrackingParams': click_tracking_params}
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=query, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep using the previous visitor data when the response has none
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Each value is (handler, key under which the handler expects the item list)
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The first continuation item decides which handler gets the whole list
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        break
9558dcec 3721
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the first tab whose 'selected' flag is exactly True.

    Both 'tabRenderer' and 'expandableTabRenderer' are considered.
    Raises ExtractorError when no tab is selected.
    """
    for tab in tabs:
        tab_renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    raise ExtractorError('Unable to find selected tab')
b82f815f 3730
@staticmethod
def _extract_uploader(data):
    """Collect uploader fields from the playlist sidebar of *data*.

    Returns a dict that may contain 'uploader', 'uploader_id' and
    'uploader_url'; keys whose value is None are left out. When several
    sidebar items carry an owner, the last one wins.
    """
    found = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    for sidebar_item in sidebar_items or []:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        found['uploader'] = owner.get('text')
        found['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        found['uploader_url'] = urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
    return dict((key, value) for key, value in found.items() if value is not None)
8bdd16b4 3753
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a tabbed page (channel or playlist).

    Metadata is read from data['metadata'], preferring
    'channelMetadataRenderer' over 'playlistMetadataRenderer'. The entries
    come from the selected tab via self._entries().

    item_id is the fallback playlist id; webpage is used to extract the
    identity token and ytcfg; tabs is the list searched for the selected tab.
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    # Both names start out bound to the same empty list; below they are
    # only ever rebound or iterated, never mutated in place.
    thumbnails_list = tags = []

    selected_tab = self._extract_selected_tab(tabs)
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        # No channel metadata: fall back to playlist metadata
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        # Stays None for playlist metadata; replaced by item_id further down
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        # Renderer avatar first, then the thumbnail of the first sidebar item
        thumbnails_list = (
            try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    # Keep only thumbnails that are dicts with a valid URL
    thumbnails = []
    for t in thumbnails_list:
        if not isinstance(t, dict):
            continue
        thumbnail_url = url_or_none(t.get('url'))
        if not thumbnail_url:
            continue
        thumbnails.append({
            'url': thumbnail_url,
            'width': int_or_none(t.get('width')),
            'height': int_or_none(t.get('height')),
        })
    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        # Hashtag header text if present, otherwise the playlist id
        title = (
            try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
            or playlist_id)
    # Append the selected tab's 'title' and 'expandedText' fields to the title
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        # No channel metadata was found: take the uploader from the sidebar
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the final uploader_* values
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})
    return self.playlist_result(
        self._entries(
            selected_tab, playlist_id,
            self._extract_identity_token(webpage, item_id),
            self._extract_account_syncid(data),
            self._extract_ytcfg(item_id, webpage)),
        **metadata)
73c4ac2c 3826
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a Mix playlist, requesting further pages as needed.

    *playlist* is the playlist panel of the current page. After its videos
    have been yielded, the next panel is requested from the 'next' endpoint,
    starting from the last video seen. Iteration ends when a page has no
    videos, has nothing after the last yielded video, repeats the very
    first video, or when the response carries no playlist panel.
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
        visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video of the previous page (0 if absent)
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last panel item may not be a playlistPanelVideoRenderer with a
        # watch endpoint; fall back to an empty dict so the defaults apply
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query,
            ep='next',
            headers=headers,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # No playlist panel in the response: nothing more to iterate
            return
cd7c66cf 3865
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Return the result for an inline playlist panel.

    When the panel's endpoint resolves to a URL different from *url*, the
    work is delegated to that URL through YoutubeTabIE. Otherwise the panel
    is treated as a Mix and extracted here.
    """
    playlist_title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_path)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, webpage),
            playlist_id=playlist_id, playlist_title=playlist_title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=playlist_title)
c5e8d7af 3883
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Request the playlist again with unavailable videos included.

    The browse id and params are taken from the 'show unavailable videos'
    menu entry of the playlist sidebar when that entry exists; otherwise
    the defaults 'VL<item_id>' and 'wgYCCAA=' are used. Returns None
    without making a request when the sidebar has no items. The request is
    made with fatal=False.
    """
    sidebar_renderer = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    if not sidebar_renderer:
        return
    browse_id = params = None
    for item in sidebar_renderer:
        if not isinstance(item, dict):
            continue
        renderer = item.get('playlistSidebarPrimaryInfoRenderer')
        menu_renderer = try_get(
            renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_renderer:
            if not isinstance(menu_item, dict):
                continue
            nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
            text = try_get(
                nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
            # The menu entry is recognised by its label, compared case-insensitively
            if not text or text.lower() != 'show unavailable videos':
                continue
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id = browse_endpoint.get('browseId')
            params = browse_endpoint.get('params')
            # Leaves the menu loop only; remaining sidebar items are still scanned
            break

    ytcfg = self._extract_ytcfg(item_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=try_get(
            self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    # Fall back to default values when no menu entry supplied them
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False,
        note='Downloading API JSON with unavailable videos')
358de58c 3927
def _extract_webpage(self, url, item_id):
    """
    Download ``url`` and parse its initial data, retrying on incomplete pages.

    A page is accepted as soon as the parsed data has a truthy 'contents' or
    'currentVideoEndpoint' entry. Otherwise the alerts in the data are
    reported and the download is repeated, up to the 'extractor_retries'
    param (default 3) additional times.

    Returns a ``(webpage, data)`` tuple.
    Raises ExtractorError if the data is still incomplete after the last retry.
    """
    retries = self.get_param('extractor_retries', 3)
    count = -1
    # Shown both in the retry warning and in the final error
    last_error = 'Incomplete yt initial data received'
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if count:
            self.report_warning('%s. Retrying ...' % last_error)
        webpage = self._download_webpage(
            url, item_id,
            'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
        data = self._extract_yt_initial_data(item_id, webpage)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        if count >= retries:
            raise ExtractorError(last_error)
    return webpage, data
3949
9297939e 3950 @staticmethod
3951 def _smuggle_data(entries, data):
3952 for entry in entries:
3953 if data:
3954 entry['url'] = smuggle_url(entry['url'], data)
3955 yield entry
3956
def _real_extract(self, url):
    """
    Unsmuggle ``url``, run the actual extraction and re-smuggle the data into the entries.

    Music URLs are flagged with 'is_music_url' in the smuggled data, so the
    flag is passed on to every entry URL of the result.
    """
    plain_url, smuggled = unsmuggle_url(url, {})
    if self.is_music_url(plain_url):
        smuggled['is_music_url'] = True

    result = self.__real_extract(plain_url, smuggled)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled)
    return result
3965
# Splits a URL into three named parts: `pre` (whatever _VALID_URL matches),
# `tab` (a following '/word' segment, attempted only when _VALID_URL's
# `channel_type` group took part in the match) and `post` (the remainder).
# The conditional group relies on _VALID_URL defining a `channel_type` group.
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
3967
def __real_extract(self, url, smuggled_data):
    """
    Resolve a channel/tab, playlist or watch URL into an extraction result.

    Steps, in order:
      1. Force the host to www.youtube.com and split the URL with _url_re.
      2. For URLs flagged 'is_music_url' in ``smuggled_data``, rewrite VL/MP
         channel IDs to playlist URLs and /browse/ to /channel/.
      3. Default tab-less channel URLs to '/videos' unless the
         'no-youtube-channel-redirect' compat option is set.
      4. With both v= and list= present, return just the video when the
         'noplaylist' param is set.
      5. Download the page; a 'UC' channel without a videos tab is swapped
         for its 'UU' playlist when that playlist loads without an error alert.
      6. Dispatch on the page data: tabs, watch-page playlist, or single video.

    Raises ExtractorError when a watch URL has neither video nor playlist ID,
    when a /live tab did not redirect to a video, or when nothing in the page
    data is recognized.
    """
    item_id = self._match_id(url)
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Named groups of _url_re as a dict, with unmatched groups as '' instead of None
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                item_id = self._search_regex(
                    r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                    self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                    'playlist id')
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Re-assemble the (possibly rewritten) URL and re-parse it so mobj reflects the final parts
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            if alert_type == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        # Switch over to the playlist only once it loaded without an error alert
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        # Keep the channel page that was already downloaded
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        # A falsy reload result leaves the already downloaded data in place
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)

    # Recomputed because data may have been replaced above
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    # Prefer the video ID reported by the page over the one from the query string
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 4083
c5e8d7af 4084
class YoutubePlaylistIE(InfoExtractor):
    """Match bare playlist IDs and ``list=`` URLs and hand them over to YoutubeTabIE."""
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE accepts or that carry a ``v`` video ID."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return not has_video_id and super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rebuild a canonical /playlist URL (keeping the query) and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # A bare playlist ID has no query string, so fall back to list=<id>
        query = parse_qs(url) or {'list': playlist_id}
        target = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            target = smuggle_url(target, {'is_music_url': True})
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4167
4168
class YoutubeYtBeIE(InfoExtractor):
    """Turn youtu.be short links that carry a ``list=`` parameter into watch URLs for YoutubeTabIE."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link to a full watch URL holding both the video and playlist IDs."""
        match = re.match(self._VALID_URL, url)
        video_id, playlist_id = match.group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4207
4208
class YoutubeYtUserIE(InfoExtractor):
    """Resolve the ``ytuser:<name>`` shorthand to the user's channel URL."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /user/ URL built from the matched name."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4222
b05654f0 4223
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the ``:ytfav`` family of shortcuts to the 'LL' playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed liked-videos playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4241
4242
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor that pages through the 'search' API endpoint."""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """
        Yield up to ``n`` video results for ``query``.

        Pages are requested one after another, following the continuation
        token of each response; iteration stops when a response is empty,
        has no result sections or carries no continuation token.
        """
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS
        yielded = 0
        for page_num in itertools.count(1):
            response = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not response:
                return
            # First page and continuation pages keep the sections in different places
            sections = try_get(
                response,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            next_token = None
            for section in sections:
                if next_token is None:
                    next_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                section_items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for section_item in section_items or ():
                    if not isinstance(section_item, dict):
                        continue
                    video = section_item.get('videoRenderer')
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    yielded += 1
                    if yielded == n:
                        return

            if not next_token:
                return
            request['continuation'] = next_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 4312
c9ae7b95 4313
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE that sends a fixed search-params value with every request."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4319
c9ae7b95 4320
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Run a search described by a youtube.com/results URL."""
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return the class's own _VALID_URL."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Take the query and the optional ``sp`` params from the URL and run the search."""
        url_query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        search_terms = url_query.get('search_query') or url_query.get('q')
        query = search_terms[0]
        # Stored on the instance because _entries reads the params from there
        self._SEARCH_PARAMS = url_query.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4346
4347
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors.

    A subclass only has to provide _FEED_NAME; both the extractor name and
    the feed URL are derived from it.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        """Extractor name built from the feed name."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /feed/ URL of this feed."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4364
4365
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolve the ``:ytwatchlater`` shortcut to the 'WL' playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed watch-later playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4378
4379
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'recommended'; also matches the bare youtube.com home URL."""
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4395
1ed5b5c9 4396
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'subscriptions', reachable through the ``:ytsubs`` shortcuts."""
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4408
1ed5b5c9 4409
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'history', reachable through the ``:ythis``/``:ythistory`` shortcuts."""
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4418
4419
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """
    Match watch/attribution URLs that carry no video ID and fail with a hint.

    The error text suggests the usual cause: an unquoted '&' cut the URL
    short in the shell.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
4467
4468
class YoutubeTruncatedIDIE(InfoExtractor):
    """Match watch URLs whose ``v`` value has only 1 to 10 characters and report them as truncated."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID and the URL."""
        short_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (short_id, url)
        raise ExtractorError(message, expected=True)