]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[vlive] Extract thumbnail directly in addition to the one from Naver
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
2d6659b9 5import base64
d92f5d5a 6import calendar
109dd3b2 7import copy
a5c56234 8import hashlib
0ca96d48 9import itertools
c5e8d7af 10import json
c4417ddb 11import os.path
d77ab8e2 12import random
c5e8d7af 13import re
8a784c74 14import time
e0df6211 15import traceback
c5e8d7af 16
b05654f0 17from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 18from ..compat import (
edf3e38e 19 compat_chr,
29f7c58a 20 compat_HTTPError,
c5e8d7af 21 compat_parse_qs,
545cc85d 22 compat_str,
7fd002c0 23 compat_urllib_parse_unquote_plus,
15707c7e 24 compat_urllib_parse_urlencode,
7c80519c 25 compat_urllib_parse_urlparse,
7c61bd36 26 compat_urlparse,
4bb4a188 27)
545cc85d 28from ..jsinterp import JSInterpreter
4bb4a188 29from ..utils import (
c224251a 30 bool_or_none,
2d6659b9 31 bytes_to_intlist,
c5e8d7af 32 clean_html,
26fe8ffe 33 dict_get,
d92f5d5a 34 datetime_from_str,
358de58c 35 error_to_compat_str,
c5e8d7af 36 ExtractorError,
b60419c5 37 format_field,
2d30521a 38 float_or_none,
dd27fd17 39 int_or_none,
2d6659b9 40 intlist_to_bytes,
94278f72 41 mimetype2ext,
6310acf5 42 parse_codecs,
7c80519c 43 parse_duration,
dca3ff4a 44 qualities,
3995d37d 45 remove_start,
cf7e015f 46 smuggle_url,
dbdaaa23 47 str_or_none,
c93d53f5 48 str_to_int,
556dbe7f 49 try_get,
c5e8d7af
PH
50 unescapeHTML,
51 unified_strdate,
cf7e015f 52 unsmuggle_url,
8bdd16b4 53 update_url_query,
21c340b8 54 url_or_none,
6e6bc8da 55 urlencode_postdata,
d92f5d5a 56 urljoin
c5e8d7af
PH
57)
58
5f6a1245 59
def parse_qs(url):
    """Return the query-string parameters of ``url`` as a dict mapping names to value lists."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
62
63
de7f3446 64class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b
JMF
65 """Provide base functions for Youtube extractors"""
# Google account endpoints (accounts.google.com) for the username/password sign-in flow.
# NOTE(review): that flow is disabled in _login(); confirm whether subclasses still use these.
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

_LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
_CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
# '{0}' is filled in with str.format() by the code that uses this URL
_TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

# Regex alternation of path names.
# NOTE(review): presumably first path segments that must not be treated as
# channel/user names - confirm against the extractors that use it.
_RESERVED_NAMES = (
    r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
    r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
    r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

_NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False

# Regex for playlist IDs: either a known prefix followed by at least 10 ID
# characters, or one of the fixed IDs RDMM / WL / LL / LM
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 83
b2e8bc1b 84 def _login(self):
83317f69 85 """
86 Attempt to log in to YouTube.
87 True is returned if successful or skipped.
88 False is returned if login failed.
89
90 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
91 """
9d5d4d64 92
93 def warn(message):
94 self.report_warning(message)
95
96 # username+password login is broken
97 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
98 self.raise_login_required(
99 'Login details are needed to download this content', method='cookies')
68217024 100 username, password = self._get_login_info()
9d5d4d64 101 if username:
102 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
103 return
9d5d4d64 104
2d6659b9 105 # Everything below this is broken!
106 r'''
b2e8bc1b
JMF
107 # No authentication to be performed
108 if username is None:
a06916d9 109 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
69ea8ca4 110 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
a06916d9 111 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
545cc85d 112 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
83317f69 113 return True
b2e8bc1b 114
7cc3570e
PH
115 login_page = self._download_webpage(
116 self._LOGIN_URL, None,
69ea8ca4
PH
117 note='Downloading login page',
118 errnote='unable to fetch login page', fatal=False)
7cc3570e
PH
119 if login_page is False:
120 return
b2e8bc1b 121
1212e997 122 login_form = self._hidden_inputs(login_page)
c5e8d7af 123
e00eb564
S
124 def req(url, f_req, note, errnote):
125 data = login_form.copy()
126 data.update({
127 'pstMsg': 1,
128 'checkConnection': 'youtube',
129 'checkedDomains': 'youtube',
130 'hl': 'en',
131 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
3995d37d 132 'f.req': json.dumps(f_req),
e00eb564
S
133 'flowName': 'GlifWebSignIn',
134 'flowEntry': 'ServiceLogin',
baf67a60
S
135 # TODO: reverse actual botguard identifier generation algo
136 'bgRequest': '["identifier",""]',
041bc3ad 137 })
e00eb564
S
138 return self._download_json(
139 url, None, note=note, errnote=errnote,
140 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
141 fatal=False,
142 data=urlencode_postdata(data), headers={
143 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
144 'Google-Accounts-XSRF': 1,
145 })
146
3995d37d
S
147 lookup_req = [
148 username,
149 None, [], None, 'US', None, None, 2, False, True,
150 [
151 None, None,
152 [2, 1, None, 1,
153 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
154 None, [], 4],
155 1, [None, None, []], None, None, None, True
156 ],
157 username,
158 ]
159
e00eb564 160 lookup_results = req(
3995d37d 161 self._LOOKUP_URL, lookup_req,
e00eb564
S
162 'Looking up account info', 'Unable to look up account info')
163
164 if lookup_results is False:
165 return False
041bc3ad 166
3995d37d
S
167 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
168 if not user_hash:
169 warn('Unable to extract user hash')
170 return False
171
172 challenge_req = [
173 user_hash,
174 None, 1, None, [1, None, None, None, [password, None, True]],
175 [
176 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
177 1, [None, None, []], None, None, None, True
178 ]]
83317f69 179
3995d37d
S
180 challenge_results = req(
181 self._CHALLENGE_URL, challenge_req,
182 'Logging in', 'Unable to log in')
83317f69 183
3995d37d 184 if challenge_results is False:
e00eb564 185 return
83317f69 186
3995d37d
S
187 login_res = try_get(challenge_results, lambda x: x[0][5], list)
188 if login_res:
189 login_msg = try_get(login_res, lambda x: x[5], compat_str)
190 warn(
191 'Unable to login: %s' % 'Invalid password'
192 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
193 return False
194
195 res = try_get(challenge_results, lambda x: x[0][-1], list)
196 if not res:
197 warn('Unable to extract result entry')
198 return False
199
9a6628aa
S
200 login_challenge = try_get(res, lambda x: x[0][0], list)
201 if login_challenge:
202 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
203 if challenge_str == 'TWO_STEP_VERIFICATION':
3995d37d
S
204 # SEND_SUCCESS - TFA code has been successfully sent to phone
205 # QUOTA_EXCEEDED - reached the limit of TFA codes
9a6628aa 206 status = try_get(login_challenge, lambda x: x[5], compat_str)
3995d37d
S
207 if status == 'QUOTA_EXCEEDED':
208 warn('Exceeded the limit of TFA codes, try later')
209 return False
210
211 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
212 if not tl:
213 warn('Unable to extract TL')
214 return False
215
216 tfa_code = self._get_tfa_info('2-step verification code')
217
218 if not tfa_code:
219 warn(
220 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
221 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
222 return False
223
224 tfa_code = remove_start(tfa_code, 'G-')
225
226 tfa_req = [
227 user_hash, None, 2, None,
228 [
229 9, None, None, None, None, None, None, None,
230 [None, tfa_code, True, 2]
231 ]]
232
233 tfa_results = req(
234 self._TFA_URL.format(tl), tfa_req,
235 'Submitting TFA code', 'Unable to submit TFA code')
236
237 if tfa_results is False:
238 return False
239
240 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
241 if tfa_res:
242 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
243 warn(
244 'Unable to finish TFA: %s' % 'Invalid TFA code'
245 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
246 return False
247
248 check_cookie_url = try_get(
249 tfa_results, lambda x: x[0][-1][2], compat_str)
9a6628aa
S
250 else:
251 CHALLENGES = {
252 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
253 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
254 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
255 }
256 challenge = CHALLENGES.get(
257 challenge_str,
258 '%s returned error %s.' % (self.IE_NAME, challenge_str))
259 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
260 return False
3995d37d
S
261 else:
262 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
263
264 if not check_cookie_url:
265 warn('Unable to extract CheckCookie URL')
266 return False
e00eb564
S
267
268 check_cookie_results = self._download_webpage(
3995d37d
S
269 check_cookie_url, None, 'Checking cookie', fatal=False)
270
271 if check_cookie_results is False:
272 return False
e00eb564 273
3995d37d
S
274 if 'https://myaccount.google.com/' not in check_cookie_results:
275 warn('Unable to log in')
b2e8bc1b 276 return False
e00eb564 277
b2e8bc1b 278 return True
2d6659b9 279 '''
b2e8bc1b 280
cce889b9 281 def _initialize_consent(self):
282 cookies = self._get_cookies('https://www.youtube.com/')
283 if cookies.get('__Secure-3PSID'):
284 return
285 consent_id = None
286 consent = cookies.get('CONSENT')
287 if consent:
288 if 'YES' in consent.value:
289 return
290 consent_id = self._search_regex(
291 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
292 if not consent_id:
293 consent_id = random.randint(100, 999)
294 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 295
b2e8bc1b 296 def _real_initialize(self):
cce889b9 297 self._initialize_consent()
b2e8bc1b
JMF
298 if self._downloader is None:
299 return
b2e8bc1b
JMF
300 if not self._login():
301 return
c5e8d7af 302
# Captures the object literal assigned to ytInitialData (or window["ytInitialData"])
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
# Captures the object literal assigned to ytInitialPlayerResponse
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
# Text expected right after the captured object; _extract_yt_initial_data appends it
# to the data regex so the non-greedy capture does not stop too early
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 306
# Default ytcfg values per Innertube client name. _get_default_ytcfg() hands out
# deep copies of these entries and falls back to 'WEB' for unknown clients.
# NOTE(review): INNERTUBE_CONTEXT_CLIENT_NAME is an integer for the WEB* clients but a
# string for the ANDROID* clients; _generate_api_headers() sends it (stringified) as
# the X-YouTube-Client-Name header - confirm the string values are intended.
_YT_DEFAULT_YTCFGS = {
    'WEB': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'WEB',
        'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'WEB_REMIX': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
        'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67
    },
    'WEB_EMBEDDED_PLAYER': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
        'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'ANDROID': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'ANDROID',
        'INNERTUBE_CLIENT_VERSION': '16.20',
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID'
    },
    'ANDROID_EMBEDDED_PLAYER': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
        'INNERTUBE_CLIENT_VERSION': '16.20',
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER'
    },
    'ANDROID_MUSIC': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
        'INNERTUBE_CLIENT_VERSION': '4.32',
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID_MUSIC'
    }
}
393
# Innertube API hostname per client. _get_innertube_host() looks up the requested
# client first and falls back to the 'WEB' entry.
_YT_DEFAULT_INNERTUBE_HOSTS = {
    'DIRECT': 'youtubei.googleapis.com',
    'WEB': 'www.youtube.com',
    'WEB_REMIX': 'music.youtube.com',
    'ANDROID_MUSIC': 'music.youtube.com'
}
400
401 def _get_default_ytcfg(self, client='WEB'):
402 if client in self._YT_DEFAULT_YTCFGS:
403 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
404 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
405 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
406
def _get_innertube_host(self, client='WEB'):
    """Return the Innertube hostname for ``client``, falling back to the 'WEB' host."""
    candidate_keys = (client, 'WEB')
    return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, candidate_keys)
409
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
    """
    try_get() on ``ytcfg``, falling back to the default ytcfg of ``default_client``.

    The fallback is consulted whenever the value found in ``ytcfg`` is falsy.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
414
def _extract_client_name(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_NAME from ``ytcfg``, or from the defaults of ``default_client``."""
    def client_name(cfg):
        return cfg['INNERTUBE_CLIENT_NAME']

    return self._ytcfg_get_safe(ytcfg, client_name, compat_str, default_client)
417
def _extract_client_version(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_VERSION from ``ytcfg``, or from the defaults of ``default_client``."""
    def client_version(cfg):
        return cfg['INNERTUBE_CLIENT_VERSION']

    return self._ytcfg_get_safe(ytcfg, client_version, compat_str, default_client)
420
def _extract_api_key(self, ytcfg=None, default_client='WEB'):
    """Return INNERTUBE_API_KEY from ``ytcfg``, or from the defaults of ``default_client``."""
    def api_key(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, api_key, compat_str, default_client)
423
def _extract_context(self, ytcfg=None, default_client='WEB'):
    """
    Return the Innertube context dict to send with API requests.

    The INNERTUBE_CONTEXT of ``ytcfg`` is returned as-is when present.
    Otherwise the default context of ``default_client`` is used; if a ytcfg
    was given, its client name/version and VISITOR_DATA are applied to it.
    """
    def context_of(cfg):
        return try_get(cfg, lambda x: x['INNERTUBE_CONTEXT'], dict)

    own_context = context_of(ytcfg)
    if own_context:
        return own_context

    fallback = context_of(self._get_default_ytcfg(default_client))
    if ytcfg:
        # Recreate the client context (required)
        client_info = fallback['client']
        client_info.update({
            'clientVersion': self._extract_client_version(ytcfg, default_client),
            'clientName': self._extract_client_name(ytcfg, default_client),
        })
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            client_info['visitorData'] = visitor_data
    return fallback
443
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """
    Build the 'SAPISIDHASH <time>_<sha1>' authorization value from the YouTube cookies.

    Returns None when neither a '__Secure-3PAPISID' nor a 'SAPISID' cookie is found.
    """
    # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
    # See: https://github.com/yt-dlp/yt-dlp/issues/393
    jar = self._get_cookies('https://www.youtube.com')
    sapisid_cookie = dict_get(jar, ('__Secure-3PAPISID', 'SAPISID'))
    if sapisid_cookie is None:
        return None

    timestamp = round(time.time())
    if not jar.get('SAPISID'):
        # SAPISID cookie is required if not already present
        self._set_cookie(
            '.youtube.com', 'SAPISID', sapisid_cookie.value,
            secure=True, expire_time=timestamp + 3600)

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    payload = f'{timestamp} {sapisid_cookie.value} {origin}'
    digest = hashlib.sha1(payload.encode('utf-8')).hexdigest()
    return f'SAPISIDHASH {timestamp}_{digest}'
a5c56234
M
461
462 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 463 note='Downloading API JSON', errnote='Unable to download API page',
109dd3b2 464 context=None, api_key=None, api_hostname=None, default_client='WEB'):
f4f751af 465
109dd3b2 466 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
8bdd16b4 467 data.update(query)
109dd3b2 468 real_headers = self._generate_api_headers(client=default_client)
f4f751af 469 real_headers.update({'content-type': 'application/json'})
470 if headers:
471 real_headers.update(headers)
545cc85d 472 return self._download_json(
109dd3b2 473 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
a5c56234 474 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 475 data=json.dumps(data).encode('utf8'), headers=real_headers,
476 query={'key': api_key or self._extract_api_key()})
477
8bdd16b4 478 def _extract_yt_initial_data(self, video_id, webpage):
479 return self._parse_json(
480 self._search_regex(
29f7c58a 481 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
a0566bbf 482 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
8bdd16b4 483 video_id)
0c148415 484
a1c5d2ca
M
def _extract_identity_token(self, webpage, item_id):
    """
    Return the identity token for ``webpage``.

    ID_TOKEN from the page's ytcfg is preferred; otherwise the raw webpage is
    searched for an ID_TOKEN entry. None is returned when neither is found.
    """
    ytcfg = self._extract_ytcfg(item_id, webpage)
    cfg_token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if cfg_token:
        return cfg_token
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
494
@staticmethod
def _extract_account_syncid(data):
    """
    Extract syncId required to download private playlists of secondary channels
    @param data Either response or ytcfg; None or non-dict values yield None
    @returns    The channel syncid, the DELEGATED_SESSION_ID value, or None
    """
    sync_ids = (try_get(
        data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
               lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
    if len(sync_ids) >= 2 and sync_ids[1]:
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        return sync_ids[0]
    # ytcfg includes channel_syncid if on secondary channel.
    # Looked up through try_get so that a missing response/ytcfg (None) returns
    # None instead of raising AttributeError on `.get`
    return try_get(data, lambda x: x['DELEGATED_SESSION_ID'])
a1c5d2ca 510
29f7c58a 511 def _extract_ytcfg(self, video_id, webpage):
8c54a305 512 if not webpage:
513 return {}
29f7c58a 514 return self._parse_json(
515 self._search_regex(
516 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
f4f751af 517 default='{}'), video_id, fatal=False) or {}
518
def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None,
                          visitor_data=None, api_hostname=None, client='WEB'):
    """
    Build the HTTP headers for an Innertube API request.

    Client name/version come from ``ytcfg`` with fallback to the defaults of
    ``client``. Identity token, page id and visitor id headers are added only
    when a value is available; Authorization and X-Origin are added only when
    a SAPISIDHASH value could be generated.
    """
    host = api_hostname if api_hostname else self._get_innertube_host(client)
    origin = 'https://' + host

    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, client),
        'Origin': origin
    }

    if ytcfg and not visitor_data:
        visitor_data = try_get(
            self._extract_context(ytcfg, client), lambda x: x['client']['visitorData'], compat_str)

    if identity_token:
        headers['X-Youtube-Identity-Token'] = identity_token
    if account_syncid:
        headers['X-Goog-PageId'] = account_syncid
        headers['X-Goog-AuthUser'] = 0
    if visitor_data:
        headers['X-Goog-Visitor-Id'] = visitor_data

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin
    return headers
29f7c58a 543
2d6659b9 544 @staticmethod
545 def _build_api_continuation_query(continuation, ctp=None):
546 query = {
547 'continuation': continuation
548 }
549 # TODO: Inconsistency with clickTrackingParams.
550 # Currently we have a fixed ctp contained within context (from ytcfg)
551 # and a ctp in root query for continuation.
552 if ctp:
553 query['clickTracking'] = {'clickTrackingParams': ctp}
554 return query
555
@classmethod
def _continuation_query_ajax_to_api(cls, continuation_query):
    """Convert a query built by _build_continuation_query() into the API query form."""
    token = dict_get(continuation_query, ('continuation', 'ctoken'))
    click_tracking = continuation_query.get('itct')
    return cls._build_api_continuation_query(token, click_tracking)
560
561 @staticmethod
562 def _build_continuation_query(continuation, ctp=None):
563 query = {
564 'ctoken': continuation,
565 'continuation': continuation,
566 }
567 if ctp:
568 query['itct'] = ctp
569 return query
570
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the next/reload continuation data of ``renderer``.

    Returns None when no such data or no continuation token is present.
    """
    getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    data = try_get(renderer, getters, dict)
    token = data.get('continuation') if data else None
    if not token:
        return None
    return cls._build_continuation_query(token, data.get('clickTrackingParams'))
583
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.

    Returns None when ``continuation_ep`` is not a dict or has no
    continuationCommand token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if token:
        return cls._build_continuation_query(token, continuation_ep.get('clickTrackingParams'))
    return None
593
@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for ``renderer``, or None when there is none.

    Next/reload continuation data is preferred; otherwise the entries under
    'contents' and 'items' are scanned for a continuationItemRenderer.
    """
    from_next = cls._extract_next_continuation_data(renderer)
    if from_next:
        return from_next

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'],
    )
    entries = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        endpoint = try_get(entry, endpoint_getters, dict)
        found = cls._extract_continuation_ep_data(endpoint)
        if found:
            return found
    return None
612
@staticmethod
def _extract_alerts(data):
    """
    Yield one (alert_type, message) pair per alert found under data['alerts'].

    The message is the alert's 'simpleText' followed by the text of all of its
    'runs'. Entries that are not dicts, alerts without a 'type' and alerts
    without any text are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Malformed (non-dict) alert values used to raise AttributeError
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = try_get(alert, lambda x: x['text']['simpleText'], compat_str) or ''
            for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
                # A run without a string 'text' contributes nothing
                # (concatenating the None result used to raise TypeError)
                message += try_get(run, lambda x: x['text'], compat_str) or ''
            # Yield exactly once per alert; a simpleText alert used to be yielded twice
            if message:
                yield alert_type, message
629
630 def _report_alerts(self, alerts, expected=True):
631 errors = []
632 warnings = []
633 for alert_type, alert_message in alerts:
634 if alert_type.lower() == 'error':
635 errors.append([alert_type, alert_message])
636 else:
637 warnings.append([alert_type, alert_message])
638
639 for alert_type, alert_message in (warnings + errors[:-1]):
640 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
641 if errors:
642 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
643
644 def _extract_and_report_alerts(self, data, *args, **kwargs):
645 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
646
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='WEB'):
    """
    Call the Innertube API with retries and return the response.

    A request is retried (up to the 'extractor_retries' param, default 3) when
    it fails with HTTP 500/503/404 or when the response contains none of
    ``check_get_keys``. Alerts in a response are reported through
    _extract_and_report_alerts().

    @param check_get_keys  Keys of which at least one must have a value in the
                           response; no such check when None/empty
    @param fatal           If True, failures raise ExtractorError; otherwise a
                           warning is reported and None is returned
    """
    response = None
    last_error = None
    # Starts at -1 so that the first attempt is number 0 and `retries` extra attempts remain
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            # fatal=True regardless of our own `fatal`: errors must surface here
            # so that the retry logic below can inspect them
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                last_error = 'HTTP Error %s' % e.cause.code
                if count < retries:
                    continue
            # Not retryable, or out of retries
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            # Youtube may send alerts if there was an issue with the continuation page
            try:
                self._extract_and_report_alerts(response, expected=False)
            except ExtractorError as e:
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response
702
9297939e 703 @staticmethod
704 def is_music_url(url):
705 return re.match(r'https?://music\.youtube\.com/', url) is not None
706
def _extract_video(self, renderer):
    """
    Build a 'url' result pointing at YoutubeIE from a video renderer dict.

    Title, description, duration, view count and uploader are filled in from
    the renderer where available and are None otherwise.
    """
    video_id = renderer.get('videoId')

    title_getters = (
        lambda x: x['title']['runs'][0]['text'],
        lambda x: x['title']['simpleText'],
    )
    title = try_get(renderer, title_getters, compat_str)

    description = try_get(
        renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)

    length_text = try_get(renderer, lambda x: x['lengthText']['simpleText'], compat_str)
    duration = parse_duration(length_text)

    view_count_text = try_get(
        renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
    # Whitespace is removed first; only the leading digits-and-commas part is used
    compact_views = re.sub(r'\s', '', view_count_text)
    view_count = str_to_int(self._search_regex(
        r'^([\d,]+)', compact_views, 'view count', default=None))

    uploader_getters = (
        lambda x: x['ownerText']['runs'][0]['text'],
        lambda x: x['shortBylineText']['runs'][0]['text'],
    )
    uploader = try_get(renderer, uploader_getters, compat_str)

    result = {'_type': 'url', 'ie_key': YoutubeIE.ie_key()}
    result.update({
        'id': video_id,
        'url': video_id,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
    })
    return result
738
0c148415 739
360e1ca5 740class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 741 IE_DESC = 'YouTube.com'
bc2ca1bb 742 _INVIDIOUS_SITES = (
743 # invidious-redirect websites
744 r'(?:www\.)?redirect\.invidious\.io',
745 r'(?:(?:www|dev)\.)?invidio\.us',
746 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
747 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 748 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 749 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 750 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
bc2ca1bb 751 # youtube-dl invidious instances list
752 r'(?:(?:www|no)\.)?invidiou\.sh',
753 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
754 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 755 r'(?:www\.)?invidious\.mastodon\.host',
756 r'(?:www\.)?invidious\.zapashcanon\.fr',
ed807c18 757 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
201c1459 758 r'(?:www\.)?invidious\.tinfoil-hat\.net',
759 r'(?:www\.)?invidious\.himiko\.cloud',
760 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 761 r'(?:www\.)?invidious\.tube',
762 r'(?:www\.)?invidiou\.site',
763 r'(?:www\.)?invidious\.site',
764 r'(?:www\.)?invidious\.xyz',
765 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 766 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 767 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 768 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 769 r'(?:www\.)?tube\.poal\.co',
770 r'(?:www\.)?tube\.connect\.cafe',
771 r'(?:www\.)?vid\.wxzm\.sx',
772 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 773 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 774 r'(?:www\.)?yewtu\.be',
775 r'(?:www\.)?yt\.elukerio\.org',
776 r'(?:www\.)?yt\.lelux\.fi',
777 r'(?:www\.)?invidious\.ggc-project\.de',
778 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 779 r'(?:www\.)?ytprivate\.com',
780 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 781 r'(?:www\.)?invidious\.toot\.koeln',
782 r'(?:www\.)?invidious\.fdn\.fr',
783 r'(?:www\.)?watch\.nettohikari\.com',
ed807c18 784 r'(?:www\.)?invidious\.namazso\.eu',
785 r'(?:www\.)?invidious\.silkky\.cloud',
786 r'(?:www\.)?invidious\.exonip\.de',
787 r'(?:www\.)?invidious\.riverside\.rocks',
788 r'(?:www\.)?invidious\.blamefran\.net',
789 r'(?:www\.)?invidious\.moomoo\.de',
790 r'(?:www\.)?ytb\.trom\.tf',
791 r'(?:www\.)?yt\.cyberhost\.uk',
bc2ca1bb 792 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
793 r'(?:www\.)?qklhadlycap4cnod\.onion',
794 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
795 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
796 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
797 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
798 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
799 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
ed807c18 800 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
801 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
802 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
803 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
bc2ca1bb 804 )
# Verbose-mode ((?x)) regex for a single YouTube video reference.
# Group 1 is an optional URL prefix: http(s):// or protocol-relative "//",
# followed by either
#   * a youtube.com host (optional subdomain, any letter case, with the
#     -nocookie / kids variants), one of the listed mirror hosts, or any
#     _INVIDIOUS_SITES pattern, then a v/, embed/ or e/ path (but not
#     "videoseries") or a query/fragment carrying v=;
#   * a short-link style host (youtu.be, vid.plus, zwearz.com/watch or an
#     invidious host) followed by "/";
#   * the cleanvideosearch.com watch?videoId= form.
# The 11-character video ID is captured in the named group "id". Because the
# prefix is optional, a bare ID also matches; the conditional (?(1).+)? only
# allows trailing text when the prefix group matched.
# The _INVIDIOUS_SITES entries are joined with "|" and substituted for both
# %(invidious)s placeholders by the %-format at the end of the statement.
cb7dfeea 805 _VALID_URL = r"""(?x)^
c5e8d7af 806 (
edb53e2d 807 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 808 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
809 (?:www\.)?deturl\.com/www\.youtube\.com|
810 (?:www\.)?pwnyoutube\.com|
811 (?:www\.)?hooktube\.com|
812 (?:www\.)?yourepeat\.com|
813 tube\.majestyc\.net|
814 %(invidious)s|
815 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
816 (?:.*?\#/)? # handle anchor (#/) redirect urls
817 (?: # the various things that can precede the ID:
ac7553d0 818 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 819 |(?: # or the v= param in all its forms
f7000f3a 820 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 821 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 822 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
823 v=
824 )
f4b05232 825 ))
cbaed4bb
S
826 |(?:
827 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
828 vid\.plus| # or vid.plus/xxxx
829 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 830 %(invidious)s
cbaed4bb 831 )/
edb53e2d 832 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 833 )
c5e8d7af 834 )? # all until now is optional -> you can pass the naked ID
201c1459 835 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 836 (?(1).+)? # if we found the ID, everything can follow
9297939e 837 (?:\#|$)""" % {
bc2ca1bb 838 'invidious': '|'.join(_INVIDIOUS_SITES),
839 }
# Regexes for the known shapes of a player script path; each captures the
# player identifier in the named group "id":
#   1. /s/player/<id>/player...
#   2. /<id>/player_ias.vflset[/<locale>]/base.js or
#      /<id>/player-plasma-ias-(phone|tablet)-<locale>.vflset/base.js
#   3. a "vfl..." token followed later by ".js" at the end of the string
# NOTE(review): the code that applies these patterns is outside this chunk;
# presumably they are tried in order against the player URL - confirm.
e40c758c 840 _PLAYER_INFO_RE = (
cc2db878 841 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
842 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 843 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 844 )
# Static table of known format metadata keyed by itag (as a string), plus the
# '_rtmp' pseudo-entry. Each value lists whichever of ext, width, height,
# acodec, vcodec, abr, fps, container, format_note and preference are known
# for that itag; values that differ between videos are omitted (see the notes
# on itags 36 and 138).
# Only the 3D entries ('preference': -20) and the HLS entries
# ('preference': -10) carry a preference key.
# NOTE(review): the consumers of this table are outside this chunk; presumably
# these values are merged into the formats extracted at runtime - confirm.
2c62dc26 845 _formats = {
c2d3cb4c 846 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
847 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
848 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
849 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
850 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
851 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
852 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
853 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 854 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 855 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
856 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
857 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
858 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
859 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
860 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 861 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 862 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
863 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 864
865
866 # 3D videos
c2d3cb4c 867 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
868 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
869 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
870 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 871 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
872 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
873 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 874
96fb5605 875 # Apple HTTP Live Streaming
11f12195 876 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 877 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
878 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
879 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
880 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
881 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 882 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
883 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
884
885 # DASH mp4 video
d23028a8
S
886 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
887 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
888 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
889 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
890 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 891 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
892 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
893 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
894 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
895 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
896 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
897 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 898
f6f1fc92 899 # Dash mp4 audio
d23028a8
S
900 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
901 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
902 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
903 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
904 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
905 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
906 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
907
908 # Dash webm
d23028a8
S
909 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
910 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
911 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
912 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
913 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
914 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
915 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
916 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
917 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
918 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
919 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
920 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
921 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
922 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
923 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 924 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
# NOTE(review): the 272 entry below still pins height to 2160 even though the
# resolution varies; confirm this is intended for the 7680x4320 case.
d23028a8
S
925 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
926 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
927 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
928 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
929 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
930 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
931
932 # Dash webm audio
d23028a8
S
933 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
934 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 935
0857baad 936 # Dash webm audio with opus inside
d23028a8
S
937 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
938 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
939 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 940
ce6b9a2d
PH
941 # RTMP (unnamed)
942 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
943
944 # av01 video only formats sometimes served with "unknown" codecs
945 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
946 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
947 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
948 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 949 }
# Names of the subtitle formats this extractor knows about.
# NOTE(review): usage is outside this chunk; presumably one subtitle entry is
# offered per listed format - confirm at the call site.
29f7c58a 950 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 951
# Message strings associated with age-restricted videos.
# NOTE(review): the comparison logic is outside this chunk; presumably these
# are matched against the reason text returned for an unplayable video -
# confirm whether the match is exact or by substring.
109dd3b2 952 _AGE_GATE_REASONS = (
953 'Sign in to confirm your age',
954 'This video may be inappropriate for some users.',
955 'Sorry, this content is age-restricted.')
956
fd5c4aab
S
# Geo-bypass flag, set to False for this extractor.
# NOTE(review): presumably overrides a default inherited from the base
# extractor class - confirm against InfoExtractor.
957 _GEO_BYPASS = False
958
# Extractor name string for this class.
78caa52a 959 IE_NAME = 'youtube'
2eb88d95
PH
960 _TESTS = [
961 {
2d3d2997 962 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
963 'info_dict': {
964 'id': 'BaW_jenozKc',
965 'ext': 'mp4',
3867038a 966 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
967 'uploader': 'Philipp Hagemeister',
968 'uploader_id': 'phihag',
ec85ded8 969 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
970 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
971 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 972 'upload_date': '20121002',
3867038a 973 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 974 'categories': ['Science & Technology'],
3867038a 975 'tags': ['youtube-dl'],
556dbe7f 976 'duration': 10,
dbdaaa23 977 'view_count': int,
3e7c1224
PH
978 'like_count': int,
979 'dislike_count': int,
7c80519c 980 'start_time': 1,
297a564b 981 'end_time': 9,
2eb88d95 982 }
0e853ca4 983 },
fccd3771 984 {
4bc3a23e
PH
985 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
986 'note': 'Embed-only video (#1746)',
987 'info_dict': {
988 'id': 'yZIXLfi8CZQ',
989 'ext': 'mp4',
990 'upload_date': '20120608',
991 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
992 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
993 'uploader': 'SET India',
94bfcd23 994 'uploader_id': 'setindia',
ec85ded8 995 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 996 'age_limit': 18,
545cc85d 997 },
998 'skip': 'Private video',
fccd3771 999 },
11b56058 1000 {
8bdd16b4 1001 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1002 'note': 'Use the first video ID in the URL',
1003 'info_dict': {
1004 'id': 'BaW_jenozKc',
1005 'ext': 'mp4',
3867038a 1006 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1007 'uploader': 'Philipp Hagemeister',
1008 'uploader_id': 'phihag',
ec85ded8 1009 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 1010 'upload_date': '20121002',
3867038a 1011 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 1012 'categories': ['Science & Technology'],
3867038a 1013 'tags': ['youtube-dl'],
556dbe7f 1014 'duration': 10,
dbdaaa23 1015 'view_count': int,
11b56058
PM
1016 'like_count': int,
1017 'dislike_count': int,
34a7de29
S
1018 },
1019 'params': {
1020 'skip_download': True,
1021 },
11b56058 1022 },
dd27fd17 1023 {
2d3d2997 1024 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1025 'note': '256k DASH audio (format 141) via DASH manifest',
1026 'info_dict': {
1027 'id': 'a9LDPn-MO4I',
1028 'ext': 'm4a',
1029 'upload_date': '20121002',
1030 'uploader_id': '8KVIDEO',
ec85ded8 1031 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1032 'description': '',
1033 'uploader': '8KVIDEO',
1034 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1035 },
4bc3a23e
PH
1036 'params': {
1037 'youtube_include_dash_manifest': True,
1038 'format': '141',
4919603f 1039 },
de3c7fe0 1040 'skip': 'format 141 not served anymore',
dd27fd17 1041 },
8bdd16b4 1042 # DASH manifest with encrypted signature
1043 {
1044 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1045 'info_dict': {
1046 'id': 'IB3lcPjvWLA',
1047 'ext': 'm4a',
1048 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1049 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1050 'duration': 244,
1051 'uploader': 'AfrojackVEVO',
1052 'uploader_id': 'AfrojackVEVO',
1053 'upload_date': '20131011',
cc2db878 1054 'abr': 129.495,
8bdd16b4 1055 },
1056 'params': {
1057 'youtube_include_dash_manifest': True,
1058 'format': '141/bestaudio[ext=m4a]',
1059 },
1060 },
aa79ac0c
PH
1061 # Controversy video
1062 {
1063 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
1064 'info_dict': {
1065 'id': 'T4XJQO3qol8',
1066 'ext': 'mp4',
556dbe7f 1067 'duration': 219,
aa79ac0c 1068 'upload_date': '20100909',
4fe54c12 1069 'uploader': 'Amazing Atheist',
aa79ac0c 1070 'uploader_id': 'TheAmazingAtheist',
ec85ded8 1071 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 1072 'title': 'Burning Everyone\'s Koran',
545cc85d 1073 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 1074 }
c522adb1 1075 },
dd2d55f1 1076 # Normal age-gate video (embed allowed)
c522adb1 1077 {
2d3d2997 1078 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1079 'info_dict': {
1080 'id': 'HtVdAasjOgU',
1081 'ext': 'mp4',
1082 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1083 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1084 'duration': 142,
c522adb1
JMF
1085 'uploader': 'The Witcher',
1086 'uploader_id': 'WitcherGame',
ec85ded8 1087 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1088 'upload_date': '20140605',
34952f09 1089 'age_limit': 18,
c522adb1
JMF
1090 },
1091 },
8bdd16b4 1092 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1093 # YouTube Red ad is not captured for creator
1094 {
1095 'url': '__2ABJjxzNo',
1096 'info_dict': {
1097 'id': '__2ABJjxzNo',
1098 'ext': 'mp4',
1099 'duration': 266,
1100 'upload_date': '20100430',
1101 'uploader_id': 'deadmau5',
1102 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1103 'creator': 'deadmau5',
1104 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1105 'uploader': 'deadmau5',
1106 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1107 'alt_title': 'Some Chords',
8bdd16b4 1108 },
1109 'expected_warnings': [
1110 'DASH manifest missing',
1111 ]
1112 },
067aa17e 1113 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1114 {
1115 'url': 'lqQg6PlCWgI',
1116 'info_dict': {
1117 'id': 'lqQg6PlCWgI',
1118 'ext': 'mp4',
556dbe7f 1119 'duration': 6085,
90227264 1120 'upload_date': '20150827',
cbe2bd91 1121 'uploader_id': 'olympic',
ec85ded8 1122 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1123 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 1124 'uploader': 'Olympic',
cbe2bd91
PH
1125 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1126 },
1127 'params': {
1128 'skip_download': 'requires avconv',
e52a40ab 1129 }
cbe2bd91 1130 },
6271f1ca
PH
1131 # Non-square pixels
1132 {
1133 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1134 'info_dict': {
1135 'id': '_b-2C3KPAM0',
1136 'ext': 'mp4',
1137 'stretched_ratio': 16 / 9.,
556dbe7f 1138 'duration': 85,
6271f1ca
PH
1139 'upload_date': '20110310',
1140 'uploader_id': 'AllenMeow',
ec85ded8 1141 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1142 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1143 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1144 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1145 },
06b491eb
S
1146 },
1147 # url_encoded_fmt_stream_map is empty string
1148 {
1149 'url': 'qEJwOuvDf7I',
1150 'info_dict': {
1151 'id': 'qEJwOuvDf7I',
f57b7835 1152 'ext': 'webm',
06b491eb
S
1153 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1154 'description': '',
1155 'upload_date': '20150404',
1156 'uploader_id': 'spbelect',
1157 'uploader': 'Наблюдатели Петербурга',
1158 },
1159 'params': {
1160 'skip_download': 'requires avconv',
e323cf3f
S
1161 },
1162 'skip': 'This live event has ended.',
06b491eb 1163 },
067aa17e 1164 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1165 {
1166 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1167 'info_dict': {
1168 'id': 'FIl7x6_3R5Y',
eb6793ba 1169 'ext': 'webm',
da77d856
S
1170 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1171 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1172 'duration': 220,
da77d856
S
1173 'upload_date': '20150625',
1174 'uploader_id': 'dorappi2000',
ec85ded8 1175 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1176 'uploader': 'dorappi2000',
eb6793ba 1177 'formats': 'mincount:31',
da77d856 1178 },
eb6793ba 1179 'skip': 'not actual anymore',
2ee8f5d8 1180 },
8a1a26ce
YCH
1181 # DASH manifest with segment_list
1182 {
1183 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1184 'md5': '8ce563a1d667b599d21064e982ab9e31',
1185 'info_dict': {
1186 'id': 'CsmdDsKjzN8',
1187 'ext': 'mp4',
17ee98e1 1188 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1189 'uploader': 'Airtek',
1190 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1191 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1192 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1193 },
1194 'params': {
1195 'youtube_include_dash_manifest': True,
1196 'format': '135', # bestvideo
be49068d
S
1197 },
1198 'skip': 'This live event has ended.',
2ee8f5d8 1199 },
cf7e015f
S
1200 {
1201 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1202 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1203 'info_dict': {
545cc85d 1204 'id': 'jvGDaLqkpTg',
1205 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1206 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1207 },
1208 'playlist': [{
1209 'info_dict': {
545cc85d 1210 'id': 'jvGDaLqkpTg',
cf7e015f 1211 'ext': 'mp4',
545cc85d 1212 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1213 'description': 'md5:e03b909557865076822aa169218d6a5d',
1214 'duration': 10643,
1215 'upload_date': '20161111',
1216 'uploader': 'Team PGP',
1217 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1218 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1219 },
1220 }, {
1221 'info_dict': {
545cc85d 1222 'id': '3AKt1R1aDnw',
cf7e015f 1223 'ext': 'mp4',
545cc85d 1224 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1225 'description': 'md5:e03b909557865076822aa169218d6a5d',
1226 'duration': 10991,
1227 'upload_date': '20161111',
1228 'uploader': 'Team PGP',
1229 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1230 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1231 },
1232 }, {
1233 'info_dict': {
545cc85d 1234 'id': 'RtAMM00gpVc',
cf7e015f 1235 'ext': 'mp4',
545cc85d 1236 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1237 'description': 'md5:e03b909557865076822aa169218d6a5d',
1238 'duration': 10995,
1239 'upload_date': '20161111',
1240 'uploader': 'Team PGP',
1241 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1242 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1243 },
1244 }, {
1245 'info_dict': {
545cc85d 1246 'id': '6N2fdlP3C5U',
cf7e015f 1247 'ext': 'mp4',
545cc85d 1248 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1249 'description': 'md5:e03b909557865076822aa169218d6a5d',
1250 'duration': 10990,
1251 'upload_date': '20161111',
1252 'uploader': 'Team PGP',
1253 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1254 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1255 },
1256 }],
1257 'params': {
1258 'skip_download': True,
1259 },
cbaed4bb 1260 },
f9f49d87 1261 {
067aa17e 1262 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1263 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1264 'info_dict': {
1265 'id': 'gVfLd0zydlo',
1266 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1267 },
1268 'playlist_count': 2,
be49068d 1269 'skip': 'Not multifeed anymore',
f9f49d87 1270 },
cbaed4bb 1271 {
2d3d2997 1272 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1273 'only_matching': True,
0e49d9a6 1274 },
6d4fc66b 1275 {
2d3d2997 1276 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1277 'only_matching': True,
1278 },
0e49d9a6 1279 {
067aa17e 1280 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1281 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1282 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1283 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1284 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1285 'info_dict': {
1286 'id': 'lsguqyKfVQg',
1287 'ext': 'mp4',
1288 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 1289 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 1290 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1291 'duration': 133,
0e49d9a6
LL
1292 'upload_date': '20151119',
1293 'uploader_id': 'IronSoulElf',
ec85ded8 1294 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1295 'uploader': 'IronSoulElf',
eb6793ba
S
1296 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1297 'track': 'Dark Walk - Position Music',
1298 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 1299 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1300 },
1301 'params': {
1302 'skip_download': True,
1303 },
1304 },
61f92af1 1305 {
067aa17e 1306 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1307 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1308 'only_matching': True,
1309 },
313dfc45
LL
1310 {
1311 # Video with yt:stretch=17:0
1312 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1313 'info_dict': {
1314 'id': 'Q39EVAstoRM',
1315 'ext': 'mp4',
1316 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1317 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1318 'upload_date': '20151107',
1319 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1320 'uploader': 'CH GAMER DROID',
1321 },
1322 'params': {
1323 'skip_download': True,
1324 },
be49068d 1325 'skip': 'This video does not exist.',
313dfc45 1326 },
201c1459 1327 {
1328 # Video with incomplete 'yt:stretch=16:'
1329 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1330 'only_matching': True,
1331 },
7caf9830
S
1332 {
1333 # Video licensed under Creative Commons
1334 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1335 'info_dict': {
1336 'id': 'M4gD1WSo5mA',
1337 'ext': 'mp4',
1338 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1339 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1340 'duration': 721,
7caf9830
S
1341 'upload_date': '20150127',
1342 'uploader_id': 'BerkmanCenter',
ec85ded8 1343 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1344 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1345 'license': 'Creative Commons Attribution license (reuse allowed)',
1346 },
1347 'params': {
1348 'skip_download': True,
1349 },
1350 },
fd050249
S
1351 {
1352 # Channel-like uploader_url
1353 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1354 'info_dict': {
1355 'id': 'eQcmzGIKrzg',
1356 'ext': 'mp4',
1357 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1358 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1359 'duration': 4060,
fd050249 1360 'upload_date': '20151119',
eb6793ba 1361 'uploader': 'Bernie Sanders',
fd050249 1362 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1363 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1364 'license': 'Creative Commons Attribution license (reuse allowed)',
1365 },
1366 'params': {
1367 'skip_download': True,
1368 },
1369 },
040ac686
S
1370 {
1371 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1372 'only_matching': True,
7f29cf54
S
1373 },
1374 {
067aa17e 1375 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1376 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1377 'only_matching': True,
6496ccb4
S
1378 },
1379 {
1380 # Rental video preview
1381 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1382 'info_dict': {
1383 'id': 'uGpuVWrhIzE',
1384 'ext': 'mp4',
1385 'title': 'Piku - Trailer',
1386 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1387 'upload_date': '20150811',
1388 'uploader': 'FlixMatrix',
1389 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1390 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1391 'license': 'Standard YouTube License',
1392 },
1393 'params': {
1394 'skip_download': True,
1395 },
eb6793ba 1396 'skip': 'This video is not available.',
022a5d66 1397 },
12afdc2a
S
1398 {
1399 # YouTube Red video with episode data
1400 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1401 'info_dict': {
1402 'id': 'iqKdEhx-dD4',
1403 'ext': 'mp4',
1404 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1405 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1406 'duration': 2085,
12afdc2a
S
1407 'upload_date': '20170118',
1408 'uploader': 'Vsauce',
1409 'uploader_id': 'Vsauce',
1410 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1411 'series': 'Mind Field',
1412 'season_number': 1,
1413 'episode_number': 1,
1414 },
1415 'params': {
1416 'skip_download': True,
1417 },
1418 'expected_warnings': [
1419 'Skipping DASH manifest',
1420 ],
1421 },
c7121fa7
S
1422 {
1423 # The following content has been identified by the YouTube community
1424 # as inappropriate or offensive to some audiences.
1425 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1426 'info_dict': {
1427 'id': '6SJNVb0GnPI',
1428 'ext': 'mp4',
1429 'title': 'Race Differences in Intelligence',
1430 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1431 'duration': 965,
1432 'upload_date': '20140124',
1433 'uploader': 'New Century Foundation',
1434 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1435 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1436 },
1437 'params': {
1438 'skip_download': True,
1439 },
545cc85d 1440 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1441 },
022a5d66
S
1442 {
1443 # itag 212
1444 'url': '1t24XAntNCY',
1445 'only_matching': True,
fd5c4aab
S
1446 },
1447 {
1448 # geo restricted to JP
1449 'url': 'sJL6WA-aGkQ',
1450 'only_matching': True,
1451 },
cd5a74a2
S
1452 {
1453 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1454 'only_matching': True,
1455 },
bc2ca1bb 1456 {
1457 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1458 'only_matching': True,
1459 },
1460 {
1461 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1462 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1463 'only_matching': True,
1464 },
825cd268
RA
1465 {
1466 # DRM protected
1467 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1468 'only_matching': True,
4fe54c12
S
1469 },
1470 {
1471 # Video with unsupported adaptive stream type formats
1472 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1473 'info_dict': {
1474 'id': 'Z4Vy8R84T1U',
1475 'ext': 'mp4',
1476 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1477 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1478 'duration': 433,
1479 'upload_date': '20130923',
1480 'uploader': 'Amelia Putri Harwita',
1481 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1482 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1483 'formats': 'maxcount:10',
1484 },
1485 'params': {
1486 'skip_download': True,
1487 'youtube_include_dash_manifest': False,
1488 },
5429d6a9 1489 'skip': 'not actual anymore',
5caabd3c 1490 },
1491 {
822b9d9c 1492 # Youtube Music Auto-generated description
5caabd3c 1493 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1494 'info_dict': {
1495 'id': 'MgNrAu2pzNs',
1496 'ext': 'mp4',
1497 'title': 'Voyeur Girl',
1498 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1499 'upload_date': '20190312',
5429d6a9
S
1500 'uploader': 'Stephen - Topic',
1501 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1502 'artist': 'Stephen',
1503 'track': 'Voyeur Girl',
1504 'album': 'it\'s too much love to know my dear',
1505 'release_date': '20190313',
1506 'release_year': 2019,
1507 },
1508 'params': {
1509 'skip_download': True,
1510 },
1511 },
66b48727
RA
1512 {
1513 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1514 'only_matching': True,
1515 },
011e75e6
S
1516 {
1517 # invalid -> valid video id redirection
1518 'url': 'DJztXj2GPfl',
1519 'info_dict': {
1520 'id': 'DJztXj2GPfk',
1521 'ext': 'mp4',
1522 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1523 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1524 'upload_date': '20090125',
1525 'uploader': 'Prochorowka',
1526 'uploader_id': 'Prochorowka',
1527 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1528 'artist': 'Panjabi MC',
1529 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1530 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1531 },
1532 'params': {
1533 'skip_download': True,
1534 },
545cc85d 1535 'skip': 'Video unavailable',
ea74e00b
DP
1536 },
1537 {
1538 # empty description results in an empty string
1539 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1540 'info_dict': {
1541 'id': 'x41yOUIvK2k',
1542 'ext': 'mp4',
1543 'title': 'IMG 3456',
1544 'description': '',
1545 'upload_date': '20170613',
1546 'uploader_id': 'ElevageOrVert',
1547 'uploader': 'ElevageOrVert',
1548 },
1549 'params': {
1550 'skip_download': True,
1551 },
1552 },
a0566bbf 1553 {
29f7c58a 1554 # with '};' inside yt initial data (see [1])
1555 # see [2] for an example with '};' inside ytInitialPlayerResponse
1556 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1557 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1558 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1559 'info_dict': {
1560 'id': 'CHqg6qOn4no',
1561 'ext': 'mp4',
1562 'title': 'Part 77 Sort a list of simple types in c#',
1563 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1564 'upload_date': '20130831',
1565 'uploader_id': 'kudvenkat',
1566 'uploader': 'kudvenkat',
1567 },
1568 'params': {
1569 'skip_download': True,
1570 },
1571 },
29f7c58a 1572 {
1573 # another example of '};' in ytInitialData
1574 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1575 'only_matching': True,
1576 },
1577 {
1578 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1579 'only_matching': True,
1580 },
545cc85d 1581 {
cc2db878 1582 # https://github.com/ytdl-org/youtube-dl/pull/28094
1583 'url': 'OtqTfy26tG0',
1584 'info_dict': {
1585 'id': 'OtqTfy26tG0',
1586 'ext': 'mp4',
1587 'title': 'Burn Out',
1588 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1589 'upload_date': '20141120',
1590 'uploader': 'The Cinematic Orchestra - Topic',
1591 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1592 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1593 'artist': 'The Cinematic Orchestra',
1594 'track': 'Burn Out',
1595 'album': 'Every Day',
1596 'release_data': None,
1597 'release_year': None,
1598 },
1599 'params': {
1600 'skip_download': True,
1601 },
545cc85d 1602 },
bc2ca1bb 1603 {
1604 # controversial video, only works with bpctr when authenticated with cookies
1605 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1606 'only_matching': True,
1607 },
f7ad7160 1608 {
1609 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1610 'url': 'cBvYw8_A0vQ',
1611 'info_dict': {
1612 'id': 'cBvYw8_A0vQ',
1613 'ext': 'mp4',
1614 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1615 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1616 'upload_date': '20201120',
1617 'uploader': 'Walk around Japan',
1618 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1619 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1620 },
1621 'params': {
1622 'skip_download': True,
1623 },
0fb983f6 1624 }, {
1625 # Has multiple audio streams
1626 'url': 'WaOKSUlf4TM',
1627 'only_matching': True
9297939e 1628 }, {
1629 # Requires Premium: has format 141 when requested using YTM url
1630 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1631 'only_matching': True
1632 }, {
120916da 1633 # multiple subtitles with same lang_code
1634 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1635 'only_matching': True,
109dd3b2 1636 }, {
1637 # Force use android client fallback
1638 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1639 'info_dict': {
1640 'id': 'YOelRv7fMxY',
1641 'title': 'Digging a Secret Tunnel from my Workshop',
1642 'ext': '3gp',
1643 'upload_date': '20210624',
1644 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1645 'uploader': 'colinfurze',
1646 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1647 'description': 'md5:ecb672623246d98c6c562eed6ae798c3'
1648 },
1649 'params': {
1650 'format': '17', # 3gp format available on android
1651 'extractor_args': {'youtube': {'player_client': ['android']}},
1652 },
120916da 1653 },
109dd3b2 1654 {
1655 # Skip download of additional client configs (remix client config in this case)
1656 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1657 'only_matching': True,
1658 'params': {
1659 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1660 },
1661 }
2eb88d95
PH
1662 ]
1663
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected; every other URL is decided by the parent implementation.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_param = parse_qs(url).get('list', [None])[0]
    return False if list_param else super(YoutubeIE, cls).suitable(url)
1673
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor and create its empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # player id -> downloaded player JS source (filled by _load_player)
    self._code_cache = dict()
    # (player_url, signature cache id) -> signature function
    # (filled by _decrypt_signature)
    self._player_cache = dict()
e0df6211 1678
def _extract_player_url(self, ytcfg=None, webpage=None):
    """Return the absolute URL of the JS player, or None if it is unknown.

    The URL is taken from ``ytcfg['PLAYER_JS_URL']`` when available and
    otherwise searched for in *webpage*. Protocol-relative and
    site-relative URLs are turned into absolute https URLs.

    Returns None when neither source yields a URL (e.g. the webpage could
    not be downloaded), instead of failing on a missing value.
    """
    player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
    # webpage may be falsy when its download failed non-fatally
    if not player_url and webpage:
        player_url = self._search_regex(
            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
            webpage, 'player URL', fatal=False)
    if not player_url:
        return None
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)
    return player_url
1691
60064c53
PH
def _signature_cache_id(self, example_sig):
    """ Return a string representation of a signature """
    # The id is the dot-joined lengths of the dot-separated signature parts
    part_lengths = [compat_str(len(part)) for part in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1695
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the player id found in *player_url*.

    The patterns in ``cls._PLAYER_INFO_RE`` are tried in order and the
    ``id`` group of the first match is returned.

    Raises ExtractorError if no pattern matches.
    """
    candidates = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    id_m = next((m for m in candidates if m), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')
e40c758c 1705
109dd3b2 1706 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1707 player_id = self._extract_player_info(player_url)
1708 if player_id not in self._code_cache:
1709 self._code_cache[player_id] = self._download_webpage(
1710 player_url, video_id, fatal=fatal,
1711 note='Downloading player ' + player_id,
1712 errnote='Download of %s failed' % player_url)
1713 return player_id in self._code_cache
1714
e40c758c 1715 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 1716 player_id = self._extract_player_info(player_url)
e0df6211 1717
c4417ddb 1718 # Read from filesystem cache
545cc85d 1719 func_id = 'js_%s_%s' % (
1720 player_id, self._signature_cache_id(example_sig))
c4417ddb 1721 assert os.path.basename(func_id) == func_id
a0e07d31 1722
69ea8ca4 1723 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1724 if cache_spec is not None:
78caa52a 1725 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1726
109dd3b2 1727 if self._load_player(video_id, player_url):
1728 code = self._code_cache[player_id]
1729 res = self._parse_sig_js(code)
e0df6211 1730
109dd3b2 1731 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1732 cache_res = res(test_string)
1733 cache_spec = [ord(c) for c in cache_res]
83799698 1734
109dd3b2 1735 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1736 return res
83799698 1737
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the signature function *func*.

    *func* is probed with a string of distinct characters; the resulting
    index permutation is rendered as a sum of ``s[...]`` index and slice
    expressions and written out with ``self.to_screen``.
    """
    def _slice_expr(first, last, stride):
        head = str(first) if first != 0 else ''
        stop = last + stride
        tail = (':%d' % stop) if stop >= 0 else ':'
        stride_part = (':%d' % stride) if stride != 1 else ''
        return 's[%s%s%s]' % (head, tail, stride_part)

    def _sig_terms(indices):
        stride = None
        # Quelch pyflakes warnings - run_start will be set when stride is set
        run_start = '(Never used)'
        for prev, cur in zip(indices[:-1], indices[1:]):
            delta = cur - prev
            if stride is not None:
                # Inside a run: keep going while the step stays the same,
                # otherwise emit the finished run as a slice
                if delta != stride:
                    yield _slice_expr(run_start, prev, stride)
                    stride = None
            elif delta in (-1, 1):
                stride, run_start = delta, prev
            else:
                yield 's[%d]' % prev
        if stride is None:
            yield 's[%d]' % cur
        else:
            yield _slice_expr(run_start, cur, stride)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    permutation = [ord(c) for c in func(probe)]
    expr_code = ' + '.join(_sig_terms(permutation))
    part_lengths = ', '.join(compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1776
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a Python callable wrapping the signature function in *jscode*.

    The function name is located with the regexes below (tried in order),
    the function is extracted with JSInterpreter, and the returned callable
    passes its single string argument to it.
    """
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        sig_name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature(s):
        # The extracted JS function takes its arguments as a list
        return initial_function([s])

    return run_signature
1800
545cc85d 1801 def _decrypt_signature(self, s, video_id, player_url):
257a2501 1802 """Turn the encrypted s field into a working signature"""
6b37f0be 1803
c8bf86d5 1804 if player_url is None:
69ea8ca4 1805 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1806
c8bf86d5 1807 try:
62af3a0e 1808 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1809 if player_id not in self._player_cache:
1810 func = self._extract_signature_function(
60064c53 1811 video_id, player_url, s
c8bf86d5
PH
1812 )
1813 self._player_cache[player_id] = func
1814 func = self._player_cache[player_id]
a06916d9 1815 if self.get_param('youtube_print_sig_code'):
60064c53 1816 self._print_sig_code(func, s)
c8bf86d5
PH
1817 return func(s)
1818 except Exception as e:
1819 tb = traceback.format_exc()
1820 raise ExtractorError(
78caa52a 1821 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1822
109dd3b2 1823 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1824 """
1825 Extract signatureTimestamp (sts)
1826 Required to tell API what sig/player version is in use.
1827 """
1828 sts = None
1829 if isinstance(ytcfg, dict):
1830 sts = int_or_none(ytcfg.get('STS'))
1831
1832 if not sts:
1833 # Attempt to extract from player
1834 if player_url is None:
1835 error_msg = 'Cannot extract signature timestamp without player_url.'
1836 if fatal:
1837 raise ExtractorError(error_msg)
1838 self.report_warning(error_msg)
1839 return
1840 if self._load_player(video_id, player_url, fatal=fatal):
1841 player_id = self._extract_player_info(player_url)
1842 code = self._code_cache[player_id]
1843 sts = int_or_none(self._search_regex(
1844 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
1845 'JS player signature timestamp', group='sts', fatal=fatal))
1846 return sts
1847
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL from *player_response*.

    Does nothing when the response carries no valid playback URL. The
    request is non-fatal.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]
    new_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(url_parts._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1872
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return the YouTube embed URLs / video ids found in *webpage*.

    Three sources are scanned in order: embedded players, lazyYT embeds
    and the Wordpress "YouTube Video Importer" plugin markup.
    """
    # Embedded YouTube player
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(embed_re, webpage)]

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    entries += [unescapeHTML(lazy_id) for lazy_id in lazy_ids]

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    # findall returns one tuple per match; the video id is the last group
    entries += [groups[-1] for groups in re.findall(importer_re, webpage)]

    return entries
1904
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by ``_extract_urls``, or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1909
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id from *url* (group 2 of ``cls._VALID_URL``).

    Raises ExtractorError if *url* does not match ``cls._VALID_URL``.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is None:
        raise ExtractorError('Invalid URL: %s' % url)
    return match.group(2)
1917
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a chapter list from the chaptered player bar in *data*.

    Each chapter ends where the next one starts; the last one ends at
    *duration*. Chapters without a usable start or end time are skipped.
    Returns None when *data* holds no chapter list.
    """
    chapters_list = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not chapters_list:
        return

    def chapter_time(chapter):
        # Start time converted from milliseconds to seconds
        start_millis = try_get(
            chapter,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(start_millis, scale=1000)

    total = len(chapters_list)
    chapters = []
    for index, chapter in enumerate(chapters_list):
        start_time = chapter_time(chapter)
        if start_time is None:
            continue
        following = index + 1
        if following < total:
            end_time = chapter_time(chapters_list[following])
        else:
            end_time = duration
        if end_time is None:
            continue
        title = try_get(
            chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
            compat_str)
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': title,
        })
    return chapters
1957
545cc85d 1958 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1959 return self._parse_json(self._search_regex(
1960 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1961 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 1962
d92f5d5a 1963 @staticmethod
1964 def parse_time_text(time_text):
1965 """
1966 Parse the comment time text
1967 time_text is in the format 'X units ago (edited)'
1968 """
1969 time_text_split = time_text.split(' ')
1970 if len(time_text_split) >= 3:
1971 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1972
a1c5d2ca
M
1973 @staticmethod
1974 def _join_text_entries(runs):
1975 text = None
1976 for run in runs:
1977 if not isinstance(run, dict):
1978 continue
1979 sub_text = try_get(run, lambda x: x['text'], compat_str)
1980 if sub_text:
1981 if not text:
1982 text = sub_text
1983 continue
1984 text += sub_text
1985 return text
1986
1987 def _extract_comment(self, comment_renderer, parent=None):
1988 comment_id = comment_renderer.get('commentId')
1989 if not comment_id:
1990 return
1991 comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
1992 text = self._join_text_entries(comment_text_runs) or ''
1993 comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
1994 time_text = self._join_text_entries(comment_time_text)
d92f5d5a 1995 timestamp = calendar.timegm(self.parse_time_text(time_text).timetuple())
a1c5d2ca
M
1996 author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
1997 author_id = try_get(comment_renderer,
1998 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
1999 votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2000 lambda x: x['likeCount']), compat_str)) or 0
2001 author_thumbnail = try_get(comment_renderer,
2002 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2003
2004 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2005 is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
a1c5d2ca
M
2006 return {
2007 'id': comment_id,
2008 'text': text,
d92f5d5a 2009 'timestamp': timestamp,
a1c5d2ca
M
2010 'time_text': time_text,
2011 'like_count': votes,
2012 'is_favorited': is_liked,
2013 'author': author,
2014 'author_id': author_id,
2015 'author_thumbnail': author_thumbnail,
2016 'author_is_uploader': author_is_uploader,
2017 'parent': parent or 'root'
2018 }
2019
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, video_id, parent=None, comment_counts=None):
    """Generate the comments reachable from *root_continuation_data*.

    Yields comment dicts (see _extract_comment) and, when a comments header
    with a count is encountered, an int holding the estimated total number
    of comments. Replies are produced by recursing with *parent* set to the
    id of the parent comment.

    comment_counts is a shared mutable list:
    [comments so far, estimated total comments, current comment thread #].
    """

    def extract_header(contents):
        # Reads the estimated total and the continuation for the requested
        # sort order from the comments header
        _total_comments = 0
        _continuation = None
        for content in contents:
            comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
            expected_comment_count = try_get(comments_header_renderer,
                                             (lambda x: x['countText']['runs'][0]['text'],
                                              lambda x: x['commentsCount']['runs'][0]['text']),
                                             compat_str)
            if expected_comment_count:
                comment_counts[1] = str_to_int(expected_comment_count)
                self.to_screen('Downloading ~%d comments' % str_to_int(expected_comment_count))
                _total_comments = comment_counts[1]
            sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
            comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = sort_menu_item.get('title')
            if isinstance(sort_text, compat_str):
                sort_text = sort_text.lower()
            else:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text)
            break
        return _total_comments, _continuation

    def extract_thread(contents):
        # Yields every comment in contents, each followed by its replies
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # dict is the expected type, not an additional getter
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    video_id, parent=comment.get('id'), comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    # YouTube comments have a max depth of 2
    max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
    if max_depth == 1 and parent:
        return
    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    continuation = self._extract_continuation(root_continuation_data)
    if continuation and len(continuation['ctoken']) < 27:
        self.write_debug('Detected old API continuation token. Generating new API compatible token.')
        continuation_token = self._generate_comment_continuation(video_id)
        continuation = self._build_continuation_query(continuation_token, None)

    visitor_data = None
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    comment_counts[2], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=self._continuation_query_ajax_to_api(continuation),
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
        if not response:
            break
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

        continuation = None
        if isinstance(continuation_contents, list):
            for continuation_section in continuation_contents:
                if not isinstance(continuation_section, dict):
                    continue
                continuation_items = try_get(
                    continuation_section,
                    (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                     lambda x: x['appendContinuationItemsAction']['continuationItems']),
                    list) or []
                if is_first_continuation:
                    total_comments, continuation = extract_header(continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break
                    continue
                # Count from 1 so that count == 0 really means "no entries";
                # counting from 0 would also treat a single entry as none
                count = 0
                for count, entry in enumerate(extract_thread(continuation_items), start=1):
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break

        # Deprecated response structure
        elif isinstance(continuation_contents, dict):
            known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
            for key, continuation_renderer in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                if not isinstance(continuation_renderer, dict):
                    continue
                if is_first_continuation:
                    header_continuation_items = [continuation_renderer.get('header') or {}]
                    total_comments, continuation = extract_header(header_continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break

                # Sometimes YouTube provides a continuation without any comments
                # In most cases we end up just downloading these with very little comments to come.
                # Count from 1 so that count == 0 really means "no entries"
                count = 0
                for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {}), start=1):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                if count == 0:
                    if not parent:
                        self.report_warning('No comments received - assuming end of comments')
                    continuation = None
                break
a1c5d2ca 2191
2d6659b9 2192 @staticmethod
2193 def _generate_comment_continuation(video_id):
2194 """
2195 Generates initial comment section continuation token from given video id
2196 """
2197 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2198 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2199 new_continuation_intlist = list(itertools.chain.from_iterable(
2200 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2201 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2202
def _extract_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""
    known_entry_comment_renderers = ('itemSectionRenderer',)

    def _real_comment_extract(contents):
        if not isinstance(contents, list):
            return
        for entry in contents:
            for key, renderer in entry.items():
                if key in known_entry_comment_renderers:
                    yield from self._comment_entries(
                        renderer, video_id=video_id, ytcfg=ytcfg,
                        identity_token=self._extract_identity_token(webpage, item_id=video_id),
                        account_syncid=self._extract_account_syncid(ytcfg))
                    # Only the first known renderer of an entry is used
                    break

    collected = []
    estimated_total = 0
    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')

    try:
        for item in _real_comment_extract(contents):
            if len(collected) >= max_comments:
                break
            # An int is the estimated total, not a comment
            if isinstance(item, int):
                estimated_total = item
            else:
                collected.append(item)
    except KeyboardInterrupt:
        # Keep whatever was downloaded before the interruption
        self.to_screen('Interrupted by user')
    self.to_screen('Downloaded %d/%d comments' % (len(collected), estimated_total))
    return {
        'comments': collected,
        'comment_count': len(collected),
    }
2236
109dd3b2 2237 @staticmethod
2238 def _generate_player_context(sts=None):
2239 context = {
2240 'html5Preference': 'HTML5_PREF_WANTS',
2241 }
2242 if sts is not None:
2243 context['signatureTimestamp'] = sts
2244 return {
2245 'playbackContext': {
2246 'contentPlaybackContext': context
2247 }
2248 }
2249
4e6767b5 2250 @staticmethod
2251 def _get_video_info_params(video_id):
2252 return {
2253 'video_id': video_id,
2254 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
2255 'html5': '1',
2256 'c': 'TVHTML5',
2257 'cver': '6.20180913',
2258 }
2259
c5e8d7af 2260 def _real_extract(self, url):
cf7e015f 2261 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 2262 video_id = self._match_id(url)
9297939e 2263
2264 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
2265
545cc85d 2266 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 2267 webpage_url = base_url + 'watch?v=' + video_id
2268 webpage = self._download_webpage(
cce889b9 2269 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 2270
109dd3b2 2271 ytcfg = self._extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2272 identity_token = self._extract_identity_token(webpage, video_id)
2273 syncid = self._extract_account_syncid(ytcfg)
2274 headers = self._generate_api_headers(ytcfg, identity_token, syncid)
2275
2276 player_url = self._extract_player_url(ytcfg, webpage)
2277
2d6659b9 2278 player_client = self._configuration_arg('player_client', [''])[0]
4bb6b02f 2279 if player_client not in ('web', 'android', ''):
2280 self.report_warning(f'Invalid player_client {player_client} given. Falling back to WEB')
2281 force_mobile_client = player_client == 'android'
2282 player_skip = self._configuration_arg('player_skip')
109dd3b2 2283
9297939e 2284 def get_text(x):
2285 if not x:
2286 return
2287 text = x.get('simpleText')
2288 if text and isinstance(text, compat_str):
2289 return text
2290 runs = x.get('runs')
2291 if not isinstance(runs, list):
2292 return
2293 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
2294
2295 ytm_streaming_data = {}
2296 if is_music_url:
109dd3b2 2297 ytm_webpage = None
2298 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2299 if sts and not force_mobile_client and 'configs' not in player_skip:
2300 ytm_webpage = self._download_webpage(
2301 'https://music.youtube.com',
2d6659b9 2302 video_id, fatal=False, note='Downloading remix client config')
109dd3b2 2303
2304 ytm_cfg = self._extract_ytcfg(video_id, ytm_webpage) or {}
2305 ytm_client = 'WEB_REMIX'
2306 if not sts or force_mobile_client:
2307 # Android client already has signature descrambled
2308 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2309 if not sts:
2310 self.report_warning('Falling back to mobile remix client for player API.')
2311 ytm_client = 'ANDROID_MUSIC'
2312 ytm_cfg = {}
2313
2314 ytm_headers = self._generate_api_headers(
2315 ytm_cfg, identity_token, syncid,
2316 client=ytm_client)
2317 ytm_query = {'videoId': video_id}
2318 ytm_query.update(self._generate_player_context(sts))
2319
2320 ytm_player_response = self._extract_response(
2321 item_id=video_id, ep='player', query=ytm_query,
2322 ytcfg=ytm_cfg, headers=ytm_headers, fatal=False,
2323 default_client=ytm_client,
2324 note='Downloading %sremix player API JSON' % ('mobile ' if force_mobile_client else ''))
2d6659b9 2325 ytm_streaming_data = try_get(ytm_player_response, lambda x: x['streamingData'], dict) or {}
109dd3b2 2326
545cc85d 2327 player_response = None
2328 if webpage:
2329 player_response = self._extract_yt_initial_variable(
2330 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2331 video_id, 'initial player response')
f4f751af 2332
109dd3b2 2333 if not player_response or force_mobile_client:
2334 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2335 yt_client = 'WEB'
2336 ytpcfg = ytcfg
2337 ytp_headers = headers
2338 if not sts or force_mobile_client:
2339 # Android client already has signature descrambled
2340 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2341 if not sts:
2342 self.report_warning('Falling back to mobile client for player API.')
2343 yt_client = 'ANDROID'
2344 ytpcfg = {}
2345 ytp_headers = self._generate_api_headers(ytpcfg, identity_token, syncid, yt_client)
2346
2347 yt_query = {'videoId': video_id}
2348 yt_query.update(self._generate_player_context(sts))
2349 player_response = self._extract_response(
2350 item_id=video_id, ep='player', query=yt_query,
2351 ytcfg=ytpcfg, headers=ytp_headers, fatal=False,
2352 default_client=yt_client,
2353 note='Downloading %splayer API JSON' % ('mobile ' if force_mobile_client else '')
2354 )
545cc85d 2355
109dd3b2 2356 # Age-gate workarounds
545cc85d 2357 playability_status = player_response.get('playabilityStatus') or {}
109dd3b2 2358 if playability_status.get('reason') in self._AGE_GATE_REASONS:
545cc85d 2359 pr = self._parse_json(try_get(compat_parse_qs(
2360 self._download_webpage(
2361 base_url + 'get_video_info', video_id,
4e6767b5 2362 'Refetching age-gated info webpage', 'unable to download video info webpage',
2363 query=self._get_video_info_params(video_id), fatal=False)),
545cc85d 2364 lambda x: x['player_response'][0],
2365 compat_str) or '{}', video_id)
109dd3b2 2366 if not pr:
2367 self.report_warning('Falling back to embedded-only age-gate workaround.')
2368 embed_webpage = None
2369 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2370 if sts and not force_mobile_client and 'configs' not in player_skip:
2371 embed_webpage = self._download_webpage(
2372 'https://www.youtube.com/embed/%s?html5=1' % video_id,
2373 video_id=video_id, note='Downloading age-gated embed config')
2374
2375 ytcfg_age = self._extract_ytcfg(video_id, embed_webpage) or {}
2376 # If we extracted the embed webpage, it'll tell us if we can view the video
2377 embedded_pr = self._parse_json(
2378 try_get(ytcfg_age, lambda x: x['PLAYER_VARS']['embedded_player_response'], str) or '{}',
2379 video_id=video_id)
2380 embedded_ps_reason = try_get(embedded_pr, lambda x: x['playabilityStatus']['reason'], str) or ''
2381 if embedded_ps_reason not in self._AGE_GATE_REASONS:
2382 yt_client = 'WEB_EMBEDDED_PLAYER'
2383 if not sts or force_mobile_client:
2384 # Android client already has signature descrambled
2385 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2386 if not sts:
2387 self.report_warning(
2388 'Falling back to mobile embedded client for player API (note: some formats may be missing).')
2389 yt_client = 'ANDROID_EMBEDDED_PLAYER'
2390 ytcfg_age = {}
2391
2392 ytage_headers = self._generate_api_headers(
2393 ytcfg_age, identity_token, syncid, client=yt_client)
2394 yt_age_query = {'videoId': video_id}
2395 yt_age_query.update(self._generate_player_context(sts))
2396 pr = self._extract_response(
2397 item_id=video_id, ep='player', query=yt_age_query,
2398 ytcfg=ytcfg_age, headers=ytage_headers, fatal=False,
2399 default_client=yt_client,
2400 note='Downloading %sage-gated player API JSON' % ('mobile ' if force_mobile_client else '')
2401 ) or {}
2402
545cc85d 2403 if pr:
2404 player_response = pr
2405
2406 trailer_video_id = try_get(
2407 playability_status,
2408 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
2409 compat_str)
2410 if trailer_video_id:
2411 return self.url_result(
2412 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 2413
545cc85d 2414 search_meta = (
2415 lambda x: self._html_search_meta(x, webpage, default=None)) \
2416 if webpage else lambda x: None
dbdaaa23 2417
545cc85d 2418 video_details = player_response.get('videoDetails') or {}
37357d21 2419 microformat = try_get(
545cc85d 2420 player_response,
2421 lambda x: x['microformat']['playerMicroformatRenderer'],
2422 dict) or {}
2423 video_title = video_details.get('title') \
2424 or get_text(microformat.get('title')) \
2425 or search_meta(['og:title', 'twitter:title', 'title'])
2426 video_description = video_details.get('shortDescription')
cf7e015f 2427
8fe10494 2428 if not smuggled_data.get('force_singlefeed', False):
a06916d9 2429 if not self.get_param('noplaylist'):
8fe10494
S
2430 multifeed_metadata_list = try_get(
2431 player_response,
2432 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 2433 compat_str)
8fe10494
S
2434 if multifeed_metadata_list:
2435 entries = []
2436 feed_ids = []
2437 for feed in multifeed_metadata_list.split(','):
2438 # Unquote should take place before split on comma (,) since textual
2439 # fields may contain comma as well (see
067aa17e 2440 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 2441 feed_data = compat_parse_qs(
2442 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
2443
def feed_entry(name):
    """Return the first value stored under name in feed_data, if it is a string."""
    def first_value(data):
        return data[name][0]

    return try_get(feed_data, first_value, compat_str)
6b09401b
S
2447
2448 feed_id = feed_entry('id')
2449 if not feed_id:
2450 continue
2451 feed_title = feed_entry('title')
2452 title = video_title
2453 if feed_title:
2454 title += ' (%s)' % feed_title
8fe10494
S
2455 entries.append({
2456 '_type': 'url_transparent',
2457 'ie_key': 'Youtube',
2458 'url': smuggle_url(
545cc85d 2459 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 2460 {'force_singlefeed': True}),
6b09401b 2461 'title': title,
8fe10494 2462 })
6b09401b 2463 feed_ids.append(feed_id)
8fe10494
S
2464 self.to_screen(
2465 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2466 % (', '.join(feed_ids), video_id))
545cc85d 2467 return self.playlist_result(
2468 entries, video_id, video_title, video_description)
8fe10494
S
2469 else:
2470 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 2471
9297939e 2472 formats, itags, stream_ids = [], [], []
cc2db878 2473 itag_qualities = {}
d3fc8074 2474 q = qualities([
60bdb7bd 2475 # "tiny" is the smallest video-only format. But some audio-only formats
2476 # was also labeled "tiny". It is not clear if such formats still exist
d3fc8074 2477 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2478 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2479 ])
9297939e 2480
545cc85d 2481 streaming_data = player_response.get('streamingData') or {}
2482 streaming_formats = streaming_data.get('formats') or []
2483 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
9297939e 2484 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2485 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2486
545cc85d 2487 for fmt in streaming_formats:
2488 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2489 continue
321bf820 2490
cc2db878 2491 itag = str_or_none(fmt.get('itag'))
9297939e 2492 audio_track = fmt.get('audioTrack') or {}
2493 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2494 if stream_id in stream_ids:
2495 continue
2496
cc2db878 2497 quality = fmt.get('quality')
d3fc8074 2498 if quality == 'tiny' or not quality:
2499 quality = fmt.get('audioQuality', '').lower() or quality
cc2db878 2500 if itag and quality:
2501 itag_qualities[itag] = quality
2502 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2503 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2504 # number of fragment that would subsequently requested with (`&sq=N`)
2505 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2506 continue
2507
545cc85d 2508 fmt_url = fmt.get('url')
2509 if not fmt_url:
2510 sc = compat_parse_qs(fmt.get('signatureCipher'))
2511 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2512 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2513 if not (sc and fmt_url and encrypted_sig):
2514 continue
545cc85d 2515 if not player_url:
201e9eaa 2516 continue
545cc85d 2517 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2518 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2519 fmt_url += '&' + sp + '=' + signature
2520
545cc85d 2521 if itag:
2522 itags.append(itag)
9297939e 2523 stream_ids.append(stream_id)
2524
cc2db878 2525 tbr = float_or_none(
2526 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 2527 dct = {
2528 'asr': int_or_none(fmt.get('audioSampleRate')),
2529 'filesize': int_or_none(fmt.get('contentLength')),
2530 'format_id': itag,
0fb983f6 2531 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
545cc85d 2532 'fps': int_or_none(fmt.get('fps')),
2533 'height': int_or_none(fmt.get('height')),
dca3ff4a 2534 'quality': q(quality),
cc2db878 2535 'tbr': tbr,
545cc85d 2536 'url': fmt_url,
2537 'width': fmt.get('width'),
0fb983f6 2538 'language': audio_track.get('id', '').split('.')[0],
545cc85d 2539 }
60bdb7bd 2540 mime_mobj = re.match(
2541 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2542 if mime_mobj:
2543 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2544 dct.update(parse_codecs(mime_mobj.group(2)))
2545 # The 3gp format in android client has a quality of "small",
2546 # but is actually worse than all other formats
2547 if dct['ext'] == '3gp':
2548 dct['quality'] = q('tiny')
cc2db878 2549 no_audio = dct.get('acodec') == 'none'
2550 no_video = dct.get('vcodec') == 'none'
2551 if no_audio:
2552 dct['vbr'] = tbr
2553 if no_video:
2554 dct['abr'] = tbr
2555 if no_audio or no_video:
545cc85d 2556 dct['downloader_options'] = {
2557 # Youtube throttles chunks >~10M
2558 'http_chunk_size': 10485760,
bf1317d2 2559 }
7c60c33e 2560 if dct.get('ext'):
2561 dct['container'] = dct['ext'] + '_dash'
545cc85d 2562 formats.append(dct)
2563
4bb6b02f 2564 skip_manifests = self._configuration_arg('skip')
5d3a0e79 2565 get_dash = 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
2566 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2567
9297939e 2568 for sd in (streaming_data, ytm_streaming_data):
5d3a0e79 2569 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
9297939e 2570 if hls_manifest_url:
2571 for f in self._extract_m3u8_formats(
2572 hls_manifest_url, video_id, 'mp4', fatal=False):
2573 itag = self._search_regex(
2574 r'/itag/(\d+)', f['url'], 'itag', default=None)
2575 if itag:
2576 f['format_id'] = itag
8d68ab98 2577 formats.append(f)
545cc85d 2578
5d3a0e79 2579 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2580 if dash_manifest_url:
2581 for f in self._extract_mpd_formats(
2582 dash_manifest_url, video_id, fatal=False):
2583 itag = f['format_id']
2584 if itag in itags:
2585 continue
2586 if itag in itag_qualities:
2587 f['quality'] = q(itag_qualities[itag])
2588 filesize = int_or_none(self._search_regex(
2589 r'/clen/(\d+)', f.get('fragment_base_url')
2590 or f['url'], 'file size', default=None))
2591 if filesize:
2592 f['filesize'] = filesize
2593 formats.append(f)
bf1317d2 2594
545cc85d 2595 if not formats:
a06916d9 2596 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
b7da73eb 2597 self.raise_no_formats(
545cc85d 2598 'This video is DRM protected.', expected=True)
2599 pemr = try_get(
2600 playability_status,
2601 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2602 dict) or {}
2603 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2604 subreason = pemr.get('subreason')
2605 if subreason:
2606 subreason = clean_html(get_text(subreason))
2607 if subreason == 'The uploader has not made this video available in your country.':
2608 countries = microformat.get('availableCountries')
2609 if not countries:
2610 regions_allowed = search_meta('regionsAllowed')
2611 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2612 self.raise_geo_restricted(subreason, countries, metadata_available=True)
545cc85d 2613 reason += '\n' + subreason
2614 if reason:
b7da73eb 2615 self.raise_no_formats(reason, expected=True)
bf1317d2 2616
545cc85d 2617 self._sort_formats(formats)
bf1317d2 2618
545cc85d 2619 keywords = video_details.get('keywords') or []
2620 if not keywords and webpage:
2621 keywords = [
2622 unescapeHTML(m.group('content'))
2623 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2624 for keyword in keywords:
2625 if keyword.startswith('yt:stretch='):
201c1459 2626 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2627 if mobj:
2628 # NB: float is intentional for forcing float division
2629 w, h = (float(v) for v in mobj.groups())
2630 if w > 0 and h > 0:
2631 ratio = w / h
2632 for f in formats:
2633 if f.get('vcodec') != 'none':
2634 f['stretched_ratio'] = ratio
2635 break
6449cd80 2636
545cc85d 2637 thumbnails = []
2638 for container in (video_details, microformat):
2639 for thumbnail in (try_get(
2640 container,
2641 lambda x: x['thumbnail']['thumbnails'], list) or []):
2642 thumbnail_url = thumbnail.get('url')
2643 if not thumbnail_url:
bf1317d2 2644 continue
1988fab7 2645 # Sometimes youtube gives a wrong thumbnail URL. See:
2646 # https://github.com/yt-dlp/yt-dlp/issues/233
2647 # https://github.com/ytdl-org/youtube-dl/issues/28023
2648 if 'maxresdefault' in thumbnail_url:
2649 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2650 thumbnails.append({
545cc85d 2651 'url': thumbnail_url,
ff2751ac 2652 'height': int_or_none(thumbnail.get('height')),
545cc85d 2653 'width': int_or_none(thumbnail.get('width')),
ff2751ac 2654 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
545cc85d 2655 })
ff2751ac 2656 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2657 if thumbnail_url:
2658 thumbnails.append({
2659 'url': thumbnail_url,
2660 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2661 })
2662 # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
2663 # See: https://github.com/ytdl-org/youtube-dl/issues/29049
2664 thumbnails.append({
2665 'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
2666 'preference': 1,
2667 })
2668 self._remove_duplicate_formats(thumbnails)
545cc85d 2669
2670 category = microformat.get('category') or search_meta('genre')
2671 channel_id = video_details.get('channelId') \
2672 or microformat.get('externalChannelId') \
2673 or search_meta('channelId')
2674 duration = int_or_none(
2675 video_details.get('lengthSeconds')
2676 or microformat.get('lengthSeconds')) \
2677 or parse_duration(search_meta('duration'))
2678 is_live = video_details.get('isLive')
f6745c49 2679 is_upcoming = video_details.get('isUpcoming')
545cc85d 2680 owner_profile_url = microformat.get('ownerProfileUrl')
2681
2682 info = {
2683 'id': video_id,
2684 'title': self._live_title(video_title) if is_live else video_title,
2685 'formats': formats,
2686 'thumbnails': thumbnails,
2687 'description': video_description,
2688 'upload_date': unified_strdate(
2689 microformat.get('uploadDate')
2690 or search_meta('uploadDate')),
2691 'uploader': video_details['author'],
2692 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2693 'uploader_url': owner_profile_url,
2694 'channel_id': channel_id,
2695 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2696 'duration': duration,
2697 'view_count': int_or_none(
2698 video_details.get('viewCount')
2699 or microformat.get('viewCount')
2700 or search_meta('interactionCount')),
2701 'average_rating': float_or_none(video_details.get('averageRating')),
2702 'age_limit': 18 if (
2703 microformat.get('isFamilySafe') is False
2704 or search_meta('isFamilyFriendly') == 'false'
2705 or search_meta('og:restrictions:age') == '18+') else 0,
2706 'webpage_url': webpage_url,
2707 'categories': [category] if category else None,
2708 'tags': keywords,
2709 'is_live': is_live,
2710 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2711 'was_live': video_details.get('isLiveContent'),
545cc85d 2712 }
b477fc13 2713
545cc85d 2714 pctr = try_get(
2715 player_response,
2716 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2717 subtitles = {}
2718 if pctr:
def process_language(container, base_url, lang_code, sub_name, query):
    """Append one subtitle entry per supported format to container[lang_code].

    container -- dict mapping language codes to lists of subtitle entries;
                 the list for lang_code is created if missing and extended
    base_url  -- URL that the per-format subtitle URLs are derived from
    lang_code -- key under which the entries are stored
    sub_name  -- value stored as the entry's 'name'
    query     -- extra URL query parameters; left unmodified
    """
    lang_subs = container.setdefault(lang_code, [])
    for fmt in self._SUBTITLE_FORMATS:
        # Work on a copy so the caller's query dict is never mutated
        fmt_query = dict(query)
        fmt_query['fmt'] = fmt
        lang_subs.append({
            'ext': fmt,
            'url': update_url_query(base_url, fmt_query),
            'name': sub_name,
        })
7e72694b 2730
545cc85d 2731 for caption_track in (pctr.get('captionTracks') or []):
2732 base_url = caption_track.get('baseUrl')
2733 if not base_url:
2734 continue
2735 if caption_track.get('kind') != 'asr':
120916da 2736 lang_code = (
2737 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2738 or caption_track.get('languageCode'))
545cc85d 2739 if not lang_code:
2740 continue
2741 process_language(
774d79cc 2742 subtitles, base_url, lang_code,
2d6659b9 2743 try_get(caption_track, lambda x: x['name']['simpleText']),
774d79cc 2744 {})
545cc85d 2745 continue
2746 automatic_captions = {}
2747 for translation_language in (pctr.get('translationLanguages') or []):
2748 translation_language_code = translation_language.get('languageCode')
2749 if not translation_language_code:
2750 continue
2751 process_language(
2752 automatic_captions, base_url, translation_language_code,
49c258e1 2753 try_get(translation_language, (
2754 lambda x: x['languageName']['simpleText'],
2755 lambda x: x['languageName']['runs'][0]['text'])),
545cc85d 2756 {'tlang': translation_language_code})
2757 info['automatic_captions'] = automatic_captions
2758 info['subtitles'] = subtitles
7e72694b 2759
545cc85d 2760 parsed_url = compat_urllib_parse_urlparse(url)
2761 for component in [parsed_url.fragment, parsed_url.query]:
2762 query = compat_parse_qs(component)
2763 for k, v in query.items():
2764 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2765 d_k += '_time'
2766 if d_k not in info and k in s_ks:
2767 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2768
2769 # Youtube Music Auto-generated description
822b9d9c 2770 if video_description:
38d70284 2771 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2772 if mobj:
822b9d9c
RA
2773 release_year = mobj.group('release_year')
2774 release_date = mobj.group('release_date')
2775 if release_date:
2776 release_date = release_date.replace('-', '')
2777 if not release_year:
545cc85d 2778 release_year = release_date[:4]
2779 info.update({
2780 'album': mobj.group('album'.strip()),
2781 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2782 'track': mobj.group('track').strip(),
2783 'release_date': release_date,
cc2db878 2784 'release_year': int_or_none(release_year),
545cc85d 2785 })
7e72694b 2786
545cc85d 2787 initial_data = None
2788 if webpage:
2789 initial_data = self._extract_yt_initial_variable(
2790 webpage, self._YT_INITIAL_DATA_RE, video_id,
2791 'yt initial data')
2792 if not initial_data:
109dd3b2 2793 initial_data = self._extract_response(
2794 item_id=video_id, ep='next', fatal=False,
2795 ytcfg=ytcfg, headers=headers, query={'videoId': video_id},
2796 note='Downloading initial data API JSON')
545cc85d 2797
c60ee3a2 2798 try:
2799 # This will error if there is no livechat
2800 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2801 info['subtitles']['live_chat'] = [{
2802 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2803 'video_id': video_id,
2804 'ext': 'json',
f6745c49 2805 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 2806 }]
2807 except (KeyError, IndexError, TypeError):
2808 pass
545cc85d 2809
2810 if initial_data:
2811 chapters = self._extract_chapters_from_json(
2812 initial_data, video_id, duration)
2813 if not chapters:
2814 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2815 contents = try_get(
2816 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2817 list)
2818 if not contents:
2819 continue
2820
def chapter_time(mmlir):
    """Return the parsed 'timeDescription' of a macro markers list item, or None.

    mmlir is normally a 'macroMarkersListItemRenderer' dict. The caller may
    pass None when the following item lacks that renderer, so any non-dict
    input returns None; the caller skips chapters whose time is None.
    """
    if not isinstance(mmlir, dict):
        return None
    return parse_duration(
        get_text(mmlir.get('timeDescription')))
2824
2825 chapters = []
2826 for next_num, content in enumerate(contents, start=1):
2827 mmlir = content.get('macroMarkersListItemRenderer') or {}
2828 start_time = chapter_time(mmlir)
2829 end_time = chapter_time(try_get(
2830 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2831 if next_num < len(contents) else duration
2832 if start_time is None or end_time is None:
2833 continue
2834 chapters.append({
2835 'start_time': start_time,
2836 'end_time': end_time,
2837 'title': get_text(mmlir.get('title')),
2838 })
2839 if chapters:
2840 break
2841 if chapters:
2842 info['chapters'] = chapters
2843
2844 contents = try_get(
2845 initial_data,
2846 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2847 list) or []
2848 for content in contents:
2849 vpir = content.get('videoPrimaryInfoRenderer')
2850 if vpir:
2851 stl = vpir.get('superTitleLink')
2852 if stl:
2853 stl = get_text(stl)
2854 if try_get(
2855 vpir,
2856 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2857 info['location'] = stl
2858 else:
2859 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2860 if mobj:
2861 info.update({
2862 'series': mobj.group(1),
2863 'season_number': int(mobj.group(2)),
2864 'episode_number': int(mobj.group(3)),
2865 })
2866 for tlb in (try_get(
2867 vpir,
2868 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2869 list) or []):
2870 tbr = tlb.get('toggleButtonRenderer') or {}
2871 for getter, regex in [(
2872 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2873 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2874 lambda x: x['accessibility'],
2875 lambda x: x['accessibilityData']['accessibilityData'],
2876 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2877 label = (try_get(tbr, getter, dict) or {}).get('label')
2878 if label:
2879 mobj = re.match(regex, label)
2880 if mobj:
2881 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2882 break
2883 sbr_tooltip = try_get(
2884 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2885 if sbr_tooltip:
2886 like_count, dislike_count = sbr_tooltip.split(' / ')
2887 info.update({
2888 'like_count': str_to_int(like_count),
2889 'dislike_count': str_to_int(dislike_count),
2890 })
2891 vsir = content.get('videoSecondaryInfoRenderer')
2892 if vsir:
2893 info['channel'] = get_text(try_get(
2894 vsir,
2895 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2896 dict))
545cc85d 2897 rows = try_get(
2898 vsir,
2899 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2900 list) or []
2901 multiple_songs = False
2902 for row in rows:
2903 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2904 multiple_songs = True
2905 break
2906 for row in rows:
2907 mrr = row.get('metadataRowRenderer') or {}
2908 mrr_title = mrr.get('title')
2909 if not mrr_title:
2910 continue
2911 mrr_title = get_text(mrr['title'])
2912 mrr_contents_text = get_text(mrr['contents'][0])
2913 if mrr_title == 'License':
2914 info['license'] = mrr_contents_text
2915 elif not multiple_songs:
2916 if mrr_title == 'Album':
2917 info['album'] = mrr_contents_text
2918 elif mrr_title == 'Artist':
2919 info['artist'] = mrr_contents_text
2920 elif mrr_title == 'Song':
2921 info['track'] = mrr_contents_text
2922
2923 fallbacks = {
2924 'channel': 'uploader',
2925 'channel_id': 'uploader_id',
2926 'channel_url': 'uploader_url',
2927 }
2928 for to, frm in fallbacks.items():
2929 if not info.get(to):
2930 info[to] = info.get(frm)
2931
2932 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2933 v = info.get(s_k)
2934 if v:
2935 info[d_k] = v
b84071c0 2936
c224251a
M
2937 is_private = bool_or_none(video_details.get('isPrivate'))
2938 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2939 is_membersonly = None
b28f8d24 2940 is_premium = None
c224251a
M
2941 if initial_data and is_private is not None:
2942 is_membersonly = False
b28f8d24 2943 is_premium = False
c224251a
M
2944 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2945 for content in contents or []:
2946 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2947 for badge in badges or []:
2948 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2949 if label.lower() == 'members only':
2950 is_membersonly = True
2951 break
b28f8d24
M
2952 elif label.lower() == 'premium':
2953 is_premium = True
2954 break
2955 if is_membersonly or is_premium:
c224251a
M
2956 break
2957
2958 # TODO: Add this for playlists
2959 info['availability'] = self._availability(
2960 is_private=is_private,
b28f8d24 2961 needs_premium=is_premium,
c224251a
M
2962 needs_subscription=is_membersonly,
2963 needs_auth=info['age_limit'] >= 18,
2964 is_unlisted=None if is_private is None else is_unlisted)
2965
06167fbb 2966 # get xsrf for annotations or comments
a06916d9 2967 get_annotations = self.get_param('writeannotations', False)
2968 get_comments = self.get_param('getcomments', False)
06167fbb 2969 if get_annotations or get_comments:
29f7c58a 2970 xsrf_token = None
545cc85d 2971 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2972 if ytcfg:
2973 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2974 if not xsrf_token:
2975 xsrf_token = self._search_regex(
2976 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2977 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2978
2979 # annotations
06167fbb 2980 if get_annotations:
64b6a4e9
RA
2981 invideo_url = try_get(
2982 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2983 if xsrf_token and invideo_url:
29f7c58a 2984 xsrf_field_name = None
2985 if ytcfg:
2986 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2987 if not xsrf_field_name:
2988 xsrf_field_name = self._search_regex(
2989 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2990 webpage, 'xsrf field name',
29f7c58a 2991 group='xsrf_field_name', default='session_token')
8a784c74 2992 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2993 self._proto_relative_url(invideo_url),
2994 video_id, note='Downloading annotations',
2995 errnote='Unable to download video annotations', fatal=False,
2996 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2997
277d6ff5 2998 if get_comments:
2d6659b9 2999 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage)
4ea3be0a 3000
545cc85d 3001 self.mark_watched(video_id, player_response)
d77ab8e2 3002
545cc85d 3003 return info
c5e8d7af 3004
5f6a1245 3005
8bdd16b4 3006class YoutubeTabIE(YoutubeBaseInfoExtractor):
3007 IE_DESC = 'YouTube.com tab'
70d5c17b 3008 _VALID_URL = r'''(?x)
3009 https?://
3010 (?:\w+\.)?
3011 (?:
3012 youtube(?:kids)?\.com|
3013 invidio\.us
3014 )/
3015 (?:
fe03a6cd 3016 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 3017 (?P<not_channel>
9ba5705a 3018 feed/|hashtag/|
70d5c17b 3019 (?:playlist|watch)\?.*?\blist=
3020 )|
29f7c58a 3021 (?!(?:%s)\b) # Direct URLs
70d5c17b 3022 )
3023 (?P<id>[^/?\#&]+)
3024 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 3025 IE_NAME = 'youtube:tab'
3026
81127aa5 3027 _TESTS = [{
da692b79 3028 'note': 'playlists, multipage',
8bdd16b4 3029 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3030 'playlist_mincount': 94,
3031 'info_dict': {
3032 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3033 'title': 'Игорь Клейнер - Playlists',
3034 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3035 'uploader': 'Игорь Клейнер',
3036 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 3037 },
3038 }, {
da692b79 3039 'note': 'playlists, multipage, different order',
8bdd16b4 3040 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3041 'playlist_mincount': 94,
3042 'info_dict': {
3043 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3044 'title': 'Игорь Клейнер - Playlists',
3045 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3046 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3047 'uploader': 'Игорь Клейнер',
8bdd16b4 3048 },
201c1459 3049 }, {
da692b79 3050 'note': 'playlists, series',
201c1459 3051 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3052 'playlist_mincount': 5,
3053 'info_dict': {
3054 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3055 'title': '3Blue1Brown - Playlists',
3056 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 3057 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3058 'uploader': '3Blue1Brown',
201c1459 3059 },
8bdd16b4 3060 }, {
da692b79 3061 'note': 'playlists, singlepage',
8bdd16b4 3062 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3063 'playlist_mincount': 4,
3064 'info_dict': {
3065 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3066 'title': 'ThirstForScience - Playlists',
3067 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 3068 'uploader': 'ThirstForScience',
3069 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 3070 }
3071 }, {
3072 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3073 'only_matching': True,
3074 }, {
da692b79 3075 'note': 'basic, single video playlist',
0e30a7b9 3076 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3077 'info_dict': {
0e30a7b9 3078 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3079 'uploader': 'Sergey M.',
3080 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3081 'title': 'youtube-dl public playlist',
81127aa5 3082 },
0e30a7b9 3083 'playlist_count': 1,
9291475f 3084 }, {
da692b79 3085 'note': 'empty playlist',
0e30a7b9 3086 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3087 'info_dict': {
0e30a7b9 3088 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3089 'uploader': 'Sergey M.',
3090 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3091 'title': 'youtube-dl empty playlist',
9291475f
PH
3092 },
3093 'playlist_count': 0,
3094 }, {
da692b79 3095 'note': 'Home tab',
8bdd16b4 3096 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3097 'info_dict': {
8bdd16b4 3098 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3099 'title': 'lex will - Home',
3100 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3101 'uploader': 'lex will',
3102 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3103 },
8bdd16b4 3104 'playlist_mincount': 2,
9291475f 3105 }, {
da692b79 3106 'note': 'Videos tab',
8bdd16b4 3107 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3108 'info_dict': {
8bdd16b4 3109 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3110 'title': 'lex will - Videos',
3111 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3112 'uploader': 'lex will',
3113 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3114 },
8bdd16b4 3115 'playlist_mincount': 975,
9291475f 3116 }, {
da692b79 3117 'note': 'Videos tab, sorted by popular',
8bdd16b4 3118 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3119 'info_dict': {
8bdd16b4 3120 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3121 'title': 'lex will - Videos',
3122 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3123 'uploader': 'lex will',
3124 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3125 },
8bdd16b4 3126 'playlist_mincount': 199,
9291475f 3127 }, {
da692b79 3128 'note': 'Playlists tab',
8bdd16b4 3129 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3130 'info_dict': {
8bdd16b4 3131 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3132 'title': 'lex will - Playlists',
3133 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3134 'uploader': 'lex will',
3135 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3136 },
8bdd16b4 3137 'playlist_mincount': 17,
ac7553d0 3138 }, {
da692b79 3139 'note': 'Community tab',
8bdd16b4 3140 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3141 'info_dict': {
8bdd16b4 3142 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3143 'title': 'lex will - Community',
3144 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3145 'uploader': 'lex will',
3146 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3147 },
3148 'playlist_mincount': 18,
87dadd45 3149 }, {
da692b79 3150 'note': 'Channels tab',
8bdd16b4 3151 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3152 'info_dict': {
8bdd16b4 3153 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3154 'title': 'lex will - Channels',
3155 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3156 'uploader': 'lex will',
3157 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3158 },
deaec5af 3159 'playlist_mincount': 12,
cd684175 3160 }, {
3161 'note': 'Search tab',
3162 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3163 'playlist_mincount': 40,
3164 'info_dict': {
3165 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3166 'title': '3Blue1Brown - Search - linear algebra',
3167 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3168 'uploader': '3Blue1Brown',
3169 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3170 },
6b08cdf6 3171 }, {
a0566bbf 3172 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3173 'only_matching': True,
3174 }, {
a0566bbf 3175 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3176 'only_matching': True,
3177 }, {
a0566bbf 3178 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3179 'only_matching': True,
3180 }, {
3181 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3182 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3183 'info_dict': {
3184 'title': '29C3: Not my department',
3185 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3186 'uploader': 'Christiaan008',
3187 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3188 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3189 },
3190 'playlist_count': 96,
3191 }, {
3192 'note': 'Large playlist',
3193 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3194 'info_dict': {
8bdd16b4 3195 'title': 'Uploads from Cauchemar',
3196 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3197 'uploader': 'Cauchemar',
3198 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3199 },
8bdd16b4 3200 'playlist_mincount': 1123,
3201 }, {
da692b79 3202 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3203 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3204 'only_matching': True,
4b7df0d3
JMF
3205 }, {
3206 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3207 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3208 'info_dict': {
acf757f4
PH
3209 'title': 'Uploads from Interstellar Movie',
3210 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3211 'uploader': 'Interstellar Movie',
8bdd16b4 3212 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3213 },
481cc733 3214 'playlist_mincount': 21,
358de58c 3215 }, {
3216 'note': 'Playlist with "show unavailable videos" button',
3217 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3218 'info_dict': {
3219 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3220 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3221 'uploader': 'Phim Siêu Nhân Nhật Bản',
3222 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3223 },
da692b79 3224 'playlist_mincount': 200,
5d342002 3225 }, {
da692b79 3226 'note': 'Playlist with unavailable videos in page 7',
5d342002 3227 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3228 'info_dict': {
3229 'title': 'Uploads from BlankTV',
3230 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3231 'uploader': 'BlankTV',
3232 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3233 },
da692b79 3234 'playlist_mincount': 1000,
8bdd16b4 3235 }, {
da692b79 3236 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3237 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3238 'info_dict': {
3239 'title': 'Data Analysis with Dr Mike Pound',
3240 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3241 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3242 'uploader': 'Computerphile',
deaec5af 3243 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3244 },
3245 'playlist_mincount': 11,
3246 }, {
a0566bbf 3247 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3248 'only_matching': True,
dacb3a86 3249 }, {
da692b79 3250 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3251 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3252 'info_dict': {
3253 'id': 'FqZTN594JQw',
3254 'ext': 'webm',
3255 'title': "Smiley's People 01 detective, Adventure Series, Action",
3256 'uploader': 'STREEM',
3257 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3258 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3259 'upload_date': '20150526',
3260 'license': 'Standard YouTube License',
3261 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3262 'categories': ['People & Blogs'],
3263 'tags': list,
dbdaaa23 3264 'view_count': int,
dacb3a86
S
3265 'like_count': int,
3266 'dislike_count': int,
3267 },
3268 'params': {
3269 'skip_download': True,
3270 },
13a75688 3271 'skip': 'This video is not available.',
dacb3a86 3272 'add_ie': [YoutubeIE.ie_key()],
481cc733 3273 }, {
8bdd16b4 3274 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3275 'only_matching': True,
66b48727 3276 }, {
8bdd16b4 3277 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3278 'only_matching': True,
a0566bbf 3279 }, {
3280 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3281 'info_dict': {
da692b79 3282 'id': 'X1whbWASnNQ', # This will keep changing
a0566bbf 3283 'ext': 'mp4',
deaec5af 3284 'title': compat_str,
a0566bbf 3285 'uploader': 'Sky News',
3286 'uploader_id': 'skynews',
3287 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3288 'upload_date': r're:\d{8}',
3289 'description': compat_str,
a0566bbf 3290 'categories': ['News & Politics'],
3291 'tags': list,
3292 'like_count': int,
3293 'dislike_count': int,
3294 },
3295 'params': {
3296 'skip_download': True,
3297 },
da692b79 3298 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3299 }, {
3300 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3301 'info_dict': {
3302 'id': 'a48o2S1cPoo',
3303 'ext': 'mp4',
3304 'title': 'The Young Turks - Live Main Show',
3305 'uploader': 'The Young Turks',
3306 'uploader_id': 'TheYoungTurks',
3307 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3308 'upload_date': '20150715',
3309 'license': 'Standard YouTube License',
3310 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3311 'categories': ['News & Politics'],
3312 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3313 'like_count': int,
3314 'dislike_count': int,
3315 },
3316 'params': {
3317 'skip_download': True,
3318 },
3319 'only_matching': True,
3320 }, {
3321 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3322 'only_matching': True,
3323 }, {
3324 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3325 'only_matching': True,
09f1580e 3326 }, {
3327 'note': 'A channel that is not live. Should raise error',
3328 'url': 'https://www.youtube.com/user/numberphile/live',
3329 'only_matching': True,
3d3dddc9 3330 }, {
3331 'url': 'https://www.youtube.com/feed/trending',
3332 'only_matching': True,
3333 }, {
3d3dddc9 3334 'url': 'https://www.youtube.com/feed/library',
3335 'only_matching': True,
3336 }, {
3d3dddc9 3337 'url': 'https://www.youtube.com/feed/history',
3338 'only_matching': True,
3339 }, {
3d3dddc9 3340 'url': 'https://www.youtube.com/feed/subscriptions',
3341 'only_matching': True,
3342 }, {
3d3dddc9 3343 'url': 'https://www.youtube.com/feed/watch_later',
3344 'only_matching': True,
3345 }, {
da692b79 3346 'note': 'Recommended - redirects to home page',
3d3dddc9 3347 'url': 'https://www.youtube.com/feed/recommended',
3348 'only_matching': True,
29f7c58a 3349 }, {
da692b79 3350 'note': 'inline playlist with not always working continuations',
29f7c58a 3351 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3352 'only_matching': True,
3353 }, {
3354 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3355 'only_matching': True,
3356 }, {
3357 'url': 'https://www.youtube.com/course',
3358 'only_matching': True,
3359 }, {
3360 'url': 'https://www.youtube.com/zsecurity',
3361 'only_matching': True,
3362 }, {
3363 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3364 'only_matching': True,
3365 }, {
3366 'url': 'https://www.youtube.com/TheYoungTurks/live',
3367 'only_matching': True,
39ed931e 3368 }, {
3369 'url': 'https://www.youtube.com/hashtag/cctv9',
3370 'info_dict': {
3371 'id': 'cctv9',
3372 'title': '#cctv9',
3373 },
3374 'playlist_mincount': 350,
201c1459 3375 }, {
3376 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3377 'only_matching': True,
9297939e 3378 }, {
da692b79 3379 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3380 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3381 'only_matching': True
fe03a6cd 3382 }, {
3383 'note': '/browse/ should redirect to /channel/',
3384 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3385 'only_matching': True
3386 }, {
3387 'note': 'VLPL, should redirect to playlist?list=PL...',
3388 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3389 'info_dict': {
3390 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3391 'uploader': 'NoCopyrightSounds',
3392 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3393 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3394 'title': 'NCS Releases',
3395 },
3396 'playlist_mincount': 166,
18db7548 3397 }, {
3398 'note': 'Topic, should redirect to playlist?list=UU...',
3399 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3400 'info_dict': {
3401 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3402 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3403 'title': 'Uploads from Royalty Free Music - Topic',
3404 'uploader': 'Royalty Free Music - Topic',
3405 },
3406 'expected_warnings': [
3407 'A channel/user page was given',
3408 'The URL does not have a videos tab',
3409 ],
3410 'playlist_mincount': 101,
3411 }, {
3412 'note': 'Topic without a UU playlist',
3413 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3414 'info_dict': {
3415 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3416 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3417 },
3418 'expected_warnings': [
3419 'A channel/user page was given',
3420 'The URL does not have a videos tab',
3421 'Falling back to channel URL',
3422 ],
3423 'playlist_mincount': 9,
abcdd12b 3424 }, {
3425 'note': 'Youtube music Album',
3426 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3427 'info_dict': {
3428 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3429 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3430 },
3431 'playlist_count': 50,
29f7c58a 3432 }]
3433
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL that YoutubeIE accepts is rejected here; for everything else
    the decision is delegated to the parent class.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 3438
def _extract_channel_id(self, webpage):
    """Return the channel id found in *webpage*.

    The ``channelId`` meta tag is tried first.  When it is absent, the id
    is taken from the path of a ``youtube.com/channel/<id>`` URL read from
    one of the URL-carrying meta tags.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to be inside the group: with `(...)+` the group
    # only keeps its last repetition, i.e. a single character of the id.
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
15f6397c 3451
8bdd16b4 3452 @staticmethod
cd7c66cf 3453 def _extract_basic_item_renderer(item):
3454 # Modified from _extract_grid_item_renderer
201c1459 3455 known_basic_renderers = (
3456 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3457 )
3458 for key, renderer in item.items():
201c1459 3459 if not isinstance(renderer, dict):
cd7c66cf 3460 continue
201c1459 3461 elif key in known_basic_renderers:
3462 return renderer
3463 elif key.startswith('grid') and key.endswith('Renderer'):
3464 return renderer
8bdd16b4 3465
def _grid_entries(self, grid_renderer):
    """Yield one result per usable item of ``grid_renderer['items']``.

    Each item is resolved, in this order of precedence, as a playlist, a
    video, a channel, or a generic navigation endpoint URL that one of
    the YouTube extractors accepts.  Items matching none are dropped.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        item_renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(item_renderer, dict):
            continue
        title = try_get(
            item_renderer, (lambda x: x['title']['runs'][0]['text'],
                            lambda x: x['title']['simpleText']), compat_str)
        playlist_id = item_renderer.get('playlistId')
        video_id = item_renderer.get('videoId')
        channel_id = item_renderer.get('channelId')

        if playlist_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            # video
            yield self._extract_video(item_renderer)
        elif channel_id:
            # channel: the title is re-read from the simpleText form only
            title = try_get(
                item_renderer, lambda x: x['title']['simpleText'], compat_str)
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support
            ep_url = urljoin('https://www.youtube.com/', try_get(
                item_renderer,
                lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str))
            if not ep_url:
                continue
            # First extractor that accepts the URL wins
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE)
                 if ie.suitable(ep_url)), None)
            if matching_ie is not None:
                yield self.url_result(
                    ep_url, ie=matching_ie.ie_key(),
                    video_id=matching_ie._match_id(ep_url), video_title=title)
8bdd16b4 3508
3d3dddc9 3509 def _shelf_entries_from_content(self, shelf_renderer):
3510 content = shelf_renderer.get('content')
3511 if not isinstance(content, dict):
8bdd16b4 3512 return
cd7c66cf 3513 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3514 if renderer:
3515 # TODO: add support for nested playlists so each shelf is processed
3516 # as separate playlist
3517 # TODO: this includes only first N items
3518 for entry in self._grid_entries(renderer):
3519 yield entry
3520 renderer = content.get('horizontalListRenderer')
3521 if renderer:
3522 # TODO
3523 pass
8bdd16b4 3524
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a link to the shelf page (if any) followed by its inline entries.

    With *skip_channels* set, a shelf whose URL contains ``/channels?``
    yields nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Checking
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work, hence the URL test
        links_to_channels = skip_channels and '/channels?' in shelf_url
        if links_to_channels:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 3542
8bdd16b4 3543 def _playlist_entries(self, video_list_renderer):
3544 for content in video_list_renderer['contents']:
3545 if not isinstance(content, dict):
3546 continue
3547 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3548 if not isinstance(renderer, dict):
3549 continue
3550 video_id = renderer.get('videoId')
3551 if not video_id:
3552 continue
3553 yield self._extract_video(renderer)
07aeced6 3554
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in ``content.videoRenderer``, if it has an id."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3562
8bdd16b4 3563 def _video_entry(self, video_renderer):
3564 video_id = video_renderer.get('videoId')
3565 if video_id:
3566 return self._extract_video(video_renderer)
dacb3a86 3567
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by one community post.

    In order: the attached video, the attached playlist, then every
    YouTube video linked from the post text (except a link to the
    attached video itself).
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        # Do not repeat the attached video
        if linked_id != video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)
dacb3a86 3603
8bdd16b4 3604 def _post_thread_continuation_entries(self, post_thread_continuation):
3605 contents = post_thread_continuation.get('contents')
3606 if not isinstance(contents, list):
3607 return
3608 for content in contents:
3609 renderer = content.get('backstagePostThreadRenderer')
3610 if not isinstance(renderer, dict):
3611 continue
3612 for entry in self._post_thread_entries(renderer):
3613 yield entry
07aeced6 3614
39ed931e 3615 r''' # unused
3616 def _rich_grid_entries(self, contents):
3617 for content in contents:
3618 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3619 if video_renderer:
3620 entry = self._video_entry(video_renderer)
3621 if entry:
3622 yield entry
3623 '''
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield all entries of *tab*, then keep requesting continuation pages.

    The entries embedded in the tab are yielded first.  While a
    continuation is known, the next page is requested and dispatched to
    the handler matching the shape of the response; the loop stops when
    no continuation is left, the request returns nothing, or the
    response has no recognised shape.
    """
    # One-slot holder through which extract_entries reports the
    # continuation it found (Python 2 does not support nonlocal)
    continuation_list = [None]

    # Handlers for the renderers found inside an itemSectionRenderer
    section_handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
    }

    def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
        for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
            if not isinstance(content, dict):
                continue
            section = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not section:
                rich_item = content.get('richItemRenderer')
                if rich_item:
                    for entry in self._rich_entries(rich_item):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            for section_item in try_get(section, lambda x: x['contents'], list) or []:
                if not isinstance(section_item, dict):
                    continue
                # Only the first recognised renderer of each item is used
                for key, renderer in section_item.items():
                    if key in section_handlers:
                        for entry in section_handlers[key](renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(section)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

    # Handlers for responses shaped as `continuationContents`
    contents_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Handlers for responses shaped as `appendContinuationItemsAction`:
    # (handler, key under which the handler expects the item list)
    items_handlers = {
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    page_num = 0
    while continuation:
        page_num += 1
        query = {
            'continuation': continuation['continuation'],
            'clickTracking': {'clickTrackingParams': continuation['itct']}
        }
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=query, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep the previous visitor data when the response carries none
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key in contents_handlers:
                continuation_renderer = value
                continuation_list[0] = None
                for entry in contents_handlers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
        if continuation_renderer:
            continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The shape of the first item decides how the whole list is handled
        first_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key in first_item:
            if key in items_handlers:
                handler, list_key = items_handlers[key]
                video_items_renderer = {list_key: continuation_items}
                continuation_list[0] = None
                for entry in handler(video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
        if video_items_renderer:
            continue
        break
9558dcec 3743
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the first tab whose ``selected`` is True.

    Raises ExtractorError when no tab is selected.
    """
    renderers = (
        dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        for tab in tabs)
    selected = next(
        (renderer for renderer in renderers if renderer.get('selected') is True),
        None)
    if selected is None:
        raise ExtractorError('Unable to find selected tab')
    return selected
b82f815f 3752
@staticmethod
def _extract_uploader(data):
    """Return uploader name, id and URL read from the playlist sidebar.

    Only keys with a non-None value are included, so the result may be
    empty.
    """
    uploader = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    for sidebar_item in sidebar_items or []:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        # A later owner replaces all three values of an earlier one
        uploader = {
            'uploader': owner.get('text'),
            'uploader_id': try_get(
                owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
            'uploader_url': urljoin(
                'https://www.youtube.com/',
                try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)),
        }
    return {key: value for key, value in uploader.items() if value is not None}
8bdd16b4 3775
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for the selected tab of a tabbed page.

    Metadata comes from the channel metadata renderer when present,
    otherwise from the playlist metadata renderer; uploader details fall
    back to the playlist sidebar when no channel id is known.
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    metadata_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if metadata_renderer:
        channel_name = metadata_renderer.get('title')
        channel_url = metadata_renderer.get('channelUrl')
        channel_id = metadata_renderer.get('externalId')
    else:
        metadata_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    raw_thumbnails = []
    if metadata_renderer:
        title = metadata_renderer.get('title')
        description = metadata_renderer.get('description', '')
        playlist_id = channel_id
        tags = metadata_renderer.get('keywords', '').split()
        raw_thumbnails = (
            try_get(metadata_renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    # Keep only thumbnails that carry a usable URL
    thumbnails = []
    for thumb in raw_thumbnails:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        title = (
            try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
            or playlist_id)
    # Append the tab name (and expanded text, e.g. for search tabs) to the title
    title = title + format_field(selected_tab, 'title', ' - %s')
    title = title + format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # Mirror the uploader fields into the channel fields
    for source_key, target_key in (
            ('uploader', 'channel'),
            ('uploader_id', 'channel_id'),
            ('uploader_url', 'channel_url')):
        metadata[target_key] = metadata[source_key]

    return self.playlist_result(
        self._entries(
            selected_tab, playlist_id,
            self._extract_identity_token(webpage, item_id),
            self._extract_account_syncid(data),
            self._extract_ytcfg(item_id, webpage)),
        **metadata)
73c4ac2c 3848
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a mix playlist, requesting further pages as needed.

    Each page is obtained through the ``next`` endpoint, seeded with the
    last video of the previous page.  Iteration ends when a page has no
    videos, has nothing after the last yielded video, repeats the very
    first video, or when no playlist could be read from the response.
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
        visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    for page_num in itertools.count(1):
        # A failed/odd next-page response leaves no playlist to read from
        if not playlist:
            return
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video that was already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last content is not necessarily a playlistPanelVideoRenderer,
        # so fall back to an empty endpoint instead of failing on `.get`
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query,
            ep='next',
            headers=headers,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 3887
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Return the result for an inline playlist.

    When the playlist links to a URL different from *url*, extraction is
    delegated to that URL; otherwise the playlist is extracted here as a
    mix.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    playlist_url = urljoin(url, try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str))

    # Everything except mix playlists is delegated to the regular
    # tab-based playlist URL
    has_own_page = bool(playlist_url) and playlist_url != url
    if not has_own_page:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, webpage),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3905
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Request the playlist again through the API so that unavailable videos are included.

    Returns None when data has no playlist sidebar items. The browse ID and
    params are taken from the 'show unavailable videos' menu entry when it is
    present; otherwise 'VL<item_id>' and 'wgYCCAA=' are used. The request is
    non-fatal.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    if not sidebar_items:
        return

    browse_id = params = None
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        menu_items = try_get(
            sidebar_item.get('playlistSidebarPrimaryInfoRenderer'),
            lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_items:
            if not isinstance(menu_item, dict):
                continue
            nav_renderer = menu_item.get('menuNavigationItemRenderer')
            label = try_get(
                nav_renderer, lambda x: x['text']['simpleText'], compat_str)
            if not label or label.lower() != 'show unavailable videos':
                continue
            endpoint = try_get(
                nav_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id, params = endpoint.get('browseId'), endpoint.get('params')
            break

    ytcfg = self._extract_ytcfg(item_id, webpage)
    visitor_data = try_get(
        self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=visitor_data)
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False,
        note='Downloading API JSON with unavailable videos')
358de58c 3949
def _extract_webpage(self, url, item_id):
    """
    Download url and return (webpage, initial data), retrying on incomplete data.

    The page is re-downloaded while the initial data has neither 'contents'
    nor 'currentVideoEndpoint', up to 'extractor_retries' additional times.
    At least one download is always attempted, even for a negative retry count.
    Raises ExtractorError once the retries are exhausted.
    """
    retries = self.get_param('extractor_retries', 3)
    last_error = 'Incomplete yt initial data received'
    # count is the number of retries done so far (0 for the initial attempt)
    for count in itertools.count():
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if count:
            self.report_warning('%s. Retrying ...' % last_error)
        webpage = self._download_webpage(
            url, item_id,
            'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
        data = self._extract_yt_initial_data(item_id, webpage)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            return webpage, data
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        if count >= retries:
            raise ExtractorError(last_error)
3971
@staticmethod
def _smuggle_data(entries, data):
    """Yield every entry, smuggling data into its 'url' when data is non-empty"""
    if not data:
        # Nothing to attach; pass the entries through untouched
        for entry in entries:
            yield entry
        return
    for entry in entries:
        entry['url'] = smuggle_url(entry['url'], data)
        yield entry
3978
def _real_extract(self, url):
    """
    Unsmuggle url, run the actual extraction and re-smuggle the data
    (including the music URL marker) into the URL of every resulting entry.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True
    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
3987
# Splits a URL into 'pre' (the part matched by _VALID_URL), an optional 'tab'
# path segment such as '/videos', and 'post' (everything after that).
# The '(?(channel_type)...)' conditional only tries to match 'tab' when the
# 'channel_type' group (expected to be defined inside _VALID_URL) took part in the match.
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
3989
3990 def __real_extract(self, url, smuggled_data):
cd7c66cf 3991 item_id = self._match_id(url)
3992 url = compat_urlparse.urlunparse(
3993 compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
a06916d9 3994 compat_opts = self.get_param('compat_opts', [])
cd7c66cf 3995
fe03a6cd 3996 def get_mobj(url):
3997 mobj = self._url_re.match(url).groupdict()
07cce701 3998 mobj.update((k, '') for k, v in mobj.items() if v is None)
fe03a6cd 3999 return mobj
4000
4001 mobj = get_mobj(url)
4002 # Youtube returns incomplete data if tabname is not lower case
4003 pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
4004
4005 if is_channel:
4006 if smuggled_data.get('is_music_url'):
4007 if item_id[:2] == 'VL':
4008 # Youtube music VL channels have an equivalent playlist
4009 item_id = item_id[2:]
4010 pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
abcdd12b 4011 elif item_id[:2] == 'MP':
4012 # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
4013 item_id = self._search_regex(
4014 r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
4015 self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
4016 'playlist id')
4017 pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
fe03a6cd 4018 elif mobj['channel_type'] == 'browse':
4019 # Youtube music /browse/ should be changed to /channel/
4020 pre = 'https://www.youtube.com/channel/%s' % item_id
4021 if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
4022 # Home URLs should redirect to /videos/
6a39ee13 4023 self.report_warning(
cd7c66cf 4024 'A channel/user page was given. All the channel\'s videos will be downloaded. '
4025 'To download only the videos in the home page, add a "/featured" to the URL')
fe03a6cd 4026 tab = '/videos'
4027
4028 url = ''.join((pre, tab, post))
4029 mobj = get_mobj(url)
cd7c66cf 4030
4031 # Handle both video/playlist URLs
201c1459 4032 qs = parse_qs(url)
cd7c66cf 4033 video_id = qs.get('v', [None])[0]
4034 playlist_id = qs.get('list', [None])[0]
4035
fe03a6cd 4036 if not video_id and mobj['not_channel'].startswith('watch'):
cd7c66cf 4037 if not playlist_id:
fe03a6cd 4038 # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
cd7c66cf 4039 raise ExtractorError('Unable to recognize tab page')
fe03a6cd 4040 # Common mistake: https://www.youtube.com/watch?list=playlist_id
6a39ee13 4041 self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
cd7c66cf 4042 url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
18db7548 4043 mobj = get_mobj(url)
cd7c66cf 4044
4045 if video_id and playlist_id:
a06916d9 4046 if self.get_param('noplaylist'):
cd7c66cf 4047 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
4048 return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
4049 self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))
4050
4051 webpage, data = self._extract_webpage(url, item_id)
14fdfea9 4052
18db7548 4053 tabs = try_get(
4054 data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
4055 if tabs:
4056 selected_tab = self._extract_selected_tab(tabs)
4057 tab_name = selected_tab.get('title', '')
09f1580e 4058 if 'no-youtube-channel-redirect' not in compat_opts:
4059 if mobj['tab'] == '/live':
4060 # Live tab should have redirected to the video
4061 raise ExtractorError('The channel is not currently live', expected=True)
4062 if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
4063 if not mobj['not_channel'] and item_id[:2] == 'UC':
4064 # Topic channels don't have /videos. Use the equivalent playlist instead
4065 self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
4066 pl_id = 'UU%s' % item_id[2:]
4067 pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
4068 try:
4069 pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
4070 for alert_type, alert_message in self._extract_alerts(pl_data):
4071 if alert_type == 'error':
4072 raise ExtractorError('Youtube said: %s' % alert_message)
4073 item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
4074 except ExtractorError:
4075 self.report_warning('The playlist gave error. Falling back to channel URL')
4076 else:
4077 self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))
18db7548 4078
4079 self.write_debug('Final URL: %s' % url)
4080
358de58c 4081 # YouTube sometimes provides a button to reload playlist with unavailable videos.
53ed7066 4082 if 'no-youtube-unavailable-videos' not in compat_opts:
4083 data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
95c01b6c 4084 self._extract_and_report_alerts(data)
358de58c 4085
8bdd16b4 4086 tabs = try_get(
4087 data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
4088 if tabs:
d069eca7 4089 return self._extract_from_tabs(item_id, webpage, data, tabs)
cd7c66cf 4090
8bdd16b4 4091 playlist = try_get(
4092 data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4093 if playlist:
79360d99 4094 return self._extract_from_playlist(item_id, url, data, playlist, webpage)
cd7c66cf 4095
a0566bbf 4096 video_id = try_get(
4097 data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
4098 compat_str) or video_id
8bdd16b4 4099 if video_id:
09f1580e 4100 if mobj['tab'] != '/live': # live tab is expected to redirect to video
4101 self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
8bdd16b4 4102 return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
cd7c66cf 4103
8bdd16b4 4104 raise ExtractorError('Unable to recognize tab page')
c5e8d7af 4105
c5e8d7af 4106
class YoutubePlaylistIE(InfoExtractor):
    """Accepts bare playlist IDs and playlist URLs and hands them over to YoutubeTabIE"""
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE handles or that carry a video ID ('v')"""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return not has_video_id and super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite url into a /playlist URL and delegate it to YoutubeTabIE"""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query if there is one; a bare ID has none
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4189
4190
class YoutubeYtBeIE(InfoExtractor):
    """Turns youtu.be links that carry a playlist ID into watch URLs for YoutubeTabIE"""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent watch URL (video + playlist) and delegate to YoutubeTabIE"""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4229
4230
class YoutubeYtUserIE(InfoExtractor):
    """Maps the 'ytuser:<name>' shorthand to the user's page, handled by YoutubeTabIE"""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4244
b05654f0 4245
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Maps the ':ytfav' shorthand to the 'LL' playlist, handled by YoutubeTabIE"""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
4263
4264
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor that pages through the 'search' API endpoint"""
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """
        Generate the video results for query, stopping after n results or
        when there is no further page (no response, no contents or no
        continuation token).
        """
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS
        found = 0
        for page_num in itertools.count(1):
            response = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not response:
                return
            # The first page and the continuation pages keep the sections in different places
            sections = try_get(
                response,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation_token = None
            for section in sections:
                if continuation_token is None:
                    continuation_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list) or []
                for item in items:
                    video = item.get('videoRenderer') if isinstance(item, dict) else None
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    found += 1
                    if found == n:
                        return

            if not continuation_token:
                return
            request['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 4334
c9ae7b95 4335
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE registered under the 'ytsearchdate' keyword"""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # Sent as 'params' with every search request; presumably the URL-encoded
    # filter that selects the "newest first" ordering named in IE_DESC
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4341
c9ae7b95 4342
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Extracts the results of a youtube.com/results search URL"""
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return the URL pattern declared on the class itself"""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Search for the query given in url, passing its 'sp' value on as search params"""
        url_query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        search_terms = url_query.get('search_query') or url_query.get('q')
        query = search_terms[0]
        # An absent 'sp' results in an empty string, i.e. no params are sent
        self._SEARCH_PARAMS = url_query.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4368
4369
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors
    Every subclass has to provide the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        feed_name = self._FEED_NAME
        return 'youtube:%s' % feed_name

    def _real_extract(self, url):
        # The feed page itself is extracted by YoutubeTabIE
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4386
4387
class YoutubeWatchLaterIE(InfoExtractor):
    """Maps the ':ytwatchlater' shorthand to the 'WL' playlist, handled by YoutubeTabIE"""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4400
4401
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'recommended' feed; also matches the bare youtube.com home URL"""
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Either the site root (optionally followed by a query or fragment) or the ':ytrec' shorthand
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Overrides the base class value of True
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4417
1ed5b5c9 4418
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'subscriptions' feed, reachable through the ':ytsubs' shorthand"""
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4430
1ed5b5c9 4431
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'history' feed, reachable through the ':ythis' shorthand"""
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4440
4441
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """
    Catches watch/attribution URLs that lost their video ID and reports a
    helpful error instead of attempting an extraction.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Matches only when the URL ends right after a single non-video query
    # parameter (or after the bare 'watch?'), i.e. when there is no 'v='
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raises an expected ExtractorError explaining how to quote the URL"""
        # The example commands name this project's executable
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4489
4490
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catches watch URLs whose video ID is shorter than 11 characters and reports an error"""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raises an expected ExtractorError naming the incomplete ID"""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)