]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[youtube] Extract even more thumbnails and reduce testing
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
2d6659b9 5import base64
d92f5d5a 6import calendar
109dd3b2 7import copy
fe93e2c4 8import datetime
a5c56234 9import hashlib
0ca96d48 10import itertools
c5e8d7af 11import json
c4417ddb 12import os.path
d77ab8e2 13import random
c5e8d7af 14import re
8a784c74 15import time
e0df6211 16import traceback
c5e8d7af 17
b05654f0 18from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 19from ..compat import (
edf3e38e 20 compat_chr,
29f7c58a 21 compat_HTTPError,
c5e8d7af 22 compat_parse_qs,
545cc85d 23 compat_str,
7fd002c0 24 compat_urllib_parse_unquote_plus,
15707c7e 25 compat_urllib_parse_urlencode,
7c80519c 26 compat_urllib_parse_urlparse,
7c61bd36 27 compat_urlparse,
4bb4a188 28)
545cc85d 29from ..jsinterp import JSInterpreter
4bb4a188 30from ..utils import (
c224251a 31 bool_or_none,
2d6659b9 32 bytes_to_intlist,
c5e8d7af 33 clean_html,
26fe8ffe 34 dict_get,
d92f5d5a 35 datetime_from_str,
358de58c 36 error_to_compat_str,
c5e8d7af 37 ExtractorError,
b60419c5 38 format_field,
2d30521a 39 float_or_none,
dd27fd17 40 int_or_none,
2d6659b9 41 intlist_to_bytes,
94278f72 42 mimetype2ext,
6310acf5 43 parse_codecs,
49bd8c66 44 parse_count,
7c80519c 45 parse_duration,
dca3ff4a 46 qualities,
3995d37d 47 remove_start,
cf7e015f 48 smuggle_url,
dbdaaa23 49 str_or_none,
c93d53f5 50 str_to_int,
556dbe7f 51 try_get,
c5e8d7af
PH
52 unescapeHTML,
53 unified_strdate,
cf7e015f 54 unsmuggle_url,
8bdd16b4 55 update_url_query,
21c340b8 56 url_or_none,
6e6bc8da 57 urlencode_postdata,
fe93e2c4 58 urljoin,
59 variadic
c5e8d7af
PH
60)
61
5f6a1245 62
def parse_qs(url):
    """Parse the query component of `url` with compat_urlparse.parse_qs and return the result."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
65
66
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Google account endpoints belonging to the username/password login flow
    # (that flow is currently marked as broken in _login).
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # '{0}' is filled in with str.format()
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Regex alternation of URL path names.
    # NOTE(review): presumably the paths that must not be mistaken for a
    # channel/user name - confirm against the URL patterns that use it.
    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Regex fragment for playlist IDs: one of the listed prefixes followed by at
    # least 10 ID characters, or one of the fixed IDs RDMM, WL, LL and LM.
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 86
b2e8bc1b 87 def _login(self):
83317f69 88 """
89 Attempt to log in to YouTube.
90 True is returned if successful or skipped.
91 False is returned if login failed.
92
93 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
94 """
9d5d4d64 95
96 def warn(message):
97 self.report_warning(message)
98
99 # username+password login is broken
100 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
101 self.raise_login_required(
102 'Login details are needed to download this content', method='cookies')
68217024 103 username, password = self._get_login_info()
9d5d4d64 104 if username:
105 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
106 return
9d5d4d64 107
2d6659b9 108 # Everything below this is broken!
109 r'''
b2e8bc1b
JMF
110 # No authentication to be performed
111 if username is None:
a06916d9 112 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
69ea8ca4 113 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
a06916d9 114 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
545cc85d 115 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
83317f69 116 return True
b2e8bc1b 117
7cc3570e
PH
118 login_page = self._download_webpage(
119 self._LOGIN_URL, None,
69ea8ca4
PH
120 note='Downloading login page',
121 errnote='unable to fetch login page', fatal=False)
7cc3570e
PH
122 if login_page is False:
123 return
b2e8bc1b 124
1212e997 125 login_form = self._hidden_inputs(login_page)
c5e8d7af 126
e00eb564
S
127 def req(url, f_req, note, errnote):
128 data = login_form.copy()
129 data.update({
130 'pstMsg': 1,
131 'checkConnection': 'youtube',
132 'checkedDomains': 'youtube',
133 'hl': 'en',
134 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
3995d37d 135 'f.req': json.dumps(f_req),
e00eb564
S
136 'flowName': 'GlifWebSignIn',
137 'flowEntry': 'ServiceLogin',
baf67a60
S
138 # TODO: reverse actual botguard identifier generation algo
139 'bgRequest': '["identifier",""]',
041bc3ad 140 })
e00eb564
S
141 return self._download_json(
142 url, None, note=note, errnote=errnote,
143 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
144 fatal=False,
145 data=urlencode_postdata(data), headers={
146 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
147 'Google-Accounts-XSRF': 1,
148 })
149
3995d37d
S
150 lookup_req = [
151 username,
152 None, [], None, 'US', None, None, 2, False, True,
153 [
154 None, None,
155 [2, 1, None, 1,
156 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
157 None, [], 4],
158 1, [None, None, []], None, None, None, True
159 ],
160 username,
161 ]
162
e00eb564 163 lookup_results = req(
3995d37d 164 self._LOOKUP_URL, lookup_req,
e00eb564
S
165 'Looking up account info', 'Unable to look up account info')
166
167 if lookup_results is False:
168 return False
041bc3ad 169
3995d37d
S
170 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
171 if not user_hash:
172 warn('Unable to extract user hash')
173 return False
174
175 challenge_req = [
176 user_hash,
177 None, 1, None, [1, None, None, None, [password, None, True]],
178 [
179 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
180 1, [None, None, []], None, None, None, True
181 ]]
83317f69 182
3995d37d
S
183 challenge_results = req(
184 self._CHALLENGE_URL, challenge_req,
185 'Logging in', 'Unable to log in')
83317f69 186
3995d37d 187 if challenge_results is False:
e00eb564 188 return
83317f69 189
3995d37d
S
190 login_res = try_get(challenge_results, lambda x: x[0][5], list)
191 if login_res:
192 login_msg = try_get(login_res, lambda x: x[5], compat_str)
193 warn(
194 'Unable to login: %s' % 'Invalid password'
195 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
196 return False
197
198 res = try_get(challenge_results, lambda x: x[0][-1], list)
199 if not res:
200 warn('Unable to extract result entry')
201 return False
202
9a6628aa
S
203 login_challenge = try_get(res, lambda x: x[0][0], list)
204 if login_challenge:
205 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
206 if challenge_str == 'TWO_STEP_VERIFICATION':
3995d37d
S
207 # SEND_SUCCESS - TFA code has been successfully sent to phone
208 # QUOTA_EXCEEDED - reached the limit of TFA codes
9a6628aa 209 status = try_get(login_challenge, lambda x: x[5], compat_str)
3995d37d
S
210 if status == 'QUOTA_EXCEEDED':
211 warn('Exceeded the limit of TFA codes, try later')
212 return False
213
214 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
215 if not tl:
216 warn('Unable to extract TL')
217 return False
218
219 tfa_code = self._get_tfa_info('2-step verification code')
220
221 if not tfa_code:
222 warn(
223 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
224 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
225 return False
226
227 tfa_code = remove_start(tfa_code, 'G-')
228
229 tfa_req = [
230 user_hash, None, 2, None,
231 [
232 9, None, None, None, None, None, None, None,
233 [None, tfa_code, True, 2]
234 ]]
235
236 tfa_results = req(
237 self._TFA_URL.format(tl), tfa_req,
238 'Submitting TFA code', 'Unable to submit TFA code')
239
240 if tfa_results is False:
241 return False
242
243 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
244 if tfa_res:
245 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
246 warn(
247 'Unable to finish TFA: %s' % 'Invalid TFA code'
248 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
249 return False
250
251 check_cookie_url = try_get(
252 tfa_results, lambda x: x[0][-1][2], compat_str)
9a6628aa
S
253 else:
254 CHALLENGES = {
255 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
256 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
257 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
258 }
259 challenge = CHALLENGES.get(
260 challenge_str,
261 '%s returned error %s.' % (self.IE_NAME, challenge_str))
262 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
263 return False
3995d37d
S
264 else:
265 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
266
267 if not check_cookie_url:
268 warn('Unable to extract CheckCookie URL')
269 return False
e00eb564
S
270
271 check_cookie_results = self._download_webpage(
3995d37d
S
272 check_cookie_url, None, 'Checking cookie', fatal=False)
273
274 if check_cookie_results is False:
275 return False
e00eb564 276
3995d37d
S
277 if 'https://myaccount.google.com/' not in check_cookie_results:
278 warn('Unable to log in')
b2e8bc1b 279 return False
e00eb564 280
b2e8bc1b 281 return True
2d6659b9 282 '''
b2e8bc1b 283
cce889b9 284 def _initialize_consent(self):
285 cookies = self._get_cookies('https://www.youtube.com/')
286 if cookies.get('__Secure-3PSID'):
287 return
288 consent_id = None
289 consent = cookies.get('CONSENT')
290 if consent:
291 if 'YES' in consent.value:
292 return
293 consent_id = self._search_regex(
294 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
295 if not consent_id:
296 consent_id = random.randint(100, 999)
297 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 298
b2e8bc1b 299 def _real_initialize(self):
cce889b9 300 self._initialize_consent()
b2e8bc1b
JMF
301 if self._downloader is None:
302 return
b2e8bc1b
JMF
303 if not self._login():
304 return
c5e8d7af 305
# Captures the JSON object assigned to ytInitialData (or window["ytInitialData"])
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
# Captures the JSON object assigned to ytInitialPlayerResponse
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
# Text expected right after the JSON object; _extract_yt_initial_data appends it
# to _YT_INITIAL_DATA_RE so the non-greedy match ends at a known boundary.
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 309
109dd3b2 310 _YT_DEFAULT_YTCFGS = {
311 'WEB': {
312 'INNERTUBE_API_VERSION': 'v1',
313 'INNERTUBE_CLIENT_NAME': 'WEB',
314 'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
315 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
316 'INNERTUBE_CONTEXT': {
317 'client': {
318 'clientName': 'WEB',
319 'clientVersion': '2.20210622.10.00',
320 'hl': 'en',
321 }
322 },
323 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
324 },
325 'WEB_REMIX': {
326 'INNERTUBE_API_VERSION': 'v1',
327 'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
328 'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
329 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
330 'INNERTUBE_CONTEXT': {
331 'client': {
332 'clientName': 'WEB_REMIX',
333 'clientVersion': '1.20210621.00.00',
334 'hl': 'en',
335 }
336 },
337 'INNERTUBE_CONTEXT_CLIENT_NAME': 67
338 },
339 'WEB_EMBEDDED_PLAYER': {
340 'INNERTUBE_API_VERSION': 'v1',
341 'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
342 'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
343 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
344 'INNERTUBE_CONTEXT': {
345 'client': {
346 'clientName': 'WEB_EMBEDDED_PLAYER',
347 'clientVersion': '1.20210620.0.1',
348 'hl': 'en',
349 }
350 },
351 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
352 },
353 'ANDROID': {
354 'INNERTUBE_API_VERSION': 'v1',
355 'INNERTUBE_CLIENT_NAME': 'ANDROID',
356 'INNERTUBE_CLIENT_VERSION': '16.20',
357 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
358 'INNERTUBE_CONTEXT': {
359 'client': {
360 'clientName': 'ANDROID',
361 'clientVersion': '16.20',
362 'hl': 'en',
363 }
364 },
fe93e2c4 365 'INNERTUBE_CONTEXT_CLIENT_NAME': 3
109dd3b2 366 },
367 'ANDROID_EMBEDDED_PLAYER': {
368 'INNERTUBE_API_VERSION': 'v1',
369 'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
370 'INNERTUBE_CLIENT_VERSION': '16.20',
371 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
372 'INNERTUBE_CONTEXT': {
373 'client': {
374 'clientName': 'ANDROID_EMBEDDED_PLAYER',
375 'clientVersion': '16.20',
376 'hl': 'en',
377 }
378 },
fe93e2c4 379 'INNERTUBE_CONTEXT_CLIENT_NAME': 55
109dd3b2 380 },
381 'ANDROID_MUSIC': {
382 'INNERTUBE_API_VERSION': 'v1',
383 'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
384 'INNERTUBE_CLIENT_VERSION': '4.32',
385 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
386 'INNERTUBE_CONTEXT': {
387 'client': {
388 'clientName': 'ANDROID_MUSIC',
389 'clientVersion': '4.32',
390 'hl': 'en',
391 }
392 },
fe93e2c4 393 'INNERTUBE_CONTEXT_CLIENT_NAME': 21
109dd3b2 394 }
395 }
396
397 _YT_DEFAULT_INNERTUBE_HOSTS = {
398 'DIRECT': 'youtubei.googleapis.com',
399 'WEB': 'www.youtube.com',
400 'WEB_REMIX': 'music.youtube.com',
401 'ANDROID_MUSIC': 'music.youtube.com'
402 }
403
404 def _get_default_ytcfg(self, client='WEB'):
405 if client in self._YT_DEFAULT_YTCFGS:
406 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
407 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
408 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
409
def _get_innertube_host(self, client='WEB'):
    """Look up the API hostname of `client` in _YT_DEFAULT_INNERTUBE_HOSTS, trying 'WEB' as second key."""
    lookup_keys = (client, 'WEB')
    return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, lookup_keys)
412
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
    """
    try_get() on `ytcfg`, falling back to the default ytcfg of `default_client`
    when the first lookup gives a falsy result.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
417
def _extract_client_name(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_NAME from `ytcfg`, or from the defaults of `default_client`."""
    def read_client_name(cfg):
        return cfg['INNERTUBE_CLIENT_NAME']

    return self._ytcfg_get_safe(ytcfg, read_client_name, compat_str, default_client)
420
@staticmethod
def _extract_session_index(ytcfg):
    """Return ytcfg['SESSION_INDEX'] passed through int_or_none (the lookup is done with try_get)."""
    raw_index = try_get(ytcfg, lambda x: x['SESSION_INDEX'])
    return int_or_none(raw_index)
424
def _extract_client_version(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_VERSION from `ytcfg`, or from the defaults of `default_client`."""
    def read_client_version(cfg):
        return cfg['INNERTUBE_CLIENT_VERSION']

    return self._ytcfg_get_safe(ytcfg, read_client_version, compat_str, default_client)
427
def _extract_api_key(self, ytcfg=None, default_client='WEB'):
    """Return INNERTUBE_API_KEY from `ytcfg`, or from the defaults of `default_client`."""
    def read_api_key(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, read_api_key, compat_str, default_client)
430
def _extract_context(self, ytcfg=None, default_client='WEB'):
    """
    Return the INNERTUBE_CONTEXT dict to send with API requests.

    The context found in `ytcfg` is returned as-is when present. Otherwise the
    default context of `default_client` is used; if a ytcfg was passed, its
    client name/version (and VISITOR_DATA, if any) are written into that copy.
    """
    def read_context(cfg):
        return try_get(cfg, lambda x: x['INNERTUBE_CONTEXT'], dict)

    own_context = read_context(ytcfg)
    if own_context:
        return own_context

    context = read_context(self._get_default_ytcfg(default_client))
    if ytcfg:
        # Recreate the client context (required)
        client_version = self._extract_client_version(ytcfg, default_client)
        client_name = self._extract_client_name(ytcfg, default_client)
        context['client'].update({
            'clientVersion': client_version,
            'clientName': client_name,
        })
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            context['client']['visitorData'] = visitor_data
    return context
450
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """
    Build the 'SAPISIDHASH <time>_<sha1>' authorization value for `origin`.

    Returns None when neither the __Secure-3PAPISID nor the SAPISID cookie is
    available. When the SAPISID cookie itself is missing it is set from the
    cookie that was found.
    """
    cookies = self._get_cookies('https://www.youtube.com')
    # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
    # See: https://github.com/yt-dlp/yt-dlp/issues/393
    sapisid = dict_get(cookies, ('__Secure-3PAPISID', 'SAPISID'))
    if sapisid is None:
        return

    timestamp = round(time.time())
    if not cookies.get('SAPISID'):
        # SAPISID cookie is required if not already present
        self._set_cookie(
            '.youtube.com', 'SAPISID', sapisid.value, secure=True, expire_time=timestamp + 3600)

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    payload = f'{timestamp} {sapisid.value} {origin}'.encode('utf-8')
    digest = hashlib.sha1(payload).hexdigest()
    return f'SAPISIDHASH {timestamp}_{digest}'
a5c56234
M
468
469 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 470 note='Downloading API JSON', errnote='Unable to download API page',
109dd3b2 471 context=None, api_key=None, api_hostname=None, default_client='WEB'):
f4f751af 472
109dd3b2 473 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
8bdd16b4 474 data.update(query)
109dd3b2 475 real_headers = self._generate_api_headers(client=default_client)
f4f751af 476 real_headers.update({'content-type': 'application/json'})
477 if headers:
478 real_headers.update(headers)
545cc85d 479 return self._download_json(
109dd3b2 480 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
a5c56234 481 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 482 data=json.dumps(data).encode('utf8'), headers=real_headers,
483 query={'key': api_key or self._extract_api_key()})
484
8bdd16b4 485 def _extract_yt_initial_data(self, video_id, webpage):
486 return self._parse_json(
487 self._search_regex(
29f7c58a 488 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
a0566bbf 489 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
8bdd16b4 490 video_id)
0c148415 491
a1c5d2ca
M
def _extract_identity_token(self, webpage, item_id):
    """Return ID_TOKEN from the page's ytcfg, else from a regex search of `webpage` (None if absent)."""
    ytcfg = self._extract_ytcfg(item_id, webpage)
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
501
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    @returns the first syncId found in the given objects, else None
    """
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_session_id = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated_session_id:
            return delegated_session_id

        datasync_id = try_get(
            source, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                     lambda x: x['DATASYNC_ID']), compat_str) or ''
        parts = datasync_id.split("||")
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) < 2 or not parts[1]:
            continue
        return parts[0]
a1c5d2ca 520
29f7c58a 521 def _extract_ytcfg(self, video_id, webpage):
8c54a305 522 if not webpage:
523 return {}
29f7c58a 524 return self._parse_json(
525 self._search_regex(
526 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
f4f751af 527 default='{}'), video_id, fatal=False) or {}
528
def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None,
                          visitor_data=None, api_hostname=None, client='WEB', session_index=None):
    """
    Build the HTTP headers for an innertube API request.

    Client name/version come from `ytcfg` with fallback to the defaults of
    `client`. The identity token, page id, auth user, visitor id and the
    SAPISIDHASH authorization headers are only added when a value for them
    is given or can be derived from `ytcfg` / the cookies.
    """
    origin = 'https://' + (api_hostname or self._get_innertube_host(client))
    context_client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=client)
    headers = {
        'X-YouTube-Client-Name': compat_str(context_client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, client),
        'Origin': origin
    }

    if ytcfg and not visitor_data:
        visitor_data = try_get(
            self._extract_context(ytcfg, client), lambda x: x['client']['visitorData'], compat_str)

    if identity_token:
        headers['X-Youtube-Identity-Token'] = identity_token
    if account_syncid:
        headers['X-Goog-PageId'] = account_syncid

    if ytcfg and session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # Use 0 when a syncid is given but no session index is known
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    if visitor_data:
        headers['X-Goog-Visitor-Id'] = visitor_data

    authorization = self._generate_sapisidhash_header(origin)
    if authorization is not None:
        headers['Authorization'] = authorization
        headers['X-Origin'] = origin
    return headers
29f7c58a 556
2d6659b9 557 @staticmethod
558 def _build_api_continuation_query(continuation, ctp=None):
559 query = {
560 'continuation': continuation
561 }
562 # TODO: Inconsistency with clickTrackingParams.
563 # Currently we have a fixed ctp contained within context (from ytcfg)
564 # and a ctp in root query for continuation.
565 if ctp:
566 query['clickTracking'] = {'clickTrackingParams': ctp}
567 return query
568
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the renderer's nextContinuationData or
    reloadContinuationData; returns None when there is no continuation token.
    """
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))
2d6659b9 581
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.

    Returns None when `continuation_ep` is not a dict or has no
    continuationCommand token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if token:
        return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))
    return None
2d6659b9 591
@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for `renderer`, or None if it has none.

    The renderer's own continuation data is preferred; otherwise the first
    usable continuationItemRenderer among its 'contents' and 'items' is used.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query

    entries = [
        entry
        for key in ('contents', 'items')
        for entry in try_get(renderer, lambda x: x[key], list) or []]

    for entry in entries:
        if not isinstance(entry, dict):
            continue
        endpoint = try_get(
            entry, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                    lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
            dict)
        query = cls._extract_continuation_ep_data(endpoint)
        if query:
            return query
612
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert type, message) for every alert in data['alerts'].

    Entries that are not dicts, alerts without a 'type' and alerts whose
    text cannot be extracted are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Each value is expected to be a renderer dict; anything else
            # would make the .get() calls below raise AttributeError
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert.get('text'))
            if message:
                yield alert_type, message
625
626 def _report_alerts(self, alerts, expected=True):
627 errors = []
628 warnings = []
629 for alert_type, alert_message in alerts:
630 if alert_type.lower() == 'error':
631 errors.append([alert_type, alert_message])
632 else:
633 warnings.append([alert_type, alert_message])
634
635 for alert_type, alert_message in (warnings + errors[:-1]):
636 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
637 if errors:
638 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
639
640 def _extract_and_report_alerts(self, data, *args, **kwargs):
641 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
642
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadataBadgeRenderer labels found in renderer['badges']."""
    badge_list = try_get(renderer, lambda x: x['badges'], list) or []
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in badge_list)
    return {label.lower() for label in labels if label}
650
@staticmethod
def _get_text(data, getter=None, max_runs=None):
    """
    Return the text of a text object: its 'simpleText', else its 'runs' joined.

    @param getter    getter or sequence of getters applied to `data`; they are
                     tried in order and the first non-empty text wins.
                     With None, `data` itself is the text object.
    @param max_runs  if truthy, caps how many runs are joined
    @returns the text, or None when nothing was found
    """
    for get in variadic(getter):
        text_obj = data if get is None else try_get(data, get)
        simple_text = try_get(text_obj, lambda x: x['simpleText'], compat_str)
        if simple_text:
            return simple_text

        runs = try_get(text_obj, lambda x: x['runs'], list) or []
        if not runs and isinstance(text_obj, list):
            # The object may itself be the list of runs
            runs = text_obj

        limit = min(len(runs), max_runs or len(runs))
        joined = ''.join(
            try_get(run, lambda x: x['text'], compat_str) or ''
            for run in runs[:limit])
        if joined:
            return joined
47193e02 669
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='WEB'):
    """
    Call the API endpoint `ep` through _call_api, retrying on failures.

    Up to 'extractor_retries' retries (param, default 3) are made when
      * the request fails with HTTP 500, 503 or 404, or
      * check_get_keys is given and dict_get(response, check_get_keys) is falsy.
    Alerts contained in a response are reported through
    _extract_and_report_alerts; an error alert aborts without retrying.

    Context and API key are taken from `ytcfg`, with fallback to the defaults
    of `default_client`.

    @returns the response; None if a failure occurred and fatal is False
    @raises ExtractorError on failure if fatal is True
    """
    response = None
    last_error = None
    # Starts at -1 so that the first attempt is number 0 (i.e. not a retry)
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            # Always fatal here; the `fatal` argument is applied in the handlers below
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                last_error = 'HTTP Error %s' % e.cause.code
                if count < retries:
                    continue
            # Not retryable, or out of retries
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            # Youtube may send alerts if there was an issue with the continuation page
            try:
                self._extract_and_report_alerts(response, expected=False)
            except ExtractorError as e:
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response
725
9297939e 726 @staticmethod
727 def is_music_url(url):
728 return re.match(r'https?://music\.youtube\.com/', url) is not None
729
def _extract_video(self, renderer):
    """Build a 'url' result for YoutubeIE from a video renderer dict."""
    video_id = renderer.get('videoId')

    # Whitespace is removed before the leading run of digits/commas is parsed
    view_count_text = self._get_text(renderer.get('viewCountText')) or ''
    leading_number = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
        'view count', default=None)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
        'title': self._get_text(renderer.get('title')),
        'description': self._get_text(renderer.get('descriptionSnippet')),
        'duration': parse_duration(self._get_text(renderer.get('lengthText'))),
        'view_count': str_to_int(leading_number),
        'uploader': self._get_text(
            renderer, (lambda x: x['ownerText'], lambda x: x['shortBylineText'])),
    }
753
0c148415 754
360e1ca5 755class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 756 IE_DESC = 'YouTube.com'
bc2ca1bb 757 _INVIDIOUS_SITES = (
758 # invidious-redirect websites
759 r'(?:www\.)?redirect\.invidious\.io',
760 r'(?:(?:www|dev)\.)?invidio\.us',
761 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
762 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 763 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 764 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 765 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
bc2ca1bb 766 # youtube-dl invidious instances list
767 r'(?:(?:www|no)\.)?invidiou\.sh',
768 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
769 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 770 r'(?:www\.)?invidious\.mastodon\.host',
771 r'(?:www\.)?invidious\.zapashcanon\.fr',
ed807c18 772 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
201c1459 773 r'(?:www\.)?invidious\.tinfoil-hat\.net',
774 r'(?:www\.)?invidious\.himiko\.cloud',
775 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 776 r'(?:www\.)?invidious\.tube',
777 r'(?:www\.)?invidiou\.site',
778 r'(?:www\.)?invidious\.site',
779 r'(?:www\.)?invidious\.xyz',
780 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 781 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 782 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 783 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 784 r'(?:www\.)?tube\.poal\.co',
785 r'(?:www\.)?tube\.connect\.cafe',
786 r'(?:www\.)?vid\.wxzm\.sx',
787 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 788 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 789 r'(?:www\.)?yewtu\.be',
790 r'(?:www\.)?yt\.elukerio\.org',
791 r'(?:www\.)?yt\.lelux\.fi',
792 r'(?:www\.)?invidious\.ggc-project\.de',
793 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 794 r'(?:www\.)?ytprivate\.com',
795 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 796 r'(?:www\.)?invidious\.toot\.koeln',
797 r'(?:www\.)?invidious\.fdn\.fr',
798 r'(?:www\.)?watch\.nettohikari\.com',
ed807c18 799 r'(?:www\.)?invidious\.namazso\.eu',
800 r'(?:www\.)?invidious\.silkky\.cloud',
801 r'(?:www\.)?invidious\.exonip\.de',
802 r'(?:www\.)?invidious\.riverside\.rocks',
803 r'(?:www\.)?invidious\.blamefran\.net',
804 r'(?:www\.)?invidious\.moomoo\.de',
805 r'(?:www\.)?ytb\.trom\.tf',
806 r'(?:www\.)?yt\.cyberhost\.uk',
bc2ca1bb 807 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
808 r'(?:www\.)?qklhadlycap4cnod\.onion',
809 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
810 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
811 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
812 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
813 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
814 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
ed807c18 815 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
816 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
817 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
818 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
bc2ca1bb 819 )
# Verbose-mode ((?x)) pattern for single-video URLs. Reading top to bottom:
#   1. an optional prefix (capture group 1): "http(s)://" or a
#      protocol-relative "//", then either
#        - one of the accepted hosts followed by the path/query part that
#          precedes the ID ("v/", "embed/", "e/", or a "v=" parameter), or
#        - a short-link style host (youtu.be, vid.plus, zwearz.com/watch or
#          one of the Invidious hosts) followed by "/", or
#        - the cleanvideosearch.com "watch?videoId=" URL;
#   2. the 11-character video ID, captured as the named group "id";
#   3. arbitrary trailing text, allowed only when the prefix in (1) matched
#      (the conditional "(?(1).+)?"), followed by "#" or end of string.
# Because (1) is optional, a bare 11-character ID also matches.
# The two "%(invidious)s" placeholders are filled with the "|"-joined
# entries of _INVIDIOUS_SITES through %-formatting, so any literal "%"
# added to the pattern would have to be written as "%%".
cb7dfeea 820 _VALID_URL = r"""(?x)^
c5e8d7af 821 (
edb53e2d 822 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 823 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
824 (?:www\.)?deturl\.com/www\.youtube\.com|
825 (?:www\.)?pwnyoutube\.com|
826 (?:www\.)?hooktube\.com|
827 (?:www\.)?yourepeat\.com|
828 tube\.majestyc\.net|
829 %(invidious)s|
830 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
831 (?:.*?\#/)? # handle anchor (#/) redirect urls
832 (?: # the various things that can precede the ID:
ac7553d0 833 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 834 |(?: # or the v= param in all its forms
f7000f3a 835 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 836 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 837 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
838 v=
839 )
f4b05232 840 ))
cbaed4bb
S
841 |(?:
842 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
843 vid\.plus| # or vid.plus/xxxx
844 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 845 %(invidious)s
cbaed4bb 846 )/
edb53e2d 847 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 848 )
c5e8d7af 849 )? # all until now is optional -> you can pass the naked ID
201c1459 850 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 851 (?(1).+)? # if we found the ID, everything can follow
9297939e 852 (?:\#|$)""" % {
bc2ca1bb 853 'invidious': '|'.join(_INVIDIOUS_SITES),
854 }
# Regular expressions that each capture a player identifier in the named
# group "id". Three URL shapes are covered, one pattern per shape.
# NOTE(review): presumably matched against the player JS URL; the code that
# consumes this tuple is outside this section -- confirm at the use site.
_PLAYER_INFO_RE = (
    # .../s/player/<id>/player...
    r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
    # .../<id>/player_ias.vflset[/<xx_XX>]/base.js, or the
    # player-plasma-ias-(phone|tablet)-<xx_XX>.vflset variant
    (r'/(?P<id>[a-zA-Z0-9_-]{8,})/player'
     r'(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?'
     r'|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)'
     r'/base\.js$'),
    # any .js URL containing a "vfl..." token
    r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)
# Static per-format metadata table, keyed by format ID as a string (the
# inline comments below refer to these IDs as "itags"). Each value is a dict
# of the properties known in advance for that ID; the keys that appear are
# 'ext', 'container', 'width', 'height', 'acodec', 'vcodec', 'abr', 'fps',
# 'format_note' and 'preference' (negative for the 3D and HLS entries).
# The special '_rtmp' key carries only a 'protocol' entry, and the av01
# entries at the end carry only codec information.
# NOTE(review): 'abr' is presumably in kbit/s, and how this table is combined
# with data obtained at extraction time is not visible in this section --
# confirm at the use site.
2c62dc26 860 _formats = {
c2d3cb4c 861 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
862 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
863 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
864 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
865 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
866 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
867 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
868 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 869 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 870 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
871 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
872 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
873 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
874 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
875 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 876 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 877 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
878 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 879
880
881 # 3D videos
c2d3cb4c 882 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
883 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
884 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
885 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 886 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
887 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
888 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 889
96fb5605 890 # Apple HTTP Live Streaming
11f12195 891 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 892 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
893 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
894 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
895 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
896 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 897 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
898 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
899
900 # DASH mp4 video
d23028a8
S
901 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
902 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
903 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
904 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
905 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 906 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
907 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
908 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
909 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
910 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
911 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
912 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 913
f6f1fc92 914 # Dash mp4 audio
d23028a8
S
915 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
916 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
917 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
918 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
919 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
920 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
921 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
922
923 # Dash webm
d23028a8
S
924 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
925 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
926 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
927 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
928 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
929 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
930 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
931 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
932 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
933 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
934 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
935 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
936 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
937 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
938 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 939 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
940 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
941 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
942 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
943 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
944 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
945 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
946
947 # Dash webm audio
d23028a8
S
948 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
949 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 950
0857baad 951 # Dash webm audio with opus inside
d23028a8
S
952 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
953 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
954 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 955
ce6b9a2d
PH
956 # RTMP (unnamed)
957 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
958
959 # av01 video only formats sometimes served with "unknown" codecs
960 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
961 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
962 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
963 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 964 }
# Names of the subtitle formats this class knows about, as a fixed tuple.
# NOTE(review): presumably the formats requested for each caption track;
# the consumer is outside this section -- confirm at the use site.
_SUBTITLE_FORMATS = (
    'json3',
    'srv1',
    'srv2',
    'srv3',
    'ttml',
    'vtt',
)

# Message strings that identify an age-gated video.
# NOTE(review): presumably compared against a status/reason text returned
# for the video; the comparison itself is not visible here -- verify.
_AGE_GATE_REASONS = (
    'Sign in to confirm your age',
    'This video may be inappropriate for some users.',
    'Sorry, this content is age-restricted.',
)

_GEO_BYPASS = False

IE_NAME = 'youtube'
2eb88d95
PH
975 _TESTS = [
976 {
2d3d2997 977 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
978 'info_dict': {
979 'id': 'BaW_jenozKc',
980 'ext': 'mp4',
3867038a 981 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
982 'uploader': 'Philipp Hagemeister',
983 'uploader_id': 'phihag',
ec85ded8 984 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
985 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
986 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 987 'upload_date': '20121002',
3867038a 988 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 989 'categories': ['Science & Technology'],
3867038a 990 'tags': ['youtube-dl'],
556dbe7f 991 'duration': 10,
dbdaaa23 992 'view_count': int,
3e7c1224
PH
993 'like_count': int,
994 'dislike_count': int,
7c80519c 995 'start_time': 1,
297a564b 996 'end_time': 9,
2eb88d95 997 }
0e853ca4 998 },
fccd3771 999 {
4bc3a23e
PH
1000 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1001 'note': 'Embed-only video (#1746)',
1002 'info_dict': {
1003 'id': 'yZIXLfi8CZQ',
1004 'ext': 'mp4',
1005 'upload_date': '20120608',
1006 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1007 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1008 'uploader': 'SET India',
94bfcd23 1009 'uploader_id': 'setindia',
ec85ded8 1010 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1011 'age_limit': 18,
545cc85d 1012 },
1013 'skip': 'Private video',
fccd3771 1014 },
11b56058 1015 {
8bdd16b4 1016 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1017 'note': 'Use the first video ID in the URL',
1018 'info_dict': {
1019 'id': 'BaW_jenozKc',
1020 'ext': 'mp4',
3867038a 1021 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1022 'uploader': 'Philipp Hagemeister',
1023 'uploader_id': 'phihag',
ec85ded8 1024 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 1025 'upload_date': '20121002',
3867038a 1026 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 1027 'categories': ['Science & Technology'],
3867038a 1028 'tags': ['youtube-dl'],
556dbe7f 1029 'duration': 10,
dbdaaa23 1030 'view_count': int,
11b56058
PM
1031 'like_count': int,
1032 'dislike_count': int,
34a7de29
S
1033 },
1034 'params': {
1035 'skip_download': True,
1036 },
11b56058 1037 },
dd27fd17 1038 {
2d3d2997 1039 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1040 'note': '256k DASH audio (format 141) via DASH manifest',
1041 'info_dict': {
1042 'id': 'a9LDPn-MO4I',
1043 'ext': 'm4a',
1044 'upload_date': '20121002',
1045 'uploader_id': '8KVIDEO',
ec85ded8 1046 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1047 'description': '',
1048 'uploader': '8KVIDEO',
1049 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1050 },
4bc3a23e
PH
1051 'params': {
1052 'youtube_include_dash_manifest': True,
1053 'format': '141',
4919603f 1054 },
de3c7fe0 1055 'skip': 'format 141 not served anymore',
dd27fd17 1056 },
8bdd16b4 1057 # DASH manifest with encrypted signature
1058 {
1059 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1060 'info_dict': {
1061 'id': 'IB3lcPjvWLA',
1062 'ext': 'm4a',
1063 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1064 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1065 'duration': 244,
1066 'uploader': 'AfrojackVEVO',
1067 'uploader_id': 'AfrojackVEVO',
1068 'upload_date': '20131011',
cc2db878 1069 'abr': 129.495,
8bdd16b4 1070 },
1071 'params': {
1072 'youtube_include_dash_manifest': True,
1073 'format': '141/bestaudio[ext=m4a]',
1074 },
1075 },
aa79ac0c
PH
1076 # Controversy video
1077 {
1078 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
1079 'info_dict': {
1080 'id': 'T4XJQO3qol8',
1081 'ext': 'mp4',
556dbe7f 1082 'duration': 219,
aa79ac0c 1083 'upload_date': '20100909',
4fe54c12 1084 'uploader': 'Amazing Atheist',
aa79ac0c 1085 'uploader_id': 'TheAmazingAtheist',
ec85ded8 1086 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 1087 'title': 'Burning Everyone\'s Koran',
545cc85d 1088 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 1089 }
c522adb1 1090 },
dd2d55f1 1091 # Normal age-gate video (embed allowed)
c522adb1 1092 {
2d3d2997 1093 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1094 'info_dict': {
1095 'id': 'HtVdAasjOgU',
1096 'ext': 'mp4',
1097 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1098 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1099 'duration': 142,
c522adb1
JMF
1100 'uploader': 'The Witcher',
1101 'uploader_id': 'WitcherGame',
ec85ded8 1102 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1103 'upload_date': '20140605',
34952f09 1104 'age_limit': 18,
c522adb1
JMF
1105 },
1106 },
8bdd16b4 1107 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1108 # YouTube Red ad is not captured for creator
1109 {
1110 'url': '__2ABJjxzNo',
1111 'info_dict': {
1112 'id': '__2ABJjxzNo',
1113 'ext': 'mp4',
1114 'duration': 266,
1115 'upload_date': '20100430',
1116 'uploader_id': 'deadmau5',
1117 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1118 'creator': 'deadmau5',
1119 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1120 'uploader': 'deadmau5',
1121 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1122 'alt_title': 'Some Chords',
8bdd16b4 1123 },
1124 'expected_warnings': [
1125 'DASH manifest missing',
1126 ]
1127 },
067aa17e 1128 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1129 {
1130 'url': 'lqQg6PlCWgI',
1131 'info_dict': {
1132 'id': 'lqQg6PlCWgI',
1133 'ext': 'mp4',
556dbe7f 1134 'duration': 6085,
90227264 1135 'upload_date': '20150827',
cbe2bd91 1136 'uploader_id': 'olympic',
ec85ded8 1137 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1138 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 1139 'uploader': 'Olympic',
cbe2bd91
PH
1140 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1141 },
1142 'params': {
1143 'skip_download': 'requires avconv',
e52a40ab 1144 }
cbe2bd91 1145 },
6271f1ca
PH
1146 # Non-square pixels
1147 {
1148 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1149 'info_dict': {
1150 'id': '_b-2C3KPAM0',
1151 'ext': 'mp4',
1152 'stretched_ratio': 16 / 9.,
556dbe7f 1153 'duration': 85,
6271f1ca
PH
1154 'upload_date': '20110310',
1155 'uploader_id': 'AllenMeow',
ec85ded8 1156 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1157 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1158 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1159 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1160 },
06b491eb
S
1161 },
1162 # url_encoded_fmt_stream_map is empty string
1163 {
1164 'url': 'qEJwOuvDf7I',
1165 'info_dict': {
1166 'id': 'qEJwOuvDf7I',
f57b7835 1167 'ext': 'webm',
06b491eb
S
1168 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1169 'description': '',
1170 'upload_date': '20150404',
1171 'uploader_id': 'spbelect',
1172 'uploader': 'Наблюдатели Петербурга',
1173 },
1174 'params': {
1175 'skip_download': 'requires avconv',
e323cf3f
S
1176 },
1177 'skip': 'This live event has ended.',
06b491eb 1178 },
067aa17e 1179 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1180 {
1181 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1182 'info_dict': {
1183 'id': 'FIl7x6_3R5Y',
eb6793ba 1184 'ext': 'webm',
da77d856
S
1185 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1186 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1187 'duration': 220,
da77d856
S
1188 'upload_date': '20150625',
1189 'uploader_id': 'dorappi2000',
ec85ded8 1190 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1191 'uploader': 'dorappi2000',
eb6793ba 1192 'formats': 'mincount:31',
da77d856 1193 },
eb6793ba 1194 'skip': 'not actual anymore',
2ee8f5d8 1195 },
8a1a26ce
YCH
1196 # DASH manifest with segment_list
1197 {
1198 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1199 'md5': '8ce563a1d667b599d21064e982ab9e31',
1200 'info_dict': {
1201 'id': 'CsmdDsKjzN8',
1202 'ext': 'mp4',
17ee98e1 1203 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1204 'uploader': 'Airtek',
1205 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1206 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1207 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1208 },
1209 'params': {
1210 'youtube_include_dash_manifest': True,
1211 'format': '135', # bestvideo
be49068d
S
1212 },
1213 'skip': 'This live event has ended.',
2ee8f5d8 1214 },
cf7e015f
S
1215 {
1216 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1217 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1218 'info_dict': {
545cc85d 1219 'id': 'jvGDaLqkpTg',
1220 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1221 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1222 },
1223 'playlist': [{
1224 'info_dict': {
545cc85d 1225 'id': 'jvGDaLqkpTg',
cf7e015f 1226 'ext': 'mp4',
545cc85d 1227 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1228 'description': 'md5:e03b909557865076822aa169218d6a5d',
1229 'duration': 10643,
1230 'upload_date': '20161111',
1231 'uploader': 'Team PGP',
1232 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1233 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1234 },
1235 }, {
1236 'info_dict': {
545cc85d 1237 'id': '3AKt1R1aDnw',
cf7e015f 1238 'ext': 'mp4',
545cc85d 1239 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1240 'description': 'md5:e03b909557865076822aa169218d6a5d',
1241 'duration': 10991,
1242 'upload_date': '20161111',
1243 'uploader': 'Team PGP',
1244 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1245 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1246 },
1247 }, {
1248 'info_dict': {
545cc85d 1249 'id': 'RtAMM00gpVc',
cf7e015f 1250 'ext': 'mp4',
545cc85d 1251 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1252 'description': 'md5:e03b909557865076822aa169218d6a5d',
1253 'duration': 10995,
1254 'upload_date': '20161111',
1255 'uploader': 'Team PGP',
1256 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1257 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1258 },
1259 }, {
1260 'info_dict': {
545cc85d 1261 'id': '6N2fdlP3C5U',
cf7e015f 1262 'ext': 'mp4',
545cc85d 1263 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1264 'description': 'md5:e03b909557865076822aa169218d6a5d',
1265 'duration': 10990,
1266 'upload_date': '20161111',
1267 'uploader': 'Team PGP',
1268 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1269 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1270 },
1271 }],
1272 'params': {
1273 'skip_download': True,
1274 },
cbaed4bb 1275 },
f9f49d87 1276 {
067aa17e 1277 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1278 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1279 'info_dict': {
1280 'id': 'gVfLd0zydlo',
1281 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1282 },
1283 'playlist_count': 2,
be49068d 1284 'skip': 'Not multifeed anymore',
f9f49d87 1285 },
cbaed4bb 1286 {
2d3d2997 1287 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1288 'only_matching': True,
0e49d9a6 1289 },
6d4fc66b 1290 {
2d3d2997 1291 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1292 'only_matching': True,
1293 },
0e49d9a6 1294 {
067aa17e 1295 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1296 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1297 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1298 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1299 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1300 'info_dict': {
1301 'id': 'lsguqyKfVQg',
1302 'ext': 'mp4',
1303 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 1304 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 1305 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1306 'duration': 133,
0e49d9a6
LL
1307 'upload_date': '20151119',
1308 'uploader_id': 'IronSoulElf',
ec85ded8 1309 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1310 'uploader': 'IronSoulElf',
eb6793ba
S
1311 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1312 'track': 'Dark Walk - Position Music',
1313 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 1314 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1315 },
1316 'params': {
1317 'skip_download': True,
1318 },
1319 },
61f92af1 1320 {
067aa17e 1321 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1322 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1323 'only_matching': True,
1324 },
313dfc45
LL
1325 {
1326 # Video with yt:stretch=17:0
1327 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1328 'info_dict': {
1329 'id': 'Q39EVAstoRM',
1330 'ext': 'mp4',
1331 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1332 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1333 'upload_date': '20151107',
1334 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1335 'uploader': 'CH GAMER DROID',
1336 },
1337 'params': {
1338 'skip_download': True,
1339 },
be49068d 1340 'skip': 'This video does not exist.',
313dfc45 1341 },
201c1459 1342 {
1343 # Video with incomplete 'yt:stretch=16:'
1344 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1345 'only_matching': True,
1346 },
7caf9830
S
1347 {
1348 # Video licensed under Creative Commons
1349 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1350 'info_dict': {
1351 'id': 'M4gD1WSo5mA',
1352 'ext': 'mp4',
1353 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1354 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1355 'duration': 721,
7caf9830
S
1356 'upload_date': '20150127',
1357 'uploader_id': 'BerkmanCenter',
ec85ded8 1358 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1359 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1360 'license': 'Creative Commons Attribution license (reuse allowed)',
1361 },
1362 'params': {
1363 'skip_download': True,
1364 },
1365 },
fd050249
S
1366 {
1367 # Channel-like uploader_url
1368 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1369 'info_dict': {
1370 'id': 'eQcmzGIKrzg',
1371 'ext': 'mp4',
1372 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1373 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1374 'duration': 4060,
fd050249 1375 'upload_date': '20151119',
eb6793ba 1376 'uploader': 'Bernie Sanders',
fd050249 1377 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1378 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1379 'license': 'Creative Commons Attribution license (reuse allowed)',
1380 },
1381 'params': {
1382 'skip_download': True,
1383 },
1384 },
040ac686
S
1385 {
1386 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1387 'only_matching': True,
7f29cf54
S
1388 },
1389 {
067aa17e 1390 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1391 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1392 'only_matching': True,
6496ccb4
S
1393 },
1394 {
1395 # Rental video preview
1396 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1397 'info_dict': {
1398 'id': 'uGpuVWrhIzE',
1399 'ext': 'mp4',
1400 'title': 'Piku - Trailer',
1401 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1402 'upload_date': '20150811',
1403 'uploader': 'FlixMatrix',
1404 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1405 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1406 'license': 'Standard YouTube License',
1407 },
1408 'params': {
1409 'skip_download': True,
1410 },
eb6793ba 1411 'skip': 'This video is not available.',
022a5d66 1412 },
12afdc2a
S
1413 {
1414 # YouTube Red video with episode data
1415 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1416 'info_dict': {
1417 'id': 'iqKdEhx-dD4',
1418 'ext': 'mp4',
1419 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1420 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1421 'duration': 2085,
12afdc2a
S
1422 'upload_date': '20170118',
1423 'uploader': 'Vsauce',
1424 'uploader_id': 'Vsauce',
1425 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1426 'series': 'Mind Field',
1427 'season_number': 1,
1428 'episode_number': 1,
1429 },
1430 'params': {
1431 'skip_download': True,
1432 },
1433 'expected_warnings': [
1434 'Skipping DASH manifest',
1435 ],
1436 },
c7121fa7
S
1437 {
1438 # The following content has been identified by the YouTube community
1439 # as inappropriate or offensive to some audiences.
1440 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1441 'info_dict': {
1442 'id': '6SJNVb0GnPI',
1443 'ext': 'mp4',
1444 'title': 'Race Differences in Intelligence',
1445 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1446 'duration': 965,
1447 'upload_date': '20140124',
1448 'uploader': 'New Century Foundation',
1449 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1450 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1451 },
1452 'params': {
1453 'skip_download': True,
1454 },
545cc85d 1455 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1456 },
022a5d66
S
1457 {
1458 # itag 212
1459 'url': '1t24XAntNCY',
1460 'only_matching': True,
fd5c4aab
S
1461 },
1462 {
1463 # geo restricted to JP
1464 'url': 'sJL6WA-aGkQ',
1465 'only_matching': True,
1466 },
cd5a74a2
S
1467 {
1468 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1469 'only_matching': True,
1470 },
bc2ca1bb 1471 {
1472 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1473 'only_matching': True,
1474 },
1475 {
1476 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1477 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1478 'only_matching': True,
1479 },
825cd268
RA
1480 {
1481 # DRM protected
1482 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1483 'only_matching': True,
4fe54c12
S
1484 },
1485 {
1486 # Video with unsupported adaptive stream type formats
1487 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1488 'info_dict': {
1489 'id': 'Z4Vy8R84T1U',
1490 'ext': 'mp4',
1491 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1492 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1493 'duration': 433,
1494 'upload_date': '20130923',
1495 'uploader': 'Amelia Putri Harwita',
1496 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1497 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1498 'formats': 'maxcount:10',
1499 },
1500 'params': {
1501 'skip_download': True,
1502 'youtube_include_dash_manifest': False,
1503 },
5429d6a9 1504 'skip': 'not actual anymore',
5caabd3c 1505 },
1506 {
822b9d9c 1507 # Youtube Music Auto-generated description
5caabd3c 1508 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1509 'info_dict': {
1510 'id': 'MgNrAu2pzNs',
1511 'ext': 'mp4',
1512 'title': 'Voyeur Girl',
1513 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1514 'upload_date': '20190312',
5429d6a9
S
1515 'uploader': 'Stephen - Topic',
1516 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1517 'artist': 'Stephen',
1518 'track': 'Voyeur Girl',
1519 'album': 'it\'s too much love to know my dear',
1520 'release_date': '20190313',
1521 'release_year': 2019,
1522 },
1523 'params': {
1524 'skip_download': True,
1525 },
1526 },
66b48727
RA
1527 {
1528 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1529 'only_matching': True,
1530 },
011e75e6
S
1531 {
1532 # invalid -> valid video id redirection
1533 'url': 'DJztXj2GPfl',
1534 'info_dict': {
1535 'id': 'DJztXj2GPfk',
1536 'ext': 'mp4',
1537 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1538 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1539 'upload_date': '20090125',
1540 'uploader': 'Prochorowka',
1541 'uploader_id': 'Prochorowka',
1542 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1543 'artist': 'Panjabi MC',
1544 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1545 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1546 },
1547 'params': {
1548 'skip_download': True,
1549 },
545cc85d 1550 'skip': 'Video unavailable',
ea74e00b
DP
1551 },
1552 {
1553 # empty description results in an empty string
1554 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1555 'info_dict': {
1556 'id': 'x41yOUIvK2k',
1557 'ext': 'mp4',
1558 'title': 'IMG 3456',
1559 'description': '',
1560 'upload_date': '20170613',
1561 'uploader_id': 'ElevageOrVert',
1562 'uploader': 'ElevageOrVert',
1563 },
1564 'params': {
1565 'skip_download': True,
1566 },
1567 },
a0566bbf 1568 {
29f7c58a 1569 # with '};' inside yt initial data (see [1])
1570 # see [2] for an example with '};' inside ytInitialPlayerResponse
1571 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1572 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1573 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1574 'info_dict': {
1575 'id': 'CHqg6qOn4no',
1576 'ext': 'mp4',
1577 'title': 'Part 77 Sort a list of simple types in c#',
1578 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1579 'upload_date': '20130831',
1580 'uploader_id': 'kudvenkat',
1581 'uploader': 'kudvenkat',
1582 },
1583 'params': {
1584 'skip_download': True,
1585 },
1586 },
29f7c58a 1587 {
1588 # another example of '};' in ytInitialData
1589 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1590 'only_matching': True,
1591 },
1592 {
1593 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1594 'only_matching': True,
1595 },
545cc85d 1596 {
cc2db878 1597 # https://github.com/ytdl-org/youtube-dl/pull/28094
1598 'url': 'OtqTfy26tG0',
1599 'info_dict': {
1600 'id': 'OtqTfy26tG0',
1601 'ext': 'mp4',
1602 'title': 'Burn Out',
1603 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1604 'upload_date': '20141120',
1605 'uploader': 'The Cinematic Orchestra - Topic',
1606 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1607 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1608 'artist': 'The Cinematic Orchestra',
1609 'track': 'Burn Out',
1610 'album': 'Every Day',
1611 'release_data': None,
1612 'release_year': None,
1613 },
1614 'params': {
1615 'skip_download': True,
1616 },
545cc85d 1617 },
bc2ca1bb 1618 {
1619 # controversial video, only works with bpctr when authenticated with cookies
1620 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1621 'only_matching': True,
1622 },
f7ad7160 1623 {
1624 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1625 'url': 'cBvYw8_A0vQ',
1626 'info_dict': {
1627 'id': 'cBvYw8_A0vQ',
1628 'ext': 'mp4',
1629 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1630 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1631 'upload_date': '20201120',
1632 'uploader': 'Walk around Japan',
1633 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1634 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1635 },
1636 'params': {
1637 'skip_download': True,
1638 },
0fb983f6 1639 }, {
1640 # Has multiple audio streams
1641 'url': 'WaOKSUlf4TM',
1642 'only_matching': True
9297939e 1643 }, {
1644 # Requires Premium: has format 141 when requested using YTM url
1645 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1646 'only_matching': True
1647 }, {
120916da 1648 # multiple subtitles with same lang_code
1649 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1650 'only_matching': True,
109dd3b2 1651 }, {
1652 # Force use android client fallback
1653 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1654 'info_dict': {
1655 'id': 'YOelRv7fMxY',
1656 'title': 'Digging a Secret Tunnel from my Workshop',
1657 'ext': '3gp',
1658 'upload_date': '20210624',
1659 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1660 'uploader': 'colinfurze',
1661 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1662 'description': 'md5:ecb672623246d98c6c562eed6ae798c3'
1663 },
1664 'params': {
1665 'format': '17', # 3gp format available on android
1666 'extractor_args': {'youtube': {'player_client': ['android']}},
1667 },
120916da 1668 },
109dd3b2 1669 {
1670 # Skip download of additional client configs (remix client config in this case)
1671 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1672 'only_matching': True,
1673 'params': {
1674 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1675 },
1676 }
2eb88d95
PH
1677 ]
1678
@classmethod
def suitable(cls, url):
    """Return False for URLs with a non-empty ``list`` query parameter.

    Any other URL is passed on to the parent class's ``suitable`` check.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
1688
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor and create its two per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # player id -> downloaded player JS source
    self._code_cache = dict()
    # (player URL, signature shape) -> signature decryption function
    self._player_cache = dict()
e0df6211 1693
def _extract_player_url(self, ytcfg=None, webpage=None):
    """Return the absolute URL of the player JS, or None if it cannot be found.

    The URL is taken from ``ytcfg['PLAYER_JS_URL']`` when present, otherwise
    it is searched for in ``webpage``. Protocol-relative and site-relative
    URLs are converted to absolute https URLs.
    """
    player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), compat_str)
    # webpage may be None/False when its download failed non-fatally;
    # searching it would raise instead of degrading gracefully.
    if not player_url and webpage:
        player_url = self._search_regex(
            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
            webpage, 'player URL', fatal=False)
    if not player_url:
        # Nothing found: callers already treat a None player URL as
        # "player unavailable" rather than expecting a string.
        return None
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)
    return player_url
1706
60064c53
PH
def _signature_cache_id(self, example_sig):
    """ Return a string representation of a signature """
    # The id is the dot-joined lengths of the signature's dot-separated parts.
    part_lengths = [compat_str(len(part)) for part in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1710
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the player id captured by the first matching ``_PLAYER_INFO_RE`` pattern.

    Raises ExtractorError when none of the patterns match ``player_url``.
    """
    candidates = (re.search(player_re, player_url) for player_re in cls._PLAYER_INFO_RE)
    id_m = next((match for match in candidates if match), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')
e40c758c 1720
109dd3b2 1721 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1722 player_id = self._extract_player_info(player_url)
1723 if player_id not in self._code_cache:
1724 self._code_cache[player_id] = self._download_webpage(
1725 player_url, video_id, fatal=fatal,
1726 note='Downloading player ' + player_id,
1727 errnote='Download of %s failed' % player_url)
1728 return player_id in self._code_cache
1729
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a function that unscrambles signatures shaped like ``example_sig``.

    A cached index permutation from the 'youtube-sigfuncs' cache is used
    when available; otherwise the function is parsed out of the player JS
    and its permutation is stored in that cache.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (
        player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cache_spec is not None:
        return lambda s: ''.join(s[i] for i in cache_spec)

    if not self._load_player(video_id, player_url):
        return None

    sig_func = self._parse_sig_js(self._code_cache[player_id])

    # Run the function on a string whose characters encode their own
    # positions, so the output spells out the index permutation.
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    cache_spec = [ord(c) for c in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
    return sig_func
83799698 1752
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the signature function ``func``.

    ``func`` is applied to a probe string whose characters encode their own
    indices; the resulting index list is rendered as a sum of ``s[...]``
    index/slice expressions and written out with ``self.to_screen``.
    """
    def gen_sig_code(idxs):
        # Yield 's[i]' / 's[a:b:c]' snippets covering idxs in order,
        # collapsing runs with a constant step of +1 or -1 into one slice.
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            # A negative exclusive end would be read as "from the end",
            # so leave the end open instead.
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        step = None
        # Quell pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    # Still inside the current run
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run starts at prev
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Emit whatever is still pending after the last pair
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1791
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a callable that applies the player's signature function to a string.

    The function name is located in ``jscode`` with the regex list below
    (the 'sig' group holds the name), then extracted with JSInterpreter.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         # NOTE(review): the next two patterns are identical; the second is redundant
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function is called with a one-element argument list
    return lambda s: initial_function([s])
1815
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    try:
        # One decryption function is kept per (player URL, signature shape)
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        if self.get_param('youtube_print_sig_code'):
            self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        # Any failure is reported as an ExtractorError carrying the traceback
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=e)
e0df6211 1837
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    ``ytcfg['STS']`` is preferred; otherwise the value is searched for in
    the player JS. Without a player URL this raises ExtractorError when
    ``fatal`` is set, else it warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    if not self._load_player(video_id, player_url, fatal=fatal):
        return sts

    player_code = self._code_cache[self._extract_player_info(player_url)]
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', player_code,
        'JS player signature timestamp', group='sts', fatal=fatal))
1862
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL from ``player_response``, if any.

    The request is non-fatal; nothing is returned.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = []
    for _ in range(16):
        cpn_chars.append(CPN_ALPHABET[random.randint(0, 256) & 63])

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]

    new_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(url_parts._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1887
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return every YouTube embed URL or video id found in ``webpage``.

    Three kinds of embed are collected, in this order: player URLs in
    iframe/embed/object/SWF markup, lazyYT ids, and ids from the Wordpress
    "YouTube Video Importer" plugin.
    """
    # Embedded YouTube player
    # NOTE(review): the `embedSWF\(?:\s*` alternative matches a literal ':'
    # after an optional '('; possibly `embedSWF\(\s*` was intended - confirm.
    player_embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    entries = []
    for mobj in re.finditer(player_embed_re, webpage):
        entries.append(unescapeHTML(mobj.group('url')))

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    entries.extend(unescapeHTML(lazy_id) for lazy_id in lazy_ids)

    # Wordpress "YouTube Video Importer" plugin
    importer_matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Each match is a tuple of groups; the video id is the last one
    entries.extend(groups[-1] for groups in importer_matches)

    return entries
1919
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by ``_extract_urls``, or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1924
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id, i.e. group 2 of ``_VALID_URL`` matched against ``url``.

    Raises ExtractorError when ``url`` does not match.
    """
    mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
    if mobj is not None:
        return mobj.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1932
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a list of chapter dicts from the chaptered player bar in ``data``.

    Each chapter has 'start_time', 'end_time' and 'title'. A chapter ends
    where the next one starts; the last one ends at ``duration``. Chapters
    lacking a start or end time are skipped. Returns None when ``data``
    holds no chapter list.
    """
    chapters_list = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not chapters_list:
        return

    def chapter_time(chapter):
        # Start of the chapter in seconds (source value is in milliseconds)
        start_millis = try_get(
            chapter,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(start_millis, scale=1000)

    total = len(chapters_list)
    chapters = []
    for idx, chapter in enumerate(chapters_list):
        start_time = chapter_time(chapter)
        if start_time is None:
            continue
        if idx + 1 < total:
            end_time = chapter_time(chapters_list[idx + 1])
        else:
            end_time = duration
        if end_time is None:
            continue
        title = try_get(
            chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
            compat_str)
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': title,
        })
    return chapters
1972
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Find the JSON matched by ``regex`` in ``webpage`` and parse it.

    The boundary-anchored form of ``regex`` is tried before the bare one;
    when neither matches, '{}' is parsed instead. Parsing is non-fatal.
    """
    patterns = (
        r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
        regex)
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 1977
d92f5d5a 1978 @staticmethod
1979 def parse_time_text(time_text):
1980 """
1981 Parse the comment time text
1982 time_text is in the format 'X units ago (edited)'
1983 """
1984 time_text_split = time_text.split(' ')
1985 if len(time_text_split) >= 3:
1986 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1987
a1c5d2ca
M
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no 'commentId'. ``parent`` is the id
    of the parent comment; 'root' is recorded when it is not given.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer.get('contentText'))

    # note: timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer.get('publishedTimeText')) or ''
    time_text_dt = self.parse_time_text(time_text)
    # Default to None so an unparsable time text does not leave the name
    # unbound when the result dict is built.
    timestamp = None
    if isinstance(time_text_dt, datetime.datetime):
        timestamp = calendar.timegm(time_text_dt.timetuple())
    author = self._get_text(comment_renderer.get('authorText'))
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                   lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_favorited = 'creatorHeart' in (try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
2025
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, video_id, parent=None, comment_counts=None):
    """Generate the comments reachable from ``root_continuation_data``.

    Yields comment dicts as produced by ``_extract_comment``. When the
    comment section header is processed, the estimated total number of
    comments is also yielded as a plain int. Replies are fetched by
    recursing with ``parent`` set to the id of the parent comment.

    ``comment_counts`` is a mutable list shared across the recursion:
    [comments so far, estimated total comments, current comment thread #].
    """

    def extract_header(contents):
        # Returns (estimated total comments, continuation for the chosen sort order)
        _total_comments = 0
        _continuation = None
        for content in contents:
            comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
            expected_comment_count = parse_count(self._get_text(
                comments_header_renderer, (lambda x: x['countText'], lambda x: x['commentsCount']), max_runs=1))

            if expected_comment_count:
                comment_counts[1] = expected_comment_count
                self.to_screen('Downloading ~%d comments' % expected_comment_count)
                _total_comments = comment_counts[1]
            sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
            comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = sort_menu_item.get('title')
            if isinstance(sort_text, compat_str):
                sort_text = sort_text.lower()
            else:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text)
            break
        return _total_comments, _continuation

    def extract_thread(contents):
        # Yields each comment in contents followed by its replies
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # dict is the expected type of the result, not a second getter
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    video_id, parent=comment.get('id'), comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    # YouTube comments have a max depth of 2
    max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
    if max_depth == 1 and parent:
        return
    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    continuation = self._extract_continuation(root_continuation_data)
    if continuation and len(continuation['continuation']) < 27:
        self.write_debug('Detected old API continuation token. Generating new API compatible token.')
        continuation_token = self._generate_comment_continuation(video_id)
        continuation = self._build_api_continuation_query(continuation_token, None)

    visitor_data = None
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    comment_counts[2], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
        if not response:
            break
        # Keep the previous visitor data when the response carries none
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

        continuation = None
        if isinstance(continuation_contents, list):
            for continuation_section in continuation_contents:
                if not isinstance(continuation_section, dict):
                    continue
                continuation_items = try_get(
                    continuation_section,
                    (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                     lambda x: x['appendContinuationItemsAction']['continuationItems']),
                    list) or []
                if is_first_continuation:
                    total_comments, continuation = extract_header(continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break
                    continue
                # Count from 1 so that count == 0 really means "no entries";
                # counting from 0 would also treat a single entry as empty.
                count = 0
                for count, entry in enumerate(extract_thread(continuation_items), 1):
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break

        # Deprecated response structure
        elif isinstance(continuation_contents, dict):
            known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
            for key, continuation_renderer in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                if not isinstance(continuation_renderer, dict):
                    continue
                if is_first_continuation:
                    header_continuation_items = [continuation_renderer.get('header') or {}]
                    total_comments, continuation = extract_header(header_continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break

                # Sometimes YouTube provides a continuation without any comments
                # In most cases we end up just downloading these with very little comments to come.
                # (counted from 1 so a single entry is not mistaken for none)
                count = 0
                for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {}), 1):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                if count == 0:
                    if not parent:
                        self.report_warning('No comments received - assuming end of comments')
                    continuation = None
                break
a1c5d2ca 2196
2d6659b9 2197 @staticmethod
2198 def _generate_comment_continuation(video_id):
2199 """
2200 Generates initial comment section continuation token from given video id
2201 """
2202 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2203 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2204 new_continuation_intlist = list(itertools.chain.from_iterable(
2205 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2206 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2207
def _extract_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""
    known_entry_comment_renderers = ('itemSectionRenderer',)

    def _real_comment_extract(contents):
        # Delegates to _comment_entries for the first known renderer of each entry
        if not isinstance(contents, list):
            return
        for entry in contents:
            for key, renderer in entry.items():
                if key in known_entry_comment_renderers:
                    yield from self._comment_entries(
                        renderer, video_id=video_id, ytcfg=ytcfg,
                        identity_token=self._extract_identity_token(webpage, item_id=video_id),
                        account_syncid=self._extract_account_syncid(ytcfg))
                    break

    collected = []
    estimated_total = 0
    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')

    try:
        for item in _real_comment_extract(contents):
            if len(collected) >= max_comments:
                break
            if isinstance(item, int):
                # An int is the estimated total, not a comment
                estimated_total = item
            else:
                collected.append(item)
    except KeyboardInterrupt:
        # Keep whatever was downloaded before the interruption
        self.to_screen('Interrupted by user')
    self.to_screen('Downloaded %d/%d comments' % (len(collected), estimated_total))
    return {
        'comments': collected,
        'comment_count': len(collected),
    }
2241
109dd3b2 2242 @staticmethod
2243 def _generate_player_context(sts=None):
2244 context = {
2245 'html5Preference': 'HTML5_PREF_WANTS',
2246 }
2247 if sts is not None:
2248 context['signatureTimestamp'] = sts
2249 return {
2250 'playbackContext': {
2251 'contentPlaybackContext': context
2252 }
2253 }
2254
4e6767b5 2255 @staticmethod
c888ffb9 2256 def _get_video_info_params(video_id, client='TVHTML5'):
2257 GVI_CLIENTS = {
2258 'ANDROID': {
2259 'c': 'ANDROID',
2260 'cver': '16.20',
2261 },
2262 'TVHTML5': {
2263 'c': 'TVHTML5',
2264 'cver': '6.20180913',
2265 }
2266 }
2267 query = {
4e6767b5 2268 'video_id': video_id,
2269 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c888ffb9 2270 'html5': '1'
4e6767b5 2271 }
c888ffb9 2272 query.update(GVI_CLIENTS.get(client))
2273 return query
4e6767b5 2274
c5e8d7af 2275 def _real_extract(self, url):
cf7e015f 2276 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 2277 video_id = self._match_id(url)
9297939e 2278
2279 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
2280
545cc85d 2281 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 2282 webpage_url = base_url + 'watch?v=' + video_id
2283 webpage = self._download_webpage(
cce889b9 2284 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 2285
109dd3b2 2286 ytcfg = self._extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2287 identity_token = self._extract_identity_token(webpage, video_id)
314ee305 2288 session_index = self._extract_session_index(ytcfg)
109dd3b2 2289 player_url = self._extract_player_url(ytcfg, webpage)
2290
2d6659b9 2291 player_client = self._configuration_arg('player_client', [''])[0]
4bb6b02f 2292 if player_client not in ('web', 'android', ''):
c888ffb9 2293 self.report_warning(f'Invalid player_client {player_client} given. Falling back to android client.')
2294 force_mobile_client = player_client != 'web'
4bb6b02f 2295 player_skip = self._configuration_arg('player_skip')
fe93e2c4 2296 player_response = None
2297 if webpage:
2298 player_response = self._extract_yt_initial_variable(
2299 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2300 video_id, 'initial player response')
109dd3b2 2301
fe93e2c4 2302 syncid = self._extract_account_syncid(ytcfg, player_response)
2303 headers = self._generate_api_headers(ytcfg, identity_token, syncid, session_index=session_index)
9297939e 2304
2305 ytm_streaming_data = {}
2306 if is_music_url:
109dd3b2 2307 ytm_webpage = None
2308 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2309 if sts and not force_mobile_client and 'configs' not in player_skip:
2310 ytm_webpage = self._download_webpage(
2311 'https://music.youtube.com',
2d6659b9 2312 video_id, fatal=False, note='Downloading remix client config')
109dd3b2 2313
2314 ytm_cfg = self._extract_ytcfg(video_id, ytm_webpage) or {}
2315 ytm_client = 'WEB_REMIX'
2316 if not sts or force_mobile_client:
2317 # Android client already has signature descrambled
2318 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2319 if not sts:
c888ffb9 2320 self.report_warning('Falling back to android remix client for player API.')
109dd3b2 2321 ytm_client = 'ANDROID_MUSIC'
2322 ytm_cfg = {}
2323
2324 ytm_headers = self._generate_api_headers(
2325 ytm_cfg, identity_token, syncid,
314ee305 2326 client=ytm_client, session_index=session_index)
109dd3b2 2327 ytm_query = {'videoId': video_id}
2328 ytm_query.update(self._generate_player_context(sts))
2329
2330 ytm_player_response = self._extract_response(
2331 item_id=video_id, ep='player', query=ytm_query,
2332 ytcfg=ytm_cfg, headers=ytm_headers, fatal=False,
2333 default_client=ytm_client,
c888ffb9 2334 note='Downloading %sremix player API JSON' % ('android ' if force_mobile_client else ''))
2d6659b9 2335 ytm_streaming_data = try_get(ytm_player_response, lambda x: x['streamingData'], dict) or {}
109dd3b2 2336
109dd3b2 2337 if not player_response or force_mobile_client:
2338 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2339 yt_client = 'WEB'
2340 ytpcfg = ytcfg
2341 ytp_headers = headers
2342 if not sts or force_mobile_client:
2343 # Android client already has signature descrambled
2344 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2345 if not sts:
c888ffb9 2346 self.report_warning('Falling back to android client for player API.')
109dd3b2 2347 yt_client = 'ANDROID'
2348 ytpcfg = {}
314ee305 2349 ytp_headers = self._generate_api_headers(ytpcfg, identity_token, syncid,
2350 client=yt_client, session_index=session_index)
109dd3b2 2351
2352 yt_query = {'videoId': video_id}
2353 yt_query.update(self._generate_player_context(sts))
2354 player_response = self._extract_response(
2355 item_id=video_id, ep='player', query=yt_query,
2356 ytcfg=ytpcfg, headers=ytp_headers, fatal=False,
2357 default_client=yt_client,
c888ffb9 2358 note='Downloading %splayer API JSON' % ('android ' if force_mobile_client else '')
2359 ) or player_response
545cc85d 2360
109dd3b2 2361 # Age-gate workarounds
545cc85d 2362 playability_status = player_response.get('playabilityStatus') or {}
109dd3b2 2363 if playability_status.get('reason') in self._AGE_GATE_REASONS:
c888ffb9 2364 gvi_clients = ('ANDROID', 'TVHTML5') if force_mobile_client else ('TVHTML5', 'ANDROID')
2365 for gvi_client in gvi_clients:
2366 pr = self._parse_json(try_get(compat_parse_qs(
2367 self._download_webpage(
2368 base_url + 'get_video_info', video_id,
2369 'Refetching age-gated %s info webpage' % gvi_client.lower(),
2370 'unable to download video info webpage', fatal=False,
2371 query=self._get_video_info_params(video_id, client=gvi_client))),
2372 lambda x: x['player_response'][0],
2373 compat_str) or '{}', video_id)
2374 if pr:
2375 break
109dd3b2 2376 if not pr:
2377 self.report_warning('Falling back to embedded-only age-gate workaround.')
2378 embed_webpage = None
2379 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2380 if sts and not force_mobile_client and 'configs' not in player_skip:
2381 embed_webpage = self._download_webpage(
2382 'https://www.youtube.com/embed/%s?html5=1' % video_id,
2383 video_id=video_id, note='Downloading age-gated embed config')
2384
2385 ytcfg_age = self._extract_ytcfg(video_id, embed_webpage) or {}
2386 # If we extracted the embed webpage, it'll tell us if we can view the video
2387 embedded_pr = self._parse_json(
2388 try_get(ytcfg_age, lambda x: x['PLAYER_VARS']['embedded_player_response'], str) or '{}',
2389 video_id=video_id)
2390 embedded_ps_reason = try_get(embedded_pr, lambda x: x['playabilityStatus']['reason'], str) or ''
2391 if embedded_ps_reason not in self._AGE_GATE_REASONS:
2392 yt_client = 'WEB_EMBEDDED_PLAYER'
2393 if not sts or force_mobile_client:
2394 # Android client already has signature descrambled
2395 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2396 if not sts:
2397 self.report_warning(
c888ffb9 2398 'Falling back to android embedded client for player API (note: some formats may be missing).')
109dd3b2 2399 yt_client = 'ANDROID_EMBEDDED_PLAYER'
2400 ytcfg_age = {}
2401
2402 ytage_headers = self._generate_api_headers(
314ee305 2403 ytcfg_age, identity_token, syncid,
2404 client=yt_client, session_index=session_index)
109dd3b2 2405 yt_age_query = {'videoId': video_id}
2406 yt_age_query.update(self._generate_player_context(sts))
2407 pr = self._extract_response(
2408 item_id=video_id, ep='player', query=yt_age_query,
2409 ytcfg=ytcfg_age, headers=ytage_headers, fatal=False,
2410 default_client=yt_client,
c888ffb9 2411 note='Downloading %sage-gated player API JSON' % ('android ' if force_mobile_client else '')
109dd3b2 2412 ) or {}
2413
545cc85d 2414 if pr:
2415 player_response = pr
2416
2417 trailer_video_id = try_get(
2418 playability_status,
2419 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
2420 compat_str)
2421 if trailer_video_id:
2422 return self.url_result(
2423 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 2424
545cc85d 2425 search_meta = (
2426 lambda x: self._html_search_meta(x, webpage, default=None)) \
2427 if webpage else lambda x: None
dbdaaa23 2428
545cc85d 2429 video_details = player_response.get('videoDetails') or {}
37357d21 2430 microformat = try_get(
545cc85d 2431 player_response,
2432 lambda x: x['microformat']['playerMicroformatRenderer'],
2433 dict) or {}
2434 video_title = video_details.get('title') \
fe93e2c4 2435 or self._get_text(microformat.get('title')) \
545cc85d 2436 or search_meta(['og:title', 'twitter:title', 'title'])
2437 video_description = video_details.get('shortDescription')
cf7e015f 2438
8fe10494 2439 if not smuggled_data.get('force_singlefeed', False):
a06916d9 2440 if not self.get_param('noplaylist'):
8fe10494
S
2441 multifeed_metadata_list = try_get(
2442 player_response,
2443 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 2444 compat_str)
8fe10494
S
2445 if multifeed_metadata_list:
2446 entries = []
2447 feed_ids = []
2448 for feed in multifeed_metadata_list.split(','):
2449 # Unquote should take place before split on comma (,) since textual
2450 # fields may contain comma as well (see
067aa17e 2451 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 2452 feed_data = compat_parse_qs(
2453 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
2454
def feed_entry(name):
    """Look up the first value stored under ``name`` in ``feed_data``.

    The lookup is delegated to ``try_get`` with ``compat_str`` as the
    expected type, so the result is whatever ``try_get`` yields for
    ``feed_data[name][0]``.
    """
    def first_value(fields):
        return fields[name][0]

    return try_get(feed_data, first_value, compat_str)
6b09401b
S
2458
2459 feed_id = feed_entry('id')
2460 if not feed_id:
2461 continue
2462 feed_title = feed_entry('title')
2463 title = video_title
2464 if feed_title:
2465 title += ' (%s)' % feed_title
8fe10494
S
2466 entries.append({
2467 '_type': 'url_transparent',
2468 'ie_key': 'Youtube',
2469 'url': smuggle_url(
545cc85d 2470 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 2471 {'force_singlefeed': True}),
6b09401b 2472 'title': title,
8fe10494 2473 })
6b09401b 2474 feed_ids.append(feed_id)
8fe10494
S
2475 self.to_screen(
2476 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2477 % (', '.join(feed_ids), video_id))
545cc85d 2478 return self.playlist_result(
2479 entries, video_id, video_title, video_description)
8fe10494
S
2480 else:
2481 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 2482
9297939e 2483 formats, itags, stream_ids = [], [], []
cc2db878 2484 itag_qualities = {}
d3fc8074 2485 q = qualities([
60bdb7bd 2486 # "tiny" is the smallest video-only format. But some audio-only formats
2487 # was also labeled "tiny". It is not clear if such formats still exist
d3fc8074 2488 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2489 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2490 ])
9297939e 2491
545cc85d 2492 streaming_data = player_response.get('streamingData') or {}
2493 streaming_formats = streaming_data.get('formats') or []
2494 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
9297939e 2495 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2496 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2497
545cc85d 2498 for fmt in streaming_formats:
2499 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2500 continue
321bf820 2501
cc2db878 2502 itag = str_or_none(fmt.get('itag'))
9297939e 2503 audio_track = fmt.get('audioTrack') or {}
2504 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2505 if stream_id in stream_ids:
2506 continue
2507
cc2db878 2508 quality = fmt.get('quality')
d3fc8074 2509 if quality == 'tiny' or not quality:
2510 quality = fmt.get('audioQuality', '').lower() or quality
cc2db878 2511 if itag and quality:
2512 itag_qualities[itag] = quality
2513 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2514 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2515 # number of fragment that would subsequently requested with (`&sq=N`)
2516 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2517 continue
2518
545cc85d 2519 fmt_url = fmt.get('url')
2520 if not fmt_url:
2521 sc = compat_parse_qs(fmt.get('signatureCipher'))
2522 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2523 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2524 if not (sc and fmt_url and encrypted_sig):
2525 continue
545cc85d 2526 if not player_url:
201e9eaa 2527 continue
545cc85d 2528 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2529 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2530 fmt_url += '&' + sp + '=' + signature
2531
545cc85d 2532 if itag:
2533 itags.append(itag)
9297939e 2534 stream_ids.append(stream_id)
2535
cc2db878 2536 tbr = float_or_none(
2537 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 2538 dct = {
2539 'asr': int_or_none(fmt.get('audioSampleRate')),
2540 'filesize': int_or_none(fmt.get('contentLength')),
2541 'format_id': itag,
0fb983f6 2542 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
545cc85d 2543 'fps': int_or_none(fmt.get('fps')),
2544 'height': int_or_none(fmt.get('height')),
dca3ff4a 2545 'quality': q(quality),
cc2db878 2546 'tbr': tbr,
545cc85d 2547 'url': fmt_url,
2548 'width': fmt.get('width'),
0fb983f6 2549 'language': audio_track.get('id', '').split('.')[0],
545cc85d 2550 }
60bdb7bd 2551 mime_mobj = re.match(
2552 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2553 if mime_mobj:
2554 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2555 dct.update(parse_codecs(mime_mobj.group(2)))
2556 # The 3gp format in android client has a quality of "small",
2557 # but is actually worse than all other formats
2558 if dct['ext'] == '3gp':
2559 dct['quality'] = q('tiny')
cc2db878 2560 no_audio = dct.get('acodec') == 'none'
2561 no_video = dct.get('vcodec') == 'none'
2562 if no_audio:
2563 dct['vbr'] = tbr
2564 if no_video:
2565 dct['abr'] = tbr
2566 if no_audio or no_video:
545cc85d 2567 dct['downloader_options'] = {
2568 # Youtube throttles chunks >~10M
2569 'http_chunk_size': 10485760,
bf1317d2 2570 }
7c60c33e 2571 if dct.get('ext'):
2572 dct['container'] = dct['ext'] + '_dash'
545cc85d 2573 formats.append(dct)
2574
4bb6b02f 2575 skip_manifests = self._configuration_arg('skip')
5d3a0e79 2576 get_dash = 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
2577 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2578
9297939e 2579 for sd in (streaming_data, ytm_streaming_data):
5d3a0e79 2580 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
9297939e 2581 if hls_manifest_url:
2582 for f in self._extract_m3u8_formats(
2583 hls_manifest_url, video_id, 'mp4', fatal=False):
2584 itag = self._search_regex(
2585 r'/itag/(\d+)', f['url'], 'itag', default=None)
2586 if itag:
2587 f['format_id'] = itag
8d68ab98 2588 formats.append(f)
545cc85d 2589
5d3a0e79 2590 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2591 if dash_manifest_url:
2592 for f in self._extract_mpd_formats(
2593 dash_manifest_url, video_id, fatal=False):
2594 itag = f['format_id']
2595 if itag in itags:
2596 continue
2597 if itag in itag_qualities:
2598 f['quality'] = q(itag_qualities[itag])
2599 filesize = int_or_none(self._search_regex(
2600 r'/clen/(\d+)', f.get('fragment_base_url')
2601 or f['url'], 'file size', default=None))
2602 if filesize:
2603 f['filesize'] = filesize
2604 formats.append(f)
bf1317d2 2605
545cc85d 2606 if not formats:
a06916d9 2607 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
b7da73eb 2608 self.raise_no_formats(
545cc85d 2609 'This video is DRM protected.', expected=True)
2610 pemr = try_get(
2611 playability_status,
2612 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2613 dict) or {}
fe93e2c4 2614 reason = self._get_text(pemr.get('reason')) or playability_status.get('reason')
545cc85d 2615 subreason = pemr.get('subreason')
2616 if subreason:
fe93e2c4 2617 subreason = clean_html(self._get_text(subreason))
545cc85d 2618 if subreason == 'The uploader has not made this video available in your country.':
2619 countries = microformat.get('availableCountries')
2620 if not countries:
2621 regions_allowed = search_meta('regionsAllowed')
2622 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2623 self.raise_geo_restricted(subreason, countries, metadata_available=True)
545cc85d 2624 reason += '\n' + subreason
2625 if reason:
b7da73eb 2626 self.raise_no_formats(reason, expected=True)
bf1317d2 2627
545cc85d 2628 self._sort_formats(formats)
bf1317d2 2629
545cc85d 2630 keywords = video_details.get('keywords') or []
2631 if not keywords and webpage:
2632 keywords = [
2633 unescapeHTML(m.group('content'))
2634 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2635 for keyword in keywords:
2636 if keyword.startswith('yt:stretch='):
201c1459 2637 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2638 if mobj:
2639 # NB: float is intentional for forcing float division
2640 w, h = (float(v) for v in mobj.groups())
2641 if w > 0 and h > 0:
2642 ratio = w / h
2643 for f in formats:
2644 if f.get('vcodec') != 'none':
2645 f['stretched_ratio'] = ratio
2646 break
6449cd80 2647
0ba692ac 2648 category = microformat.get('category') or search_meta('genre')
2649 channel_id = video_details.get('channelId') \
2650 or microformat.get('externalChannelId') \
2651 or search_meta('channelId')
2652 duration = int_or_none(
2653 video_details.get('lengthSeconds')
2654 or microformat.get('lengthSeconds')) \
2655 or parse_duration(search_meta('duration'))
2656 is_live = video_details.get('isLive')
2657 is_upcoming = video_details.get('isUpcoming')
2658 owner_profile_url = microformat.get('ownerProfileUrl')
2659
545cc85d 2660 thumbnails = []
2661 for container in (video_details, microformat):
2662 for thumbnail in (try_get(
2663 container,
2664 lambda x: x['thumbnail']['thumbnails'], list) or []):
2665 thumbnail_url = thumbnail.get('url')
2666 if not thumbnail_url:
bf1317d2 2667 continue
1988fab7 2668 # Sometimes youtube gives a wrong thumbnail URL. See:
2669 # https://github.com/yt-dlp/yt-dlp/issues/233
2670 # https://github.com/ytdl-org/youtube-dl/issues/28023
2671 if 'maxresdefault' in thumbnail_url:
2672 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2673 thumbnails.append({
545cc85d 2674 'url': thumbnail_url,
ff2751ac 2675 'height': int_or_none(thumbnail.get('height')),
545cc85d 2676 'width': int_or_none(thumbnail.get('width')),
2677 })
ff2751ac 2678 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2679 if thumbnail_url:
2680 thumbnails.append({
2681 'url': thumbnail_url,
ff2751ac 2682 })
0ba692ac 2683 # The best resolution thumbnails sometimes does not appear in the webpage
2684 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
cca80fe6 2685 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2686 hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
2687 guaranteed_thumbnail_names = [
2688 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2689 'mqdefault', 'mq1', 'mq2', 'mq3',
2690 'default', '1', '2', '3'
2691 ]
2692 thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2693 n_thumbnail_names = len(thumbnail_names)
2694
0ba692ac 2695 thumbnails.extend({
2696 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2697 video_id=video_id, name=name, ext=ext,
2698 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
cca80fe6 2699 '_test_url': name in hq_thumbnail_names,
2700 } for name in thumbnail_names for ext in ('webp', 'jpg'))
0ba692ac 2701 for thumb in thumbnails:
cca80fe6 2702 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
0ba692ac 2703 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
ff2751ac 2704 self._remove_duplicate_formats(thumbnails)
545cc85d 2705
545cc85d 2706 info = {
2707 'id': video_id,
2708 'title': self._live_title(video_title) if is_live else video_title,
2709 'formats': formats,
2710 'thumbnails': thumbnails,
2711 'description': video_description,
2712 'upload_date': unified_strdate(
2713 microformat.get('uploadDate')
2714 or search_meta('uploadDate')),
2715 'uploader': video_details['author'],
2716 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2717 'uploader_url': owner_profile_url,
2718 'channel_id': channel_id,
2719 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2720 'duration': duration,
2721 'view_count': int_or_none(
2722 video_details.get('viewCount')
2723 or microformat.get('viewCount')
2724 or search_meta('interactionCount')),
2725 'average_rating': float_or_none(video_details.get('averageRating')),
2726 'age_limit': 18 if (
2727 microformat.get('isFamilySafe') is False
2728 or search_meta('isFamilyFriendly') == 'false'
2729 or search_meta('og:restrictions:age') == '18+') else 0,
2730 'webpage_url': webpage_url,
2731 'categories': [category] if category else None,
2732 'tags': keywords,
2733 'is_live': is_live,
2734 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2735 'was_live': video_details.get('isLiveContent'),
545cc85d 2736 }
b477fc13 2737
545cc85d 2738 pctr = try_get(
2739 player_response,
2740 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2741 subtitles = {}
2742 if pctr:
def process_language(container, base_url, lang_code, sub_name, query):
    """Register subtitle entries for ``lang_code`` in ``container``.

    One entry is appended to ``container[lang_code]`` (created on demand)
    for every format listed in ``self._SUBTITLE_FORMATS``. Each entry
    carries the format as ``ext``, the caption ``name`` and a URL built by
    ``update_url_query`` from ``base_url`` and ``query``.

    Note: ``query`` is modified in place -- its ``fmt`` key is overwritten
    on every iteration.
    """
    tracks = container.setdefault(lang_code, [])
    for sub_format in self._SUBTITLE_FORMATS:
        query['fmt'] = sub_format
        tracks.append(dict(
            ext=sub_format,
            url=update_url_query(base_url, query),
            name=sub_name))
7e72694b 2754
545cc85d 2755 for caption_track in (pctr.get('captionTracks') or []):
2756 base_url = caption_track.get('baseUrl')
2757 if not base_url:
2758 continue
2759 if caption_track.get('kind') != 'asr':
120916da 2760 lang_code = (
2761 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2762 or caption_track.get('languageCode'))
545cc85d 2763 if not lang_code:
2764 continue
2765 process_language(
774d79cc 2766 subtitles, base_url, lang_code,
2d6659b9 2767 try_get(caption_track, lambda x: x['name']['simpleText']),
774d79cc 2768 {})
545cc85d 2769 continue
2770 automatic_captions = {}
2771 for translation_language in (pctr.get('translationLanguages') or []):
2772 translation_language_code = translation_language.get('languageCode')
2773 if not translation_language_code:
2774 continue
2775 process_language(
2776 automatic_captions, base_url, translation_language_code,
fe93e2c4 2777 self._get_text(translation_language.get('languageName'), max_runs=1),
545cc85d 2778 {'tlang': translation_language_code})
2779 info['automatic_captions'] = automatic_captions
2780 info['subtitles'] = subtitles
7e72694b 2781
545cc85d 2782 parsed_url = compat_urllib_parse_urlparse(url)
2783 for component in [parsed_url.fragment, parsed_url.query]:
2784 query = compat_parse_qs(component)
2785 for k, v in query.items():
2786 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2787 d_k += '_time'
2788 if d_k not in info and k in s_ks:
2789 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2790
2791 # Youtube Music Auto-generated description
822b9d9c 2792 if video_description:
38d70284 2793 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2794 if mobj:
822b9d9c
RA
2795 release_year = mobj.group('release_year')
2796 release_date = mobj.group('release_date')
2797 if release_date:
2798 release_date = release_date.replace('-', '')
2799 if not release_year:
545cc85d 2800 release_year = release_date[:4]
2801 info.update({
2802 'album': mobj.group('album'.strip()),
2803 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2804 'track': mobj.group('track').strip(),
2805 'release_date': release_date,
cc2db878 2806 'release_year': int_or_none(release_year),
545cc85d 2807 })
7e72694b 2808
545cc85d 2809 initial_data = None
2810 if webpage:
2811 initial_data = self._extract_yt_initial_variable(
2812 webpage, self._YT_INITIAL_DATA_RE, video_id,
2813 'yt initial data')
2814 if not initial_data:
109dd3b2 2815 initial_data = self._extract_response(
2816 item_id=video_id, ep='next', fatal=False,
2817 ytcfg=ytcfg, headers=headers, query={'videoId': video_id},
2818 note='Downloading initial data API JSON')
545cc85d 2819
c60ee3a2 2820 try:
2821 # This will error if there is no livechat
2822 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2823 info['subtitles']['live_chat'] = [{
2824 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2825 'video_id': video_id,
2826 'ext': 'json',
f6745c49 2827 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 2828 }]
2829 except (KeyError, IndexError, TypeError):
2830 pass
545cc85d 2831
2832 if initial_data:
2833 chapters = self._extract_chapters_from_json(
2834 initial_data, video_id, duration)
2835 if not chapters:
2836 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2837 contents = try_get(
2838 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2839 list)
2840 if not contents:
2841 continue
2842
def chapter_time(mmlir):
    """Parse the ``timeDescription`` of a macro-markers list item renderer.

    ``mmlir`` is expected to be the ``macroMarkersListItemRenderer`` dict.
    The end-time lookup passes the raw ``try_get`` result for the following
    item, which is not guaranteed to be a dict; such input is treated like
    an empty renderer instead of raising ``AttributeError`` on ``.get``.

    Returns whatever ``parse_duration`` produces for the extracted text.
    """
    if not isinstance(mmlir, dict):
        # Same normalisation the start-time path applies with ``or {}``.
        mmlir = {}
    return parse_duration(
        self._get_text(mmlir.get('timeDescription')))
545cc85d 2846
2847 chapters = []
2848 for next_num, content in enumerate(contents, start=1):
2849 mmlir = content.get('macroMarkersListItemRenderer') or {}
2850 start_time = chapter_time(mmlir)
2851 end_time = chapter_time(try_get(
2852 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2853 if next_num < len(contents) else duration
2854 if start_time is None or end_time is None:
2855 continue
2856 chapters.append({
2857 'start_time': start_time,
2858 'end_time': end_time,
fe93e2c4 2859 'title': self._get_text(mmlir.get('title')),
545cc85d 2860 })
2861 if chapters:
2862 break
2863 if chapters:
2864 info['chapters'] = chapters
2865
2866 contents = try_get(
2867 initial_data,
2868 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2869 list) or []
2870 for content in contents:
2871 vpir = content.get('videoPrimaryInfoRenderer')
2872 if vpir:
2873 stl = vpir.get('superTitleLink')
2874 if stl:
fe93e2c4 2875 stl = self._get_text(stl)
545cc85d 2876 if try_get(
2877 vpir,
2878 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2879 info['location'] = stl
2880 else:
2881 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2882 if mobj:
2883 info.update({
2884 'series': mobj.group(1),
2885 'season_number': int(mobj.group(2)),
2886 'episode_number': int(mobj.group(3)),
2887 })
2888 for tlb in (try_get(
2889 vpir,
2890 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2891 list) or []):
2892 tbr = tlb.get('toggleButtonRenderer') or {}
2893 for getter, regex in [(
2894 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2895 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2896 lambda x: x['accessibility'],
2897 lambda x: x['accessibilityData']['accessibilityData'],
2898 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2899 label = (try_get(tbr, getter, dict) or {}).get('label')
2900 if label:
2901 mobj = re.match(regex, label)
2902 if mobj:
2903 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2904 break
2905 sbr_tooltip = try_get(
2906 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2907 if sbr_tooltip:
2908 like_count, dislike_count = sbr_tooltip.split(' / ')
2909 info.update({
2910 'like_count': str_to_int(like_count),
2911 'dislike_count': str_to_int(dislike_count),
2912 })
2913 vsir = content.get('videoSecondaryInfoRenderer')
2914 if vsir:
fe93e2c4 2915 info['channel'] = self._get_text(try_get(
545cc85d 2916 vsir,
2917 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2918 dict))
545cc85d 2919 rows = try_get(
2920 vsir,
2921 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2922 list) or []
2923 multiple_songs = False
2924 for row in rows:
2925 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2926 multiple_songs = True
2927 break
2928 for row in rows:
2929 mrr = row.get('metadataRowRenderer') or {}
2930 mrr_title = mrr.get('title')
2931 if not mrr_title:
2932 continue
fe93e2c4 2933 mrr_title = self._get_text(mrr['title'])
2934 mrr_contents_text = self._get_text(mrr['contents'][0])
545cc85d 2935 if mrr_title == 'License':
2936 info['license'] = mrr_contents_text
2937 elif not multiple_songs:
2938 if mrr_title == 'Album':
2939 info['album'] = mrr_contents_text
2940 elif mrr_title == 'Artist':
2941 info['artist'] = mrr_contents_text
2942 elif mrr_title == 'Song':
2943 info['track'] = mrr_contents_text
2944
2945 fallbacks = {
2946 'channel': 'uploader',
2947 'channel_id': 'uploader_id',
2948 'channel_url': 'uploader_url',
2949 }
2950 for to, frm in fallbacks.items():
2951 if not info.get(to):
2952 info[to] = info.get(frm)
2953
2954 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2955 v = info.get(s_k)
2956 if v:
2957 info[d_k] = v
b84071c0 2958
c224251a
M
2959 is_private = bool_or_none(video_details.get('isPrivate'))
2960 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2961 is_membersonly = None
b28f8d24 2962 is_premium = None
c224251a
M
2963 if initial_data and is_private is not None:
2964 is_membersonly = False
b28f8d24 2965 is_premium = False
47193e02 2966 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
2967 badge_labels = set()
2968 for content in contents:
2969 if not isinstance(content, dict):
2970 continue
2971 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
2972 for badge_label in badge_labels:
2973 if badge_label.lower() == 'members only':
2974 is_membersonly = True
2975 elif badge_label.lower() == 'premium':
2976 is_premium = True
2977 elif badge_label.lower() == 'unlisted':
2978 is_unlisted = True
c224251a 2979
c224251a
M
2980 info['availability'] = self._availability(
2981 is_private=is_private,
b28f8d24 2982 needs_premium=is_premium,
c224251a
M
2983 needs_subscription=is_membersonly,
2984 needs_auth=info['age_limit'] >= 18,
2985 is_unlisted=None if is_private is None else is_unlisted)
2986
06167fbb 2987 # get xsrf for annotations or comments
a06916d9 2988 get_annotations = self.get_param('writeannotations', False)
2989 get_comments = self.get_param('getcomments', False)
06167fbb 2990 if get_annotations or get_comments:
29f7c58a 2991 xsrf_token = None
545cc85d 2992 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2993 if ytcfg:
2994 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2995 if not xsrf_token:
2996 xsrf_token = self._search_regex(
2997 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2998 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2999
3000 # annotations
06167fbb 3001 if get_annotations:
64b6a4e9
RA
3002 invideo_url = try_get(
3003 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
3004 if xsrf_token and invideo_url:
29f7c58a 3005 xsrf_field_name = None
3006 if ytcfg:
3007 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
3008 if not xsrf_field_name:
3009 xsrf_field_name = self._search_regex(
3010 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 3011 webpage, 'xsrf field name',
29f7c58a 3012 group='xsrf_field_name', default='session_token')
8a784c74 3013 info['annotations'] = self._download_webpage(
64b6a4e9
RA
3014 self._proto_relative_url(invideo_url),
3015 video_id, note='Downloading annotations',
3016 errnote='Unable to download video annotations', fatal=False,
3017 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 3018
277d6ff5 3019 if get_comments:
2d6659b9 3020 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage)
4ea3be0a 3021
545cc85d 3022 self.mark_watched(video_id, player_response)
d77ab8e2 3023
545cc85d 3024 return info
c5e8d7af 3025
5f6a1245 3026
8bdd16b4 3027class YoutubeTabIE(YoutubeBaseInfoExtractor):
3028 IE_DESC = 'YouTube.com tab'
70d5c17b 3029 _VALID_URL = r'''(?x)
3030 https?://
3031 (?:\w+\.)?
3032 (?:
3033 youtube(?:kids)?\.com|
3034 invidio\.us
3035 )/
3036 (?:
fe03a6cd 3037 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 3038 (?P<not_channel>
9ba5705a 3039 feed/|hashtag/|
70d5c17b 3040 (?:playlist|watch)\?.*?\blist=
3041 )|
29f7c58a 3042 (?!(?:%s)\b) # Direct URLs
70d5c17b 3043 )
3044 (?P<id>[^/?\#&]+)
3045 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 3046 IE_NAME = 'youtube:tab'
3047
81127aa5 3048 _TESTS = [{
da692b79 3049 'note': 'playlists, multipage',
8bdd16b4 3050 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3051 'playlist_mincount': 94,
3052 'info_dict': {
3053 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3054 'title': 'Игорь Клейнер - Playlists',
3055 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3056 'uploader': 'Игорь Клейнер',
3057 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 3058 },
3059 }, {
da692b79 3060 'note': 'playlists, multipage, different order',
8bdd16b4 3061 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3062 'playlist_mincount': 94,
3063 'info_dict': {
3064 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3065 'title': 'Игорь Клейнер - Playlists',
3066 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3067 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3068 'uploader': 'Игорь Клейнер',
8bdd16b4 3069 },
201c1459 3070 }, {
da692b79 3071 'note': 'playlists, series',
201c1459 3072 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3073 'playlist_mincount': 5,
3074 'info_dict': {
3075 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3076 'title': '3Blue1Brown - Playlists',
3077 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 3078 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3079 'uploader': '3Blue1Brown',
201c1459 3080 },
8bdd16b4 3081 }, {
da692b79 3082 'note': 'playlists, singlepage',
8bdd16b4 3083 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3084 'playlist_mincount': 4,
3085 'info_dict': {
3086 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3087 'title': 'ThirstForScience - Playlists',
3088 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 3089 'uploader': 'ThirstForScience',
3090 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 3091 }
3092 }, {
3093 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3094 'only_matching': True,
3095 }, {
da692b79 3096 'note': 'basic, single video playlist',
0e30a7b9 3097 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3098 'info_dict': {
0e30a7b9 3099 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3100 'uploader': 'Sergey M.',
3101 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3102 'title': 'youtube-dl public playlist',
81127aa5 3103 },
0e30a7b9 3104 'playlist_count': 1,
9291475f 3105 }, {
da692b79 3106 'note': 'empty playlist',
0e30a7b9 3107 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3108 'info_dict': {
0e30a7b9 3109 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3110 'uploader': 'Sergey M.',
3111 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3112 'title': 'youtube-dl empty playlist',
9291475f
PH
3113 },
3114 'playlist_count': 0,
3115 }, {
da692b79 3116 'note': 'Home tab',
8bdd16b4 3117 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3118 'info_dict': {
8bdd16b4 3119 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3120 'title': 'lex will - Home',
3121 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3122 'uploader': 'lex will',
3123 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3124 },
8bdd16b4 3125 'playlist_mincount': 2,
9291475f 3126 }, {
da692b79 3127 'note': 'Videos tab',
8bdd16b4 3128 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3129 'info_dict': {
8bdd16b4 3130 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3131 'title': 'lex will - Videos',
3132 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3133 'uploader': 'lex will',
3134 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3135 },
8bdd16b4 3136 'playlist_mincount': 975,
9291475f 3137 }, {
da692b79 3138 'note': 'Videos tab, sorted by popular',
8bdd16b4 3139 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3140 'info_dict': {
8bdd16b4 3141 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3142 'title': 'lex will - Videos',
3143 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3144 'uploader': 'lex will',
3145 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3146 },
8bdd16b4 3147 'playlist_mincount': 199,
9291475f 3148 }, {
da692b79 3149 'note': 'Playlists tab',
8bdd16b4 3150 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3151 'info_dict': {
8bdd16b4 3152 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3153 'title': 'lex will - Playlists',
3154 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3155 'uploader': 'lex will',
3156 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3157 },
8bdd16b4 3158 'playlist_mincount': 17,
ac7553d0 3159 }, {
da692b79 3160 'note': 'Community tab',
8bdd16b4 3161 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3162 'info_dict': {
8bdd16b4 3163 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3164 'title': 'lex will - Community',
3165 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3166 'uploader': 'lex will',
3167 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3168 },
3169 'playlist_mincount': 18,
87dadd45 3170 }, {
da692b79 3171 'note': 'Channels tab',
8bdd16b4 3172 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3173 'info_dict': {
8bdd16b4 3174 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3175 'title': 'lex will - Channels',
3176 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3177 'uploader': 'lex will',
3178 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3179 },
deaec5af 3180 'playlist_mincount': 12,
cd684175 3181 }, {
3182 'note': 'Search tab',
3183 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3184 'playlist_mincount': 40,
3185 'info_dict': {
3186 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3187 'title': '3Blue1Brown - Search - linear algebra',
3188 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3189 'uploader': '3Blue1Brown',
3190 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3191 },
6b08cdf6 3192 }, {
a0566bbf 3193 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3194 'only_matching': True,
3195 }, {
a0566bbf 3196 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3197 'only_matching': True,
3198 }, {
a0566bbf 3199 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3200 'only_matching': True,
3201 }, {
3202 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3203 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3204 'info_dict': {
3205 'title': '29C3: Not my department',
3206 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3207 'uploader': 'Christiaan008',
3208 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3209 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3210 },
3211 'playlist_count': 96,
3212 }, {
3213 'note': 'Large playlist',
3214 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3215 'info_dict': {
8bdd16b4 3216 'title': 'Uploads from Cauchemar',
3217 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3218 'uploader': 'Cauchemar',
3219 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3220 },
8bdd16b4 3221 'playlist_mincount': 1123,
3222 }, {
da692b79 3223 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3224 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3225 'only_matching': True,
4b7df0d3
JMF
3226 }, {
3227 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3228 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3229 'info_dict': {
acf757f4
PH
3230 'title': 'Uploads from Interstellar Movie',
3231 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3232 'uploader': 'Interstellar Movie',
8bdd16b4 3233 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3234 },
481cc733 3235 'playlist_mincount': 21,
358de58c 3236 }, {
3237 'note': 'Playlist with "show unavailable videos" button',
3238 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3239 'info_dict': {
3240 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3241 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3242 'uploader': 'Phim Siêu Nhân Nhật Bản',
3243 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3244 },
da692b79 3245 'playlist_mincount': 200,
5d342002 3246 }, {
da692b79 3247 'note': 'Playlist with unavailable videos in page 7',
5d342002 3248 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3249 'info_dict': {
3250 'title': 'Uploads from BlankTV',
3251 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3252 'uploader': 'BlankTV',
3253 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3254 },
da692b79 3255 'playlist_mincount': 1000,
8bdd16b4 3256 }, {
da692b79 3257 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3258 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3259 'info_dict': {
3260 'title': 'Data Analysis with Dr Mike Pound',
3261 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3262 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3263 'uploader': 'Computerphile',
deaec5af 3264 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3265 },
3266 'playlist_mincount': 11,
3267 }, {
a0566bbf 3268 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3269 'only_matching': True,
dacb3a86 3270 }, {
da692b79 3271 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3272 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3273 'info_dict': {
3274 'id': 'FqZTN594JQw',
3275 'ext': 'webm',
3276 'title': "Smiley's People 01 detective, Adventure Series, Action",
3277 'uploader': 'STREEM',
3278 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3279 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3280 'upload_date': '20150526',
3281 'license': 'Standard YouTube License',
3282 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3283 'categories': ['People & Blogs'],
3284 'tags': list,
dbdaaa23 3285 'view_count': int,
dacb3a86
S
3286 'like_count': int,
3287 'dislike_count': int,
3288 },
3289 'params': {
3290 'skip_download': True,
3291 },
13a75688 3292 'skip': 'This video is not available.',
dacb3a86 3293 'add_ie': [YoutubeIE.ie_key()],
481cc733 3294 }, {
8bdd16b4 3295 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3296 'only_matching': True,
66b48727 3297 }, {
8bdd16b4 3298 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3299 'only_matching': True,
a0566bbf 3300 }, {
3301 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3302 'info_dict': {
da692b79 3303 'id': 'X1whbWASnNQ', # This will keep changing
a0566bbf 3304 'ext': 'mp4',
deaec5af 3305 'title': compat_str,
a0566bbf 3306 'uploader': 'Sky News',
3307 'uploader_id': 'skynews',
3308 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3309 'upload_date': r're:\d{8}',
3310 'description': compat_str,
a0566bbf 3311 'categories': ['News & Politics'],
3312 'tags': list,
3313 'like_count': int,
3314 'dislike_count': int,
3315 },
3316 'params': {
3317 'skip_download': True,
3318 },
da692b79 3319 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3320 }, {
3321 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3322 'info_dict': {
3323 'id': 'a48o2S1cPoo',
3324 'ext': 'mp4',
3325 'title': 'The Young Turks - Live Main Show',
3326 'uploader': 'The Young Turks',
3327 'uploader_id': 'TheYoungTurks',
3328 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3329 'upload_date': '20150715',
3330 'license': 'Standard YouTube License',
3331 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3332 'categories': ['News & Politics'],
3333 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3334 'like_count': int,
3335 'dislike_count': int,
3336 },
3337 'params': {
3338 'skip_download': True,
3339 },
3340 'only_matching': True,
3341 }, {
3342 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3343 'only_matching': True,
3344 }, {
3345 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3346 'only_matching': True,
09f1580e 3347 }, {
3348 'note': 'A channel that is not live. Should raise error',
3349 'url': 'https://www.youtube.com/user/numberphile/live',
3350 'only_matching': True,
3d3dddc9 3351 }, {
3352 'url': 'https://www.youtube.com/feed/trending',
3353 'only_matching': True,
3354 }, {
3d3dddc9 3355 'url': 'https://www.youtube.com/feed/library',
3356 'only_matching': True,
3357 }, {
3d3dddc9 3358 'url': 'https://www.youtube.com/feed/history',
3359 'only_matching': True,
3360 }, {
3d3dddc9 3361 'url': 'https://www.youtube.com/feed/subscriptions',
3362 'only_matching': True,
3363 }, {
3d3dddc9 3364 'url': 'https://www.youtube.com/feed/watch_later',
3365 'only_matching': True,
3366 }, {
da692b79 3367 'note': 'Recommended - redirects to home page',
3d3dddc9 3368 'url': 'https://www.youtube.com/feed/recommended',
3369 'only_matching': True,
29f7c58a 3370 }, {
da692b79 3371 'note': 'inline playlist with not always working continuations',
29f7c58a 3372 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3373 'only_matching': True,
3374 }, {
3375 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3376 'only_matching': True,
3377 }, {
3378 'url': 'https://www.youtube.com/course',
3379 'only_matching': True,
3380 }, {
3381 'url': 'https://www.youtube.com/zsecurity',
3382 'only_matching': True,
3383 }, {
3384 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3385 'only_matching': True,
3386 }, {
3387 'url': 'https://www.youtube.com/TheYoungTurks/live',
3388 'only_matching': True,
39ed931e 3389 }, {
3390 'url': 'https://www.youtube.com/hashtag/cctv9',
3391 'info_dict': {
3392 'id': 'cctv9',
3393 'title': '#cctv9',
3394 },
3395 'playlist_mincount': 350,
201c1459 3396 }, {
3397 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3398 'only_matching': True,
9297939e 3399 }, {
da692b79 3400 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3401 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3402 'only_matching': True
fe03a6cd 3403 }, {
3404 'note': '/browse/ should redirect to /channel/',
3405 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3406 'only_matching': True
3407 }, {
3408 'note': 'VLPL, should redirect to playlist?list=PL...',
3409 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3410 'info_dict': {
3411 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3412 'uploader': 'NoCopyrightSounds',
3413 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3414 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3415 'title': 'NCS Releases',
3416 },
3417 'playlist_mincount': 166,
18db7548 3418 }, {
3419 'note': 'Topic, should redirect to playlist?list=UU...',
3420 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3421 'info_dict': {
3422 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3423 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3424 'title': 'Uploads from Royalty Free Music - Topic',
3425 'uploader': 'Royalty Free Music - Topic',
3426 },
3427 'expected_warnings': [
3428 'A channel/user page was given',
3429 'The URL does not have a videos tab',
3430 ],
3431 'playlist_mincount': 101,
3432 }, {
3433 'note': 'Topic without a UU playlist',
3434 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3435 'info_dict': {
3436 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3437 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3438 },
3439 'expected_warnings': [
3440 'A channel/user page was given',
3441 'The URL does not have a videos tab',
3442 'Falling back to channel URL',
3443 ],
3444 'playlist_mincount': 9,
abcdd12b 3445 }, {
3446 'note': 'Youtube music Album',
3447 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3448 'info_dict': {
3449 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3450 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3451 },
3452 'playlist_count': 50,
47193e02 3453 }, {
3454 'note': 'unlisted single video playlist',
3455 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3456 'info_dict': {
3457 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3458 'uploader': 'colethedj',
3459 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3460 'title': 'yt-dlp unlisted playlist test',
3461 'availability': 'unlisted'
3462 },
3463 'playlist_count': 1,
29f7c58a 3464 }]
3465
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    Any URL that YoutubeIE accepts is rejected here; every other URL is
    judged by the inherited ``suitable`` implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 3470
3471 def _extract_channel_id(self, webpage):
3472 channel_id = self._html_search_meta(
3473 'channelId', webpage, 'channel id', default=None)
3474 if channel_id:
3475 return channel_id
3476 channel_url = self._html_search_meta(
3477 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3478 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3479 'twitter:app:url:googleplay'), webpage, 'channel url')
3480 return self._search_regex(
3481 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3482 channel_url, 'channel id')
15f6397c 3483
8bdd16b4 3484 @staticmethod
cd7c66cf 3485 def _extract_basic_item_renderer(item):
3486 # Modified from _extract_grid_item_renderer
201c1459 3487 known_basic_renderers = (
3488 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3489 )
3490 for key, renderer in item.items():
201c1459 3491 if not isinstance(renderer, dict):
cd7c66cf 3492 continue
201c1459 3493 elif key in known_basic_renderers:
3494 return renderer
3495 elif key.startswith('grid') and key.endswith('Renderer'):
3496 return renderer
8bdd16b4 3497
def _grid_entries(self, grid_renderer):
    """Yield one result per recognisable item of ``grid_renderer['items']``.

    Each item is reduced to its basic renderer, which is then classified as
    a playlist, a video or a channel (checked in that order). Items that are
    none of these are resolved through their navigation endpoint URL, when
    one of the YouTube extractors accepts it.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer.get('title'))

        playlist_id = renderer.get('playlistId')
        if playlist_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif renderer.get('videoId'):
            # video
            yield self._extract_video(renderer)
        elif renderer.get('channelId'):
            # channel
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % renderer.get('channelId'),
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support
            endpoint_path = try_get(
                renderer,
                lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str)
            endpoint_url = urljoin('https://www.youtube.com/', endpoint_path)
            if not endpoint_url:
                continue
            # The first extractor (in this order) that accepts the URL wins
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE)
                 if ie.suitable(endpoint_url)),
                None)
            if matching_ie is not None:
                yield self.url_result(
                    endpoint_url, ie=matching_ie.ie_key(),
                    video_id=matching_ie._match_id(endpoint_url), video_title=title)
8bdd16b4 3537
3d3dddc9 3538 def _shelf_entries_from_content(self, shelf_renderer):
3539 content = shelf_renderer.get('content')
3540 if not isinstance(content, dict):
8bdd16b4 3541 return
cd7c66cf 3542 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3543 if renderer:
3544 # TODO: add support for nested playlists so each shelf is processed
3545 # as separate playlist
3546 # TODO: this includes only first N items
3547 for entry in self._grid_entries(renderer):
3548 yield entry
3549 renderer = content.get('horizontalListRenderer')
3550 if renderer:
3551 # TODO
3552 pass
8bdd16b4 3553
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield the entries of a shelf renderer.

    When the shelf carries an endpoint URL, a URL result for it is yielded
    first, titled with the shelf title. The entries found in the shelf
    content are yielded afterwards. With ``skip_channels`` set, a shelf whose
    URL contains ``/channels?`` produces nothing at all.
    """
    shelf_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', shelf_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        links_to_channels = '/channels?' in shelf_url
        if skip_channels and links_to_channels:
            return
        shelf_title = self._get_text(shelf_renderer, lambda x: x['title'])
        yield self.url_result(shelf_url, video_title=shelf_title)
    # The shelf may not contain a shelf URL; fall back to its content
    for shelf_entry in self._shelf_entries_from_content(shelf_renderer):
        yield shelf_entry
c5e8d7af 3570
8bdd16b4 3571 def _playlist_entries(self, video_list_renderer):
3572 for content in video_list_renderer['contents']:
3573 if not isinstance(content, dict):
3574 continue
3575 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3576 if not isinstance(renderer, dict):
3577 continue
3578 video_id = renderer.get('videoId')
3579 if not video_id:
3580 continue
3581 yield self._extract_video(renderer)
07aeced6 3582
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in ``content.videoRenderer``, if it has an id."""
    video = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video.get('videoId'):
        yield self._extract_video(video)
3590
8bdd16b4 3591 def _video_entry(self, video_renderer):
3592 video_id = video_renderer.get('videoId')
3593 if video_id:
3594 return self._extract_video(video_renderer)
dacb3a86 3595
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a community post.

    In order: the attached video, the attached playlist, then every link in
    the post text that YoutubeIE accepts (a link to the attached video itself
    is not repeated).
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    attached_video_id = attached_video.get('videoId')
    if attached_video_id:
        attached_entry = self._extract_video(attached_video)
        if attached_entry:
            yield attached_entry

    # playlist attachment
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        if linked_id != attached_video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)
dacb3a86 3631
8bdd16b4 3632 def _post_thread_continuation_entries(self, post_thread_continuation):
3633 contents = post_thread_continuation.get('contents')
3634 if not isinstance(contents, list):
3635 return
3636 for content in contents:
3637 renderer = content.get('backstagePostThreadRenderer')
3638 if not isinstance(renderer, dict):
3639 continue
3640 for entry in self._post_thread_entries(renderer):
3641 yield entry
07aeced6 3642
39ed931e 3643 r''' # unused
3644 def _rich_grid_entries(self, contents):
3645 for content in contents:
3646 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3647 if video_renderer:
3648 entry = self._video_entry(video_renderer)
3649 if entry:
3650 yield entry
3651 '''
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield every entry of ``tab``, following continuations page by page.

    The entries found directly in the tab content are yielded first. After
    that, continuation pages are requested through ``_extract_response``
    until no continuation is left, a request yields no response, or a
    response contains no renderer known to this method.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        # Yields the entries below parent_renderer['contents'] and stores the
        # continuation to follow next in continuation_list[0].
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                # Not an item section: only rich items are handled here
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                # Maps a renderer key to the callable yielding its entries
                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                # Only the first known renderer of each item is processed
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            # Fall back to the continuation of the item section itself
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        # Last resort: the continuation of the parent renderer
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list used as a mutable cell that extract_entries writes to
    continuation_list = [None]  # Python 2 does not support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    visitor_data = None

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep the previous visitor data when the response carries none
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        # First response shape: renderers keyed below 'continuationContents'
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Fresh cell: only extract_entries fills it; for the other
            # handlers the continuation is read from the renderer instead
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response shape: a list of continuation items. Each value is
        # (handler, key under which the handler expects to find the items)
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The type of the first item decides how the whole list is handled
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            # Wrap the items so that they look like a regular renderer
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Nothing recognisable in this response: stop paging
        break
9558dcec 3766
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the tab whose ``selected`` flag is True.

    Both ``tabRenderer`` and ``expandableTabRenderer`` tabs are considered.
    Raises ExtractorError when no tab is selected.
    """
    for tab in tabs:
        tab_renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    raise ExtractorError('Unable to find selected tab')
b82f815f 3775
@classmethod
def _extract_uploader(cls, data):
    """Return the uploader fields found in the playlist sidebar of ``data``.

    The result may contain ``uploader``, ``uploader_id`` and
    ``uploader_url``; fields that cannot be determined are left out, so the
    dict is empty when the sidebar names no owner.
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    def browse_field(name):
        # String value of the owner's browse endpoint, or None
        return try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint'][name], compat_str)

    fields = (
        ('uploader', owner.get('text')),
        ('uploader_id', browse_field('browseId')),
        ('uploader_url', urljoin('https://www.youtube.com/', browse_field('canonicalBaseUrl'))),
    )
    return {key: value for key, value in fields if value is not None}
8bdd16b4 3790
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for the selected tab of a tabbed page.

    Metadata is taken from the channel metadata renderer or, failing that,
    from the playlist metadata renderer of ``data``. The entries are
    produced lazily by ``_entries`` for the selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    metadata_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if metadata_renderer:
        channel_name = metadata_renderer.get('title')
        channel_url = metadata_renderer.get('channelUrl')
        channel_id = metadata_renderer.get('externalId')
    else:
        metadata_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    raw_thumbnails = []
    if metadata_renderer:
        title = metadata_renderer.get('title')
        description = metadata_renderer.get('description', '')
        # Stays None for a playlist page; item_id is substituted below
        playlist_id = channel_id
        tags = metadata_renderer.get('keywords', '').split()
        # Prefer the avatar; otherwise use the sidebar playlist thumbnail
        raw_thumbnails = (
            try_get(metadata_renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for thumb in raw_thumbnails:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag = try_get(
            data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag or playlist_id
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror the uploader fields
    for suffix in ('', '_id', '_url'):
        metadata['channel' + suffix] = metadata['uploader' + suffix]

    ytcfg = self._extract_ytcfg(item_id, webpage)
    identity_token = self._extract_identity_token(webpage, item_id)
    account_syncid = self._extract_account_syncid(ytcfg, data)
    entries = self._entries(
        selected_tab, playlist_id, identity_token, account_syncid, ytcfg)
    return self.playlist_result(entries, **metadata)
73c4ac2c 3865
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """
    Generate the entries of a mix playlist, fetching further pages on demand.

    Iteration ends when a page yields no new videos, when the first video of
    the mix shows up again, or when no follow-up playlist page is returned.

    @param playlist:    playlist renderer dict of the first page
    @param playlist_id: id of the mix playlist
    @param data:        initial data of the page (used for the account sync id)
    @param webpage:     downloaded webpage (used for ytcfg and identity token)
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to (and including) the last video already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last panel entry may lack a watch endpoint; fall back to an
        # empty dict so the defaults below are used instead of crashing
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # No further page was returned, so there is nothing left to iterate
            return
cd7c66cf 3901
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """
    Build the result for a playlist shown next to a video.

    Anything that has its own playlist URL is handed over to the tab
    extractor; only mix playlists are extracted here.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_url)

    if not playlist_url or playlist_url == url:
        mix_entries = self._extract_mix_playlist(playlist, playlist_id, data, webpage)
        return self.playlist_result(
            mix_entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3919
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for entry in dropdown_entries:
        if not try_get(entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        selected_label = self._get_text(
            try_get(entry, lambda x: x['privacyDropdownItemRenderer']['label'], dict) or [])
        if selected_label:
            labels.add(selected_label.lower())
            break

    is_private = None
    is_unlisted = None
    for name in labels:
        if name == 'public':
            is_private = False
            is_unlisted = False
        elif name == 'private':
            is_private = True
        elif name == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)
3952
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first non-empty `info_renderer` entry of the playlist sidebar.

    @param data:          response containing the sidebar
    @param info_renderer: key of the renderer to look for in each sidebar item
    @param expected_type: type the renderer must have to be accepted
    @returns              the renderer, or None if no sidebar item provides one
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    candidates = (
        try_get(entry, lambda x: x[info_renderer], expected_type)
        for entry in sidebar_items or [])
    return next((found for found in candidates if found), None)
3961
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    # Look for the menu entry that reloads the playlist with unavailable videos
    browse_endpoint = {}
    menu_items = try_get(sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for entry in menu_items:
        if not isinstance(entry, dict):
            continue
        nav_renderer = entry.get('menuNavigationItemRenderer')
        caption = try_get(nav_renderer, lambda x: x['text']['simpleText'], compat_str)
        if caption and caption.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break
    browse_id = browse_endpoint.get('browseId')
    params = browse_endpoint.get('params')

    ytcfg = self._extract_ytcfg(item_id, webpage)
    account_syncid = self._extract_account_syncid(ytcfg, data)
    identity_token = self._extract_identity_token(webpage, item_id=item_id)
    visitor_data = try_get(
        self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=account_syncid,
        identity_token=identity_token, visitor_data=visitor_data)

    # Fall back to fixed values when the button did not provide them
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4000
def _extract_webpage(self, url, item_id):
    """
    Download the webpage and its initial data, retrying on incomplete data.

    The number of retries is taken from the 'extractor_retries' parameter
    (default 3).

    @returns  tuple (webpage, data)
    @raises   ExtractorError if the data is still incomplete after the last retry
    """
    retries = self.get_param('extractor_retries', 3)
    count = -1
    last_error = 'Incomplete yt initial data received'
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if count:
            self.report_warning('%s. Retrying ...' % last_error)
        webpage = self._download_webpage(
            url, item_id,
            'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
        data = self._extract_yt_initial_data(item_id, webpage)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        if count >= retries:
            raise ExtractorError(last_error)
    return webpage, data
4022
@staticmethod
def _smuggle_data(entries, data):
    """
    Lazily pass the entries through, smuggling `data` into each entry URL.

    Entries are yielded unchanged when `data` is empty.
    """
    for item in entries:
        if not data:
            yield item
            continue
        item['url'] = smuggle_url(item['url'], data)
        yield item
4029
def _real_extract(self, url):
    """
    Extract the URL and re-attach the smuggled data to every resulting entry.
    """
    plain_url, smuggled = unsmuggle_url(url, {})
    if self.is_music_url(plain_url):
        smuggled['is_music_url'] = True

    result = self.__real_extract(plain_url, smuggled)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled)
    return result
4038
# Splits a URL into three named groups: "pre" (the part matched by _VALID_URL),
# "tab" (a "/word" path component) and "post" (everything that follows).
# "tab" is only tried when the "channel_type" group has matched; that group is
# expected to come from _VALID_URL, which is defined outside this excerpt.
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4040
def __real_extract(self, url, smuggled_data):
    """
    Normalise the URL, download the page and dispatch to the matching extraction.

    Depending on the downloaded data the result comes from the tabs, from a
    playlist shown next to a video, or is a redirect to a single video.

    @param url:           URL without smuggled data
    @param smuggled_data: dict of smuggled data; 'is_music_url' is read here
    @raises               ExtractorError if the page cannot be recognized
    """
    item_id = self._match_id(url)
    # Always talk to www.youtube.com, whatever host the URL was given with
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Match the URL and replace unmatched (None) groups with '' so that
        # the string operations below work on every group
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                item_id = self._search_regex(
                    r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                    self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                    'playlist id')
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from the (possibly rewritten) parts and match it again
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither a video id nor a playlist id, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            if alert_type == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        # Only switch over to the playlist once it loaded without an error alert
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)
    # data may have been replaced above, so look the tabs up again
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    # Prefer the video id reported by the page over the one from the query string
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 4155
c5e8d7af 4156
# Recognizes playlist URLs and bare playlist ids and hands them over to
# YoutubeTabIE as a normalised https://www.youtube.com/playlist URL.
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    # The whole host/path/query prefix is optional, so a bare playlist id matches as well
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE accepts or that carry a video id ("v")."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        qs = parse_qs(url)
        if qs.get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite the URL to the canonical playlist page and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Remember the music origin before the URL is rewritten below
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query if there is one; a bare id has none
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4239
4240
class YoutubeYtBeIE(InfoExtractor):
    # Handles youtu.be short links that also carry a playlist id
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Turn the short link into the equivalent watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4279
4280
class YoutubeYtUserIE(InfoExtractor):
    # "ytuser:<name>" is a shortcut for the user page of <name>
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the user page URL."""
        username = self._match_id(url)
        user_page = 'https://www.youtube.com/user/%s' % username
        return self.url_result(user_page, ie=YoutubeTabIE.ie_key(), video_id=username)
9558dcec 4294
b05654f0 4295
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # Keyword shortcut for the liked videos playlist ("LL")
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the liked videos playlist URL."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
4313
4314
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield the videos found for `query`, page by page, stopping after `n` results."""
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS
        yielded = 0
        continuation = {}
        page_num = 0
        while True:
            page_num += 1
            request.update(continuation)
            response = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not response:
                return
            sections = try_get(
                response,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for section in sections:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [section]})

                section_items = try_get(
                    section, lambda x: x['itemSectionRenderer']['contents'], list) or []
                for item in section_items:
                    renderer = item.get('videoRenderer') if isinstance(item, dict) else None
                    if not (isinstance(renderer, dict) and renderer.get('videoId')):
                        continue

                    yield self._extract_video(renderer)
                    yielded += 1
                    if yielded == n:
                        return

            if not continuation:
                return

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
75dff0ee 4382
c9ae7b95 4383
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same search as the parent class, but sent with a fixed 'params' value
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4389
c9ae7b95 4390
class YoutubeSearchURLIE(YoutubeSearchIE):
    # Runs the search described by a youtube.com/results URL
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own _VALID_URL unchanged."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the search terms and the 'sp' parameter from the URL and run the search."""
        url_query = compat_urllib_parse_urlparse(url).query
        fields = compat_parse_qs(url_query)
        search_terms = fields.get('search_query') or fields.get('q')
        query = search_terms[0]
        self._SEARCH_PARAMS = fields.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4416
4417
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors.
    Every subclass has to provide the _FEED_NAME property.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        # The extractor name is derived from the feed name
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the URL of the feed."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4434
4435
class YoutubeWatchLaterIE(InfoExtractor):
    # Keyword shortcut for the watch later playlist ("WL")
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the watch later playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4448
4449
25f14e9f
S
# Feed extractor for the "recommended" feed.
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Matches the bare youtube.com front page as well as the ":ytrec" / ":ytrecommended" keywords
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Overrides the True set by YoutubeFeedsInfoExtractor
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4465
1ed5b5c9 4466
# Feed extractor for the "subscriptions" feed.
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    # Accepts ":ytsub", ":ytsubs", ":ytsubscription" and ":ytsubscriptions"
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4478
1ed5b5c9 4479
# Feed extractor for the "history" feed.
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    # Accepts ":ythis" and ":ythistory"
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4488
4489
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch URLs that lost their video id (typically because the URL
    # was not quoted in the shell) and answers with an explanatory error.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Only matches when the URL ends right after one of the listed
    # parameters (or directly after "watch?"), i.e. when no "v=" follows
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL."""
        # The hint names this program (yt-dlp) so that the suggested command can be run as printed
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4537
4538
class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose video id has only 1 to 10 characters
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)