]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[crunchyroll:playlist] Force http
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
2d6659b9 5import base64
d92f5d5a 6import calendar
109dd3b2 7import copy
fe93e2c4 8import datetime
a5c56234 9import hashlib
0ca96d48 10import itertools
c5e8d7af 11import json
c4417ddb 12import os.path
d77ab8e2 13import random
c5e8d7af 14import re
8a784c74 15import time
e0df6211 16import traceback
c5e8d7af 17
b05654f0 18from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 19from ..compat import (
edf3e38e 20 compat_chr,
29f7c58a 21 compat_HTTPError,
c5e8d7af 22 compat_parse_qs,
545cc85d 23 compat_str,
7fd002c0 24 compat_urllib_parse_unquote_plus,
15707c7e 25 compat_urllib_parse_urlencode,
7c80519c 26 compat_urllib_parse_urlparse,
7c61bd36 27 compat_urlparse,
4bb4a188 28)
545cc85d 29from ..jsinterp import JSInterpreter
4bb4a188 30from ..utils import (
c224251a 31 bool_or_none,
2d6659b9 32 bytes_to_intlist,
c5e8d7af 33 clean_html,
26fe8ffe 34 dict_get,
d92f5d5a 35 datetime_from_str,
358de58c 36 error_to_compat_str,
c5e8d7af 37 ExtractorError,
b60419c5 38 format_field,
2d30521a 39 float_or_none,
dd27fd17 40 int_or_none,
2d6659b9 41 intlist_to_bytes,
94278f72 42 mimetype2ext,
6310acf5 43 parse_codecs,
49bd8c66 44 parse_count,
7c80519c 45 parse_duration,
dca3ff4a 46 qualities,
3995d37d 47 remove_start,
cf7e015f 48 smuggle_url,
dbdaaa23 49 str_or_none,
c93d53f5 50 str_to_int,
556dbe7f 51 try_get,
c5e8d7af
PH
52 unescapeHTML,
53 unified_strdate,
cf7e015f 54 unsmuggle_url,
8bdd16b4 55 update_url_query,
21c340b8 56 url_or_none,
6e6bc8da 57 urlencode_postdata,
fe93e2c4 58 urljoin,
59 variadic
c5e8d7af
PH
60)
61
5f6a1245 62
def parse_qs(url):
    """Return the query component of *url* as parsed by ``compat_urlparse.parse_qs``."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
65
66
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Regex alternation of path names; the name suggests these are paths that
    # must not be mistaken for channel/user names (usage is outside this class)
    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.

        Username+password login is broken, so supplying credentials only
        produces a warning pointing at cookie-based authentication and the
        attempt is reported as failed.
        """
        # username+password login is broken
        if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
            self.raise_login_required(
                'Login details are needed to download this content', method='cookies')
        username, _password = self._get_login_info()
        if username:
            self.report_warning(
                'Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
            return False
        # No credentials supplied: nothing to do, login is skipped
        return True

    def _initialize_consent(self):
        """
        Make sure a CONSENT cookie with a "YES" value is set for .youtube.com.

        Nothing is done when a __Secure-3PSID cookie is present or when the
        existing CONSENT cookie already contains "YES". The numeric id of a
        "PENDING+<id>" cookie is reused when available; otherwise a random
        three-digit id is generated.
        """
        cookies = self._get_cookies('https://www.youtube.com/')
        if cookies.get('__Secure-3PSID'):
            return
        consent_id = None
        consent = cookies.get('CONSENT')
        if consent:
            if 'YES' in consent.value:
                return
            consent_id = self._search_regex(
                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
        if not consent_id:
            consent_id = random.randint(100, 999)
        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

    def _real_initialize(self):
        """Prepare the consent cookie and attempt login once a downloader is attached."""
        # Check for the downloader first: the consent setup reads and writes
        # cookies, which presumably lives on the downloader (NOTE(review):
        # confirm against InfoExtractor._get_cookies/_set_cookie).
        if self._downloader is None:
            return
        self._initialize_consent()
        self._login()

    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    # Built-in ytcfg values per INNERTUBE client, used as fallback whenever a
    # value cannot be read from a ytcfg supplied by the caller
    _YT_DEFAULT_YTCFGS = {
        'WEB': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB',
            'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20210622.10.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 1
        },
        'WEB_REMIX': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
            'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_REMIX',
                    'clientVersion': '1.20210621.00.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 67
        },
        'WEB_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_EMBEDDED_PLAYER',
                    'clientVersion': '1.20210620.0.1',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 56
        },
        'ANDROID': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 3
        },
        'ANDROID_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_EMBEDDED_PLAYER',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 55
        },
        'ANDROID_MUSIC': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
            'INNERTUBE_CLIENT_VERSION': '4.32',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_MUSIC',
                    'clientVersion': '4.32',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 21
        }
    }

    # API hostname per client; clients not listed here fall back to 'WEB'
    # (see _get_innertube_host)
    _YT_DEFAULT_INNERTUBE_HOSTS = {
        'DIRECT': 'youtubei.googleapis.com',
        'WEB': 'www.youtube.com',
        'WEB_REMIX': 'music.youtube.com',
        'ANDROID_MUSIC': 'music.youtube.com'
    }

    def _get_default_ytcfg(self, client='WEB'):
        """
        Return a deep copy of the built-in ytcfg for `client`.

        Unknown clients fall back to the 'WEB' defaults (a debug message is
        written). A copy is returned so callers may mutate it freely.
        """
        if client not in self._YT_DEFAULT_YTCFGS:
            self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
            client = 'WEB'
        return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])

    def _get_innertube_host(self, client='WEB'):
        """Return the API hostname for `client`, defaulting to the 'WEB' host."""
        return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, (client, 'WEB'))

    def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
        """
        try_get but with fallback to default ytcfg client values when present.

        Note that any falsy value obtained from `ytcfg` also triggers the
        fallback to the defaults of `default_client`.
        """
        _func = lambda y: try_get(y, getter, expected_type)
        return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))

    def _extract_client_name(self, ytcfg, default_client='WEB'):
        """Return INNERTUBE_CLIENT_NAME from `ytcfg`, or from the defaults of `default_client`."""
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str, default_client)

    @staticmethod
    def _extract_session_index(ytcfg):
        """Return SESSION_INDEX from `ytcfg` passed through int_or_none."""
        return int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))

    def _extract_client_version(self, ytcfg, default_client='WEB'):
        """Return INNERTUBE_CLIENT_VERSION from `ytcfg`, or from the defaults of `default_client`."""
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str, default_client)

    def _extract_api_key(self, ytcfg=None, default_client='WEB'):
        """Return INNERTUBE_API_KEY from `ytcfg`, or from the defaults of `default_client`."""
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)

    def _extract_context(self, ytcfg=None, default_client='WEB'):
        """
        Return the INNERTUBE context dict to send with API requests.

        The context found in `ytcfg` is returned as-is when present.
        Otherwise the default context of `default_client` is used; if a
        (context-less) `ytcfg` was given, its client name/version and
        VISITOR_DATA are merged into that default context.
        """
        _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
        context = _get_context(ytcfg)
        if context:
            return context

        context = _get_context(self._get_default_ytcfg(default_client))
        if not ytcfg:
            return context

        # Recreate the client context (required)
        context['client'].update({
            'clientVersion': self._extract_client_version(ytcfg, default_client),
            'clientName': self._extract_client_name(ytcfg, default_client),
        })
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            context['client']['visitorData'] = visitor_data
        return context

    def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
        """
        Build the "SAPISIDHASH <time>_<sha1>" authorization value for `origin`.

        Returns None when neither the __Secure-3PAPISID nor the SAPISID
        cookie is available. As a side effect, a SAPISID cookie (valid for
        one hour) is set when it is missing.
        """
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        yt_cookies = self._get_cookies('https://www.youtube.com')
        sapisid_cookie = dict_get(
            yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
        if sapisid_cookie is None:
            return
        time_now = round(time.time())
        # SAPISID cookie is required if not already present
        if not yt_cookies.get('SAPISID'):
            self._set_cookie(
                '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
        # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
        sapisidhash = hashlib.sha1(
            f'{time_now} {sapisid_cookie.value} {origin}'.encode('utf-8')).hexdigest()
        return f'SAPISIDHASH {time_now}_{sapisidhash}'

    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page',
                  context=None, api_key=None, api_hostname=None, default_client='WEB'):
        """
        POST `query` to the youtubei/v1 endpoint `ep` and return the parsed JSON.

        @param ep              endpoint name appended to /youtubei/v1/
        @param query           dict merged into the request body next to 'context'
        @param headers         extra headers; they override the generated ones
        @param context         INNERTUBE context; defaults to that of `default_client`
        @param api_key         API key; defaults to that of `default_client`
        @param api_hostname    host to call; defaults to that of `default_client`
        Remaining arguments are passed on to _download_json.
        """
        data = {'context': context or self._extract_context(default_client=default_client)}
        data.update(query)
        real_headers = self._generate_api_headers(client=default_client)
        real_headers.update({'content-type': 'application/json'})
        if headers:
            real_headers.update(headers)
        # The fallback key must come from the same client as the context,
        # headers and host, not unconditionally from the WEB defaults
        key = api_key or self._extract_api_key(default_client=default_client)
        return self._download_json(
            'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
            video_id=video_id, fatal=fatal, note=note, errnote=errnote,
            data=json.dumps(data).encode('utf8'), headers=real_headers,
            query={'key': key})

    def _extract_yt_initial_data(self, video_id, webpage):
        """
        Find and parse the ytInitialData JSON object embedded in `webpage`.

        The pattern anchored by _YT_INITIAL_BOUNDARY_RE is tried first, then
        the bare _YT_INITIAL_DATA_RE.
        """
        return self._parse_json(
            self._search_regex(
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_identity_token(self, webpage, item_id):
        """Return ID_TOKEN from the page's ytcfg, else from a regex on `webpage`, else None."""
        ytcfg = self._extract_ytcfg(item_id, webpage)
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
                return token
        return self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)

    @staticmethod
    def _extract_account_syncid(*args):
        """
        Extract syncId required to download private playlists of secondary channels
        @params response and/or ytcfg
        @returns the first sync id found among the arguments, or None
        """
        for data in args:
            # ytcfg includes channel_syncid if on secondary channel
            delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
            if delegated_sid:
                return delegated_sid
            sync_ids = (try_get(
                data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                       lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
            if len(sync_ids) >= 2 and sync_ids[1]:
                # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
                # and just "user_syncid||" for primary channel. We only want the channel_syncid
                return sync_ids[0]

    def _extract_ytcfg(self, video_id, webpage):
        """Return the dict passed to ytcfg.set(...) in `webpage`, or {} if missing/unparsable."""
        if not webpage:
            return {}
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False) or {}

    def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None,
                              visitor_data=None, api_hostname=None, client='WEB', session_index=None):
        """
        Build the HTTP headers for an INNERTUBE API request.

        Client name/version come from `ytcfg` with fallback to the defaults
        of `client`. Identity token, page id, auth-user index and visitor id
        headers are added only when the corresponding value is available;
        Authorization/X-Origin are added only when a SAPISIDHASH could be
        generated.
        """
        origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(client))
        headers = {
            'X-YouTube-Client-Name': compat_str(
                self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=client)),
            'X-YouTube-Client-Version': self._extract_client_version(ytcfg, client),
            'Origin': origin
        }
        if not visitor_data and ytcfg:
            visitor_data = try_get(
                self._extract_context(ytcfg, client), lambda x: x['client']['visitorData'], compat_str)
        if identity_token:
            headers['X-Youtube-Identity-Token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
        if session_index is None and ytcfg:
            session_index = self._extract_session_index(ytcfg)
        if account_syncid or session_index is not None:
            # Default to the first session when only a sync id is known
            headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
        if visitor_data:
            headers['X-Goog-Visitor-Id'] = visitor_data
        auth = self._generate_sapisidhash_header(origin)
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = origin
        return headers

    @staticmethod
    def _build_api_continuation_query(continuation, ctp=None):
        """Return the API query dict for `continuation`, with click tracking params when `ctp` is given."""
        query = {
            'continuation': continuation
        }
        # TODO: Inconsistency with clickTrackingParams.
        # Currently we have a fixed ctp contained within context (from ytcfg)
        # and a ctp in root query for continuation.
        if ctp:
            query['clickTracking'] = {'clickTrackingParams': ctp}
        return query

    @classmethod
    def _extract_next_continuation_data(cls, renderer):
        """
        Build a continuation query from the nextContinuationData or
        reloadContinuationData of `renderer`; None if there is no token.
        """
        next_continuation = try_get(
            renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                       lambda x: x['continuation']['reloadContinuationData']), dict)
        if not next_continuation:
            return
        continuation = next_continuation.get('continuation')
        if not continuation:
            return
        ctp = next_continuation.get('clickTrackingParams')
        return cls._build_api_continuation_query(continuation, ctp)

    @classmethod
    def _extract_continuation_ep_data(cls, continuation_ep: dict):
        """
        Build a continuation query from a continuation endpoint dict
        (continuationCommand.token); None if it is not a dict or has no token.
        """
        if isinstance(continuation_ep, dict):
            continuation = try_get(
                continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
            if not continuation:
                return
            ctp = continuation_ep.get('clickTrackingParams')
            return cls._build_api_continuation_query(continuation, ctp)

    @classmethod
    def _extract_continuation(cls, renderer):
        """
        Return the continuation query for `renderer`, or None.

        The renderer's own continuation data is preferred; otherwise the
        first usable continuationItemRenderer found in its 'contents' and
        'items' lists is used.
        """
        next_continuation = cls._extract_next_continuation_data(renderer)
        if next_continuation:
            return next_continuation

        contents = []
        for key in ('contents', 'items'):
            contents.extend(try_get(renderer, lambda x: x[key], list) or [])

        for content in contents:
            if not isinstance(content, dict):
                continue
            continuation_ep = try_get(
                content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                          lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
                dict)
            continuation = cls._extract_continuation_ep_data(continuation_ep)
            if continuation:
                return continuation

    @classmethod
    def _extract_alerts(cls, data):
        """
        Yield (alert_type, message) for every alert in data['alerts'] that
        has both a type and a non-empty text. Malformed entries are skipped.
        """
        for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert_dict, dict):
                continue
            for alert in alert_dict.values():
                # Guard against non-dict payloads instead of failing on .get
                if not isinstance(alert, dict):
                    continue
                alert_type = alert.get('type')
                if not alert_type:
                    continue
                message = cls._get_text(alert.get('text'))
                if message:
                    yield alert_type, message

    def _report_alerts(self, alerts, expected=True):
        """
        Report an iterable of (alert_type, message) pairs.

        Everything except the last error is reported as a warning; the last
        alert of type 'error' (case-insensitive) is raised as ExtractorError.
        """
        errors = []
        warnings = []
        for alert_type, alert_message in alerts:
            if alert_type.lower() == 'error':
                errors.append([alert_type, alert_message])
            else:
                warnings.append([alert_type, alert_message])

        for alert_type, alert_message in (warnings + errors[:-1]):
            self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
        if errors:
            raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

    def _extract_and_report_alerts(self, data, *args, **kwargs):
        """Extract the alerts of `data` and hand them to _report_alerts."""
        return self._report_alerts(self._extract_alerts(data), *args, **kwargs)

    def _extract_badges(self, renderer: dict):
        """Return the set of lower-cased metadataBadgeRenderer labels in renderer['badges']."""
        badges = set()
        for badge in try_get(renderer, lambda x: x['badges'], list) or []:
            label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
            if label:
                badges.add(label.lower())
        return badges

    @staticmethod
    def _get_text(data, getter=None, max_runs=None):
        """
        Extract display text from `data`.

        @param getter      one getter or several getters applied to `data`
                           (tried in order); None means `data` itself
        @param max_runs    if truthy, only this many leading runs are joined
        @returns the 'simpleText' value, else the concatenated 'text' of the
                 'runs' (or of `data` itself when it is a list); None when
                 nothing non-empty is found
        """
        for get in variadic(getter):
            d = try_get(data, get) if get is not None else data
            text = try_get(d, lambda x: x['simpleText'], compat_str)
            if text:
                return text
            runs = try_get(d, lambda x: x['runs'], list) or []
            if not runs and isinstance(d, list):
                runs = d

            # A falsy max_runs means "no limit"
            text = ''.join(
                try_get(run, lambda x: x['text'], compat_str) or ''
                for run in runs[:max_runs or None])
            if text:
                return text

    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='WEB'):
        """
        Call the API (see _call_api) with retries and alert handling.

        The request is retried up to the 'extractor_retries' param (default 3)
        on HTTP 500/503/404 and when none of `check_get_keys` yields a value
        in the response. When `fatal` is false, failures are reported as
        warnings and None is returned instead of raising.
        """
        response = None
        last_error = None
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                # Always fatal here so that failures surface as ExtractorError
                # and can be classified below
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if count < retries:
                        continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response

    @staticmethod
    def is_music_url(url):
        """Return True if `url` starts with http(s)://music.youtube.com/."""
        return re.match(r'https?://music\.youtube\.com/', url) is not None

    def _extract_video(self, renderer):
        """
        Convert a video renderer dict into a 'url' result handled by YoutubeIE.

        The view count is parsed from the leading digits/commas of the
        whitespace-stripped viewCountText; the uploader is taken from
        ownerText, falling back to shortBylineText.
        """
        video_id = renderer.get('videoId')
        title = self._get_text(renderer.get('title'))
        description = self._get_text(renderer.get('descriptionSnippet'))
        duration = parse_duration(self._get_text(renderer.get('lengthText')))
        view_count_text = self._get_text(renderer.get('viewCountText')) or ''
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))

        uploader = self._get_text(renderer, (lambda x: x['ownerText'], lambda x: x['shortBylineText']))

        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
0c148415 754
360e1ca5 755class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 756 IE_DESC = 'YouTube.com'
bc2ca1bb 757 _INVIDIOUS_SITES = (
758 # invidious-redirect websites
759 r'(?:www\.)?redirect\.invidious\.io',
760 r'(?:(?:www|dev)\.)?invidio\.us',
761 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
762 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 763 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 764 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 765 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
bc2ca1bb 766 # youtube-dl invidious instances list
767 r'(?:(?:www|no)\.)?invidiou\.sh',
768 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
769 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 770 r'(?:www\.)?invidious\.mastodon\.host',
771 r'(?:www\.)?invidious\.zapashcanon\.fr',
ed807c18 772 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
201c1459 773 r'(?:www\.)?invidious\.tinfoil-hat\.net',
774 r'(?:www\.)?invidious\.himiko\.cloud',
775 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 776 r'(?:www\.)?invidious\.tube',
777 r'(?:www\.)?invidiou\.site',
778 r'(?:www\.)?invidious\.site',
779 r'(?:www\.)?invidious\.xyz',
780 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 781 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 782 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 783 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 784 r'(?:www\.)?tube\.poal\.co',
785 r'(?:www\.)?tube\.connect\.cafe',
786 r'(?:www\.)?vid\.wxzm\.sx',
787 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 788 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 789 r'(?:www\.)?yewtu\.be',
790 r'(?:www\.)?yt\.elukerio\.org',
791 r'(?:www\.)?yt\.lelux\.fi',
792 r'(?:www\.)?invidious\.ggc-project\.de',
793 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 794 r'(?:www\.)?ytprivate\.com',
795 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 796 r'(?:www\.)?invidious\.toot\.koeln',
797 r'(?:www\.)?invidious\.fdn\.fr',
798 r'(?:www\.)?watch\.nettohikari\.com',
ed807c18 799 r'(?:www\.)?invidious\.namazso\.eu',
800 r'(?:www\.)?invidious\.silkky\.cloud',
801 r'(?:www\.)?invidious\.exonip\.de',
802 r'(?:www\.)?invidious\.riverside\.rocks',
803 r'(?:www\.)?invidious\.blamefran\.net',
804 r'(?:www\.)?invidious\.moomoo\.de',
805 r'(?:www\.)?ytb\.trom\.tf',
806 r'(?:www\.)?yt\.cyberhost\.uk',
bc2ca1bb 807 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
808 r'(?:www\.)?qklhadlycap4cnod\.onion',
809 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
810 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
811 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
812 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
813 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
814 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
ed807c18 815 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
816 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
817 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
818 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
bc2ca1bb 819 )
# Verbose-mode ((?x)) pattern accepting full watch/embed/short-link URLs as
# well as a bare video ID.
#  * Group 1 is optional as a whole and covers the scheme, the host and the
#    path/query prefix that precedes the ID.
#  * Named group 'id' is the 11-character video ID.
#  * '(?(1).+)?' allows arbitrary trailing text only when group 1 matched, so
#    a bare ID has to be followed directly by '#' or the end of the string.
#  * The pattern is a %-format template: both '%(invidious)s' placeholders are
#    replaced with the '|'-joined entries of _INVIDIOUS_SITES.
cb7dfeea 820 _VALID_URL = r"""(?x)^
c5e8d7af 821 (
edb53e2d 822 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 823 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
824 (?:www\.)?deturl\.com/www\.youtube\.com|
825 (?:www\.)?pwnyoutube\.com|
826 (?:www\.)?hooktube\.com|
827 (?:www\.)?yourepeat\.com|
828 tube\.majestyc\.net|
829 %(invidious)s|
830 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
831 (?:.*?\#/)? # handle anchor (#/) redirect urls
832 (?: # the various things that can precede the ID:
ac7553d0 833 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 834 |(?: # or the v= param in all its forms
f7000f3a 835 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 836 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 837 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
838 v=
839 )
f4b05232 840 ))
cbaed4bb
S
841 |(?:
842 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
843 vid\.plus| # or vid.plus/xxxx
844 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 845 %(invidious)s
cbaed4bb 846 )/
edb53e2d 847 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 848 )
c5e8d7af 849 )? # all until now is optional -> you can pass the naked ID
201c1459 850 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 851 (?(1).+)? # if we found the ID, everything can follow
9297939e 852 (?:\#|$)""" % {
bc2ca1bb 853 'invidious': '|'.join(_INVIDIOUS_SITES),
854 }
# Alternative regexes that each expose a player identifier as the named
# group 'id':
#   1. '/s/player/<id>/player' paths (id of 8+ characters),
#   2. '/<id>/player…/base.js' paths using the 'player_ias.vflset' or
#      'player-plasma-ias-…vflset' layouts,
#   3. any '.js' path containing a 'vfl…' token.
# NOTE(review): presumably matched against the player JS URL; the consumer
# is outside this chunk - confirm at the call site.
e40c758c 855 _PLAYER_INFO_RE = (
cc2db878 856 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
857 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 858 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 859 )
# Static lookup table of known format properties, keyed by itag (the format
# ID as a string). Each value is a partial dict that may carry 'ext',
# 'width'/'height', 'acodec'/'vcodec', 'abr', 'fps', 'container',
# 'format_note' and 'preference'; entries list only the fields that are fixed
# for that itag (see the notes on itags 36, 138 and 272). 3D and HLS entries
# carry a negative 'preference' (-20 and -10 respectively).
# NOTE(review): how these defaults are merged with data extracted at runtime
# is not visible in this chunk - confirm at the point of use.
2c62dc26 860 _formats = {
c2d3cb4c 861 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
862 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
863 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
864 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
865 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
866 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
867 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
868 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 869 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 870 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
871 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
872 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
873 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
874 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
875 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 876 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 877 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
878 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 879
880
881 # 3D videos
c2d3cb4c 882 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
883 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
884 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
885 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 886 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
887 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
888 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 889
96fb5605 890 # Apple HTTP Live Streaming
11f12195 891 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 892 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
893 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
894 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
895 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
896 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 897 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
898 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
899
900 # DASH mp4 video
d23028a8
S
901 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
902 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
903 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
904 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
905 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 906 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
907 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
908 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
909 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
910 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
911 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
912 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 913
f6f1fc92 914 # Dash mp4 audio
d23028a8
S
915 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
916 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
917 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
918 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
919 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
920 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
921 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
922
923 # Dash webm
d23028a8
S
924 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
925 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
926 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
927 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
928 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
929 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
930 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
931 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
932 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
933 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
934 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
935 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
936 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
937 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
938 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 939 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
940 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
941 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
942 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
943 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
944 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
945 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
946
947 # Dash webm audio
d23028a8
S
948 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
949 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 950
0857baad 951 # Dash webm audio with opus inside
d23028a8
S
952 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
953 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
954 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 955
ce6b9a2d
PH
956 # RTMP (unnamed)
957 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
958
959 # av01 video only formats sometimes served with "unknown" codecs
960 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
961 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
962 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
963 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 964 }
29f7c58a 965 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')  # subtitle format identifiers; NOTE(review): presumably the formats requested per caption track - confirm at the point of use
836a086c 966
# Message strings associated with age-restricted videos.
# NOTE(review): presumably compared against the playability-status reason to
# detect an age gate; the comparison itself is outside this chunk - confirm.
109dd3b2 967 _AGE_GATE_REASONS = (
968 'Sign in to confirm your age',
969 'This video may be inappropriate for some users.',
970 'Sorry, this content is age-restricted.')
971
fd5c4aab
S
972 _GEO_BYPASS = False  # NOTE(review): presumably switches off the base extractor's geo-bypass behaviour for this extractor - confirm against InfoExtractor
973
78caa52a 974 IE_NAME = 'youtube'  # name string for this extractor; NOTE(review): presumably the identifier shown in extractor listings - confirm against InfoExtractor
2eb88d95
PH
975 _TESTS = [
976 {
2d3d2997 977 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
978 'info_dict': {
979 'id': 'BaW_jenozKc',
980 'ext': 'mp4',
3867038a 981 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
982 'uploader': 'Philipp Hagemeister',
983 'uploader_id': 'phihag',
ec85ded8 984 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
985 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
986 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 987 'upload_date': '20121002',
3867038a 988 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 989 'categories': ['Science & Technology'],
3867038a 990 'tags': ['youtube-dl'],
556dbe7f 991 'duration': 10,
dbdaaa23 992 'view_count': int,
3e7c1224
PH
993 'like_count': int,
994 'dislike_count': int,
7c80519c 995 'start_time': 1,
297a564b 996 'end_time': 9,
2eb88d95 997 }
0e853ca4 998 },
fccd3771 999 {
4bc3a23e
PH
1000 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1001 'note': 'Embed-only video (#1746)',
1002 'info_dict': {
1003 'id': 'yZIXLfi8CZQ',
1004 'ext': 'mp4',
1005 'upload_date': '20120608',
1006 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1007 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1008 'uploader': 'SET India',
94bfcd23 1009 'uploader_id': 'setindia',
ec85ded8 1010 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1011 'age_limit': 18,
545cc85d 1012 },
1013 'skip': 'Private video',
fccd3771 1014 },
11b56058 1015 {
8bdd16b4 1016 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1017 'note': 'Use the first video ID in the URL',
1018 'info_dict': {
1019 'id': 'BaW_jenozKc',
1020 'ext': 'mp4',
3867038a 1021 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1022 'uploader': 'Philipp Hagemeister',
1023 'uploader_id': 'phihag',
ec85ded8 1024 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 1025 'upload_date': '20121002',
3867038a 1026 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 1027 'categories': ['Science & Technology'],
3867038a 1028 'tags': ['youtube-dl'],
556dbe7f 1029 'duration': 10,
dbdaaa23 1030 'view_count': int,
11b56058
PM
1031 'like_count': int,
1032 'dislike_count': int,
34a7de29
S
1033 },
1034 'params': {
1035 'skip_download': True,
1036 },
11b56058 1037 },
dd27fd17 1038 {
2d3d2997 1039 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1040 'note': '256k DASH audio (format 141) via DASH manifest',
1041 'info_dict': {
1042 'id': 'a9LDPn-MO4I',
1043 'ext': 'm4a',
1044 'upload_date': '20121002',
1045 'uploader_id': '8KVIDEO',
ec85ded8 1046 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1047 'description': '',
1048 'uploader': '8KVIDEO',
1049 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1050 },
4bc3a23e
PH
1051 'params': {
1052 'youtube_include_dash_manifest': True,
1053 'format': '141',
4919603f 1054 },
de3c7fe0 1055 'skip': 'format 141 not served anymore',
dd27fd17 1056 },
8bdd16b4 1057 # DASH manifest with encrypted signature
1058 {
1059 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1060 'info_dict': {
1061 'id': 'IB3lcPjvWLA',
1062 'ext': 'm4a',
1063 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1064 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1065 'duration': 244,
1066 'uploader': 'AfrojackVEVO',
1067 'uploader_id': 'AfrojackVEVO',
1068 'upload_date': '20131011',
cc2db878 1069 'abr': 129.495,
8bdd16b4 1070 },
1071 'params': {
1072 'youtube_include_dash_manifest': True,
1073 'format': '141/bestaudio[ext=m4a]',
1074 },
1075 },
aa79ac0c
PH
1076 # Controversy video
1077 {
1078 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
1079 'info_dict': {
1080 'id': 'T4XJQO3qol8',
1081 'ext': 'mp4',
556dbe7f 1082 'duration': 219,
aa79ac0c 1083 'upload_date': '20100909',
4fe54c12 1084 'uploader': 'Amazing Atheist',
aa79ac0c 1085 'uploader_id': 'TheAmazingAtheist',
ec85ded8 1086 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 1087 'title': 'Burning Everyone\'s Koran',
545cc85d 1088 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 1089 }
c522adb1 1090 },
dd2d55f1 1091 # Normal age-gate video (embed allowed)
c522adb1 1092 {
2d3d2997 1093 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1094 'info_dict': {
1095 'id': 'HtVdAasjOgU',
1096 'ext': 'mp4',
1097 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1098 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1099 'duration': 142,
c522adb1
JMF
1100 'uploader': 'The Witcher',
1101 'uploader_id': 'WitcherGame',
ec85ded8 1102 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1103 'upload_date': '20140605',
34952f09 1104 'age_limit': 18,
c522adb1
JMF
1105 },
1106 },
8bdd16b4 1107 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1108 # YouTube Red ad is not captured for creator
1109 {
1110 'url': '__2ABJjxzNo',
1111 'info_dict': {
1112 'id': '__2ABJjxzNo',
1113 'ext': 'mp4',
1114 'duration': 266,
1115 'upload_date': '20100430',
1116 'uploader_id': 'deadmau5',
1117 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1118 'creator': 'deadmau5',
1119 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1120 'uploader': 'deadmau5',
1121 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1122 'alt_title': 'Some Chords',
8bdd16b4 1123 },
1124 'expected_warnings': [
1125 'DASH manifest missing',
1126 ]
1127 },
067aa17e 1128 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1129 {
1130 'url': 'lqQg6PlCWgI',
1131 'info_dict': {
1132 'id': 'lqQg6PlCWgI',
1133 'ext': 'mp4',
556dbe7f 1134 'duration': 6085,
90227264 1135 'upload_date': '20150827',
cbe2bd91 1136 'uploader_id': 'olympic',
ec85ded8 1137 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1138 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 1139 'uploader': 'Olympic',
cbe2bd91
PH
1140 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1141 },
1142 'params': {
1143 'skip_download': 'requires avconv',
e52a40ab 1144 }
cbe2bd91 1145 },
6271f1ca
PH
1146 # Non-square pixels
1147 {
1148 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1149 'info_dict': {
1150 'id': '_b-2C3KPAM0',
1151 'ext': 'mp4',
1152 'stretched_ratio': 16 / 9.,
556dbe7f 1153 'duration': 85,
6271f1ca
PH
1154 'upload_date': '20110310',
1155 'uploader_id': 'AllenMeow',
ec85ded8 1156 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1157 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1158 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1159 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1160 },
06b491eb
S
1161 },
1162 # url_encoded_fmt_stream_map is empty string
1163 {
1164 'url': 'qEJwOuvDf7I',
1165 'info_dict': {
1166 'id': 'qEJwOuvDf7I',
f57b7835 1167 'ext': 'webm',
06b491eb
S
1168 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1169 'description': '',
1170 'upload_date': '20150404',
1171 'uploader_id': 'spbelect',
1172 'uploader': 'Наблюдатели Петербурга',
1173 },
1174 'params': {
1175 'skip_download': 'requires avconv',
e323cf3f
S
1176 },
1177 'skip': 'This live event has ended.',
06b491eb 1178 },
067aa17e 1179 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1180 {
1181 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1182 'info_dict': {
1183 'id': 'FIl7x6_3R5Y',
eb6793ba 1184 'ext': 'webm',
da77d856
S
1185 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1186 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1187 'duration': 220,
da77d856
S
1188 'upload_date': '20150625',
1189 'uploader_id': 'dorappi2000',
ec85ded8 1190 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1191 'uploader': 'dorappi2000',
eb6793ba 1192 'formats': 'mincount:31',
da77d856 1193 },
eb6793ba 1194 'skip': 'not actual anymore',
2ee8f5d8 1195 },
8a1a26ce
YCH
1196 # DASH manifest with segment_list
1197 {
1198 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1199 'md5': '8ce563a1d667b599d21064e982ab9e31',
1200 'info_dict': {
1201 'id': 'CsmdDsKjzN8',
1202 'ext': 'mp4',
17ee98e1 1203 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1204 'uploader': 'Airtek',
1205 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1206 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1207 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1208 },
1209 'params': {
1210 'youtube_include_dash_manifest': True,
1211 'format': '135', # bestvideo
be49068d
S
1212 },
1213 'skip': 'This live event has ended.',
2ee8f5d8 1214 },
cf7e015f
S
1215 {
1216 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1217 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1218 'info_dict': {
545cc85d 1219 'id': 'jvGDaLqkpTg',
1220 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1221 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1222 },
1223 'playlist': [{
1224 'info_dict': {
545cc85d 1225 'id': 'jvGDaLqkpTg',
cf7e015f 1226 'ext': 'mp4',
545cc85d 1227 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1228 'description': 'md5:e03b909557865076822aa169218d6a5d',
1229 'duration': 10643,
1230 'upload_date': '20161111',
1231 'uploader': 'Team PGP',
1232 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1233 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1234 },
1235 }, {
1236 'info_dict': {
545cc85d 1237 'id': '3AKt1R1aDnw',
cf7e015f 1238 'ext': 'mp4',
545cc85d 1239 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1240 'description': 'md5:e03b909557865076822aa169218d6a5d',
1241 'duration': 10991,
1242 'upload_date': '20161111',
1243 'uploader': 'Team PGP',
1244 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1245 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1246 },
1247 }, {
1248 'info_dict': {
545cc85d 1249 'id': 'RtAMM00gpVc',
cf7e015f 1250 'ext': 'mp4',
545cc85d 1251 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1252 'description': 'md5:e03b909557865076822aa169218d6a5d',
1253 'duration': 10995,
1254 'upload_date': '20161111',
1255 'uploader': 'Team PGP',
1256 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1257 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1258 },
1259 }, {
1260 'info_dict': {
545cc85d 1261 'id': '6N2fdlP3C5U',
cf7e015f 1262 'ext': 'mp4',
545cc85d 1263 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1264 'description': 'md5:e03b909557865076822aa169218d6a5d',
1265 'duration': 10990,
1266 'upload_date': '20161111',
1267 'uploader': 'Team PGP',
1268 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1269 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1270 },
1271 }],
1272 'params': {
1273 'skip_download': True,
1274 },
cbaed4bb 1275 },
f9f49d87 1276 {
067aa17e 1277 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1278 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1279 'info_dict': {
1280 'id': 'gVfLd0zydlo',
1281 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1282 },
1283 'playlist_count': 2,
be49068d 1284 'skip': 'Not multifeed anymore',
f9f49d87 1285 },
cbaed4bb 1286 {
2d3d2997 1287 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1288 'only_matching': True,
0e49d9a6 1289 },
6d4fc66b 1290 {
2d3d2997 1291 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1292 'only_matching': True,
1293 },
0e49d9a6 1294 {
067aa17e 1295 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1296 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1297 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1298 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1299 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1300 'info_dict': {
1301 'id': 'lsguqyKfVQg',
1302 'ext': 'mp4',
1303 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 1304 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 1305 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1306 'duration': 133,
0e49d9a6
LL
1307 'upload_date': '20151119',
1308 'uploader_id': 'IronSoulElf',
ec85ded8 1309 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1310 'uploader': 'IronSoulElf',
eb6793ba
S
1311 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1312 'track': 'Dark Walk - Position Music',
1313 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 1314 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1315 },
1316 'params': {
1317 'skip_download': True,
1318 },
1319 },
61f92af1 1320 {
067aa17e 1321 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1322 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1323 'only_matching': True,
1324 },
313dfc45
LL
1325 {
1326 # Video with yt:stretch=17:0
1327 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1328 'info_dict': {
1329 'id': 'Q39EVAstoRM',
1330 'ext': 'mp4',
1331 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1332 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1333 'upload_date': '20151107',
1334 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1335 'uploader': 'CH GAMER DROID',
1336 },
1337 'params': {
1338 'skip_download': True,
1339 },
be49068d 1340 'skip': 'This video does not exist.',
313dfc45 1341 },
201c1459 1342 {
1343 # Video with incomplete 'yt:stretch=16:'
1344 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1345 'only_matching': True,
1346 },
7caf9830
S
1347 {
1348 # Video licensed under Creative Commons
1349 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1350 'info_dict': {
1351 'id': 'M4gD1WSo5mA',
1352 'ext': 'mp4',
1353 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1354 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1355 'duration': 721,
7caf9830
S
1356 'upload_date': '20150127',
1357 'uploader_id': 'BerkmanCenter',
ec85ded8 1358 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1359 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1360 'license': 'Creative Commons Attribution license (reuse allowed)',
1361 },
1362 'params': {
1363 'skip_download': True,
1364 },
1365 },
fd050249
S
1366 {
1367 # Channel-like uploader_url
1368 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1369 'info_dict': {
1370 'id': 'eQcmzGIKrzg',
1371 'ext': 'mp4',
1372 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1373 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1374 'duration': 4060,
fd050249 1375 'upload_date': '20151119',
eb6793ba 1376 'uploader': 'Bernie Sanders',
fd050249 1377 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1378 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1379 'license': 'Creative Commons Attribution license (reuse allowed)',
1380 },
1381 'params': {
1382 'skip_download': True,
1383 },
1384 },
040ac686
S
1385 {
1386 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1387 'only_matching': True,
7f29cf54
S
1388 },
1389 {
067aa17e 1390 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1391 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1392 'only_matching': True,
6496ccb4
S
1393 },
1394 {
1395 # Rental video preview
1396 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1397 'info_dict': {
1398 'id': 'uGpuVWrhIzE',
1399 'ext': 'mp4',
1400 'title': 'Piku - Trailer',
1401 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1402 'upload_date': '20150811',
1403 'uploader': 'FlixMatrix',
1404 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1405 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1406 'license': 'Standard YouTube License',
1407 },
1408 'params': {
1409 'skip_download': True,
1410 },
eb6793ba 1411 'skip': 'This video is not available.',
022a5d66 1412 },
12afdc2a
S
1413 {
1414 # YouTube Red video with episode data
1415 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1416 'info_dict': {
1417 'id': 'iqKdEhx-dD4',
1418 'ext': 'mp4',
1419 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1420 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1421 'duration': 2085,
12afdc2a
S
1422 'upload_date': '20170118',
1423 'uploader': 'Vsauce',
1424 'uploader_id': 'Vsauce',
1425 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1426 'series': 'Mind Field',
1427 'season_number': 1,
1428 'episode_number': 1,
1429 },
1430 'params': {
1431 'skip_download': True,
1432 },
1433 'expected_warnings': [
1434 'Skipping DASH manifest',
1435 ],
1436 },
c7121fa7
S
1437 {
1438 # The following content has been identified by the YouTube community
1439 # as inappropriate or offensive to some audiences.
1440 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1441 'info_dict': {
1442 'id': '6SJNVb0GnPI',
1443 'ext': 'mp4',
1444 'title': 'Race Differences in Intelligence',
1445 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1446 'duration': 965,
1447 'upload_date': '20140124',
1448 'uploader': 'New Century Foundation',
1449 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1450 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1451 },
1452 'params': {
1453 'skip_download': True,
1454 },
545cc85d 1455 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1456 },
022a5d66
S
1457 {
1458 # itag 212
1459 'url': '1t24XAntNCY',
1460 'only_matching': True,
fd5c4aab
S
1461 },
1462 {
1463 # geo restricted to JP
1464 'url': 'sJL6WA-aGkQ',
1465 'only_matching': True,
1466 },
cd5a74a2
S
1467 {
1468 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1469 'only_matching': True,
1470 },
bc2ca1bb 1471 {
1472 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1473 'only_matching': True,
1474 },
1475 {
1476 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1477 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1478 'only_matching': True,
1479 },
825cd268
RA
1480 {
1481 # DRM protected
1482 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1483 'only_matching': True,
4fe54c12
S
1484 },
1485 {
1486 # Video with unsupported adaptive stream type formats
1487 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1488 'info_dict': {
1489 'id': 'Z4Vy8R84T1U',
1490 'ext': 'mp4',
1491 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1492 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1493 'duration': 433,
1494 'upload_date': '20130923',
1495 'uploader': 'Amelia Putri Harwita',
1496 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1497 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1498 'formats': 'maxcount:10',
1499 },
1500 'params': {
1501 'skip_download': True,
1502 'youtube_include_dash_manifest': False,
1503 },
5429d6a9 1504 'skip': 'not actual anymore',
5caabd3c 1505 },
1506 {
822b9d9c 1507 # Youtube Music Auto-generated description
5caabd3c 1508 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1509 'info_dict': {
1510 'id': 'MgNrAu2pzNs',
1511 'ext': 'mp4',
1512 'title': 'Voyeur Girl',
1513 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1514 'upload_date': '20190312',
5429d6a9
S
1515 'uploader': 'Stephen - Topic',
1516 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1517 'artist': 'Stephen',
1518 'track': 'Voyeur Girl',
1519 'album': 'it\'s too much love to know my dear',
1520 'release_date': '20190313',
1521 'release_year': 2019,
1522 },
1523 'params': {
1524 'skip_download': True,
1525 },
1526 },
66b48727
RA
1527 {
1528 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1529 'only_matching': True,
1530 },
011e75e6
S
1531 {
1532 # invalid -> valid video id redirection
1533 'url': 'DJztXj2GPfl',
1534 'info_dict': {
1535 'id': 'DJztXj2GPfk',
1536 'ext': 'mp4',
1537 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1538 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1539 'upload_date': '20090125',
1540 'uploader': 'Prochorowka',
1541 'uploader_id': 'Prochorowka',
1542 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1543 'artist': 'Panjabi MC',
1544 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1545 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1546 },
1547 'params': {
1548 'skip_download': True,
1549 },
545cc85d 1550 'skip': 'Video unavailable',
ea74e00b
DP
1551 },
1552 {
1553 # empty description results in an empty string
1554 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1555 'info_dict': {
1556 'id': 'x41yOUIvK2k',
1557 'ext': 'mp4',
1558 'title': 'IMG 3456',
1559 'description': '',
1560 'upload_date': '20170613',
1561 'uploader_id': 'ElevageOrVert',
1562 'uploader': 'ElevageOrVert',
1563 },
1564 'params': {
1565 'skip_download': True,
1566 },
1567 },
a0566bbf 1568 {
29f7c58a 1569 # with '};' inside yt initial data (see [1])
1570 # see [2] for an example with '};' inside ytInitialPlayerResponse
1571 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1572 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1573 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1574 'info_dict': {
1575 'id': 'CHqg6qOn4no',
1576 'ext': 'mp4',
1577 'title': 'Part 77 Sort a list of simple types in c#',
1578 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1579 'upload_date': '20130831',
1580 'uploader_id': 'kudvenkat',
1581 'uploader': 'kudvenkat',
1582 },
1583 'params': {
1584 'skip_download': True,
1585 },
1586 },
29f7c58a 1587 {
1588 # another example of '};' in ytInitialData
1589 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1590 'only_matching': True,
1591 },
1592 {
1593 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1594 'only_matching': True,
1595 },
545cc85d 1596 {
cc2db878 1597 # https://github.com/ytdl-org/youtube-dl/pull/28094
1598 'url': 'OtqTfy26tG0',
1599 'info_dict': {
1600 'id': 'OtqTfy26tG0',
1601 'ext': 'mp4',
1602 'title': 'Burn Out',
1603 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1604 'upload_date': '20141120',
1605 'uploader': 'The Cinematic Orchestra - Topic',
1606 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1607 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1608 'artist': 'The Cinematic Orchestra',
1609 'track': 'Burn Out',
1610 'album': 'Every Day',
1611 'release_data': None,
1612 'release_year': None,
1613 },
1614 'params': {
1615 'skip_download': True,
1616 },
545cc85d 1617 },
bc2ca1bb 1618 {
1619 # controversial video, only works with bpctr when authenticated with cookies
1620 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1621 'only_matching': True,
1622 },
f7ad7160 1623 {
1624 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1625 'url': 'cBvYw8_A0vQ',
1626 'info_dict': {
1627 'id': 'cBvYw8_A0vQ',
1628 'ext': 'mp4',
1629 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1630 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1631 'upload_date': '20201120',
1632 'uploader': 'Walk around Japan',
1633 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1634 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1635 },
1636 'params': {
1637 'skip_download': True,
1638 },
0fb983f6 1639 }, {
1640 # Has multiple audio streams
1641 'url': 'WaOKSUlf4TM',
1642 'only_matching': True
9297939e 1643 }, {
1644 # Requires Premium: has format 141 when requested using YTM url
1645 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1646 'only_matching': True
1647 }, {
120916da 1648 # multiple subtitles with same lang_code
1649 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1650 'only_matching': True,
109dd3b2 1651 }, {
1652 # Force use android client fallback
1653 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1654 'info_dict': {
1655 'id': 'YOelRv7fMxY',
1656 'title': 'Digging a Secret Tunnel from my Workshop',
1657 'ext': '3gp',
1658 'upload_date': '20210624',
1659 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1660 'uploader': 'colinfurze',
1661 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1662 'description': 'md5:ecb672623246d98c6c562eed6ae798c3'
1663 },
1664 'params': {
1665 'format': '17', # 3gp format available on android
1666 'extractor_args': {'youtube': {'player_client': ['android']}},
1667 },
120916da 1668 },
109dd3b2 1669 {
1670 # Skip download of additional client configs (remix client config in this case)
1671 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1672 'only_matching': True,
1673 'params': {
1674 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1675 },
1676 }
2eb88d95
PH
1677 ]
1678
@classmethod
def suitable(cls, url):
    """
    Tell whether this extractor should handle *url*.

    URLs carrying a non-empty ``list`` query parameter are rejected here;
    everything else is decided by the parent class.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_values = parse_qs(url).get('list', [None])
    return False if list_values[0] else super(YoutubeIE, cls).suitable(url)
1688
def __init__(self, *args, **kwargs):
    """Initialise the base extractor and start with two empty caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player code (see _load_player)
    # _player_cache: (player URL, signature shape) -> signature function
    self._code_cache, self._player_cache = {}, {}
e0df6211 1693
def _extract_player_url(self, ytcfg=None, webpage=None):
    """
    Return the absolute URL of the JS player, or None if it cannot be found.

    The URL is read from ytcfg['PLAYER_JS_URL'] when present; otherwise it
    is searched for in *webpage* (skipped when no webpage is available).
    Protocol-relative and site-relative URLs are made absolute.
    """
    player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
    if not player_url and webpage:
        player_url = self._search_regex(
            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
            webpage, 'player URL', fatal=False)
    if not player_url:
        # The non-fatal search may come back empty; report "unknown"
        # instead of crashing on the string operations below
        return None
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)
    return player_url
1706
def _signature_cache_id(self, example_sig):
    """ Return a string representation of a signature """
    # Only the lengths of the dot-separated parts are kept, e.g. 'abc.de' -> '3.2'
    part_lengths = [len(part) for part in example_sig.split('.')]
    return '.'.join(map(compat_str, part_lengths))
60064c53 1710
@classmethod
def _extract_player_info(cls, player_url):
    """
    Return the player id captured by the first pattern of
    cls._PLAYER_INFO_RE that matches *player_url*.

    Raises ExtractorError when none of the patterns match.
    """
    candidates = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    id_match = next((m for m in candidates if m), None)
    if id_match is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_match.group('id')
e40c758c 1720
109dd3b2 1721 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1722 player_id = self._extract_player_info(player_url)
1723 if player_id not in self._code_cache:
1724 self._code_cache[player_id] = self._download_webpage(
1725 player_url, video_id, fatal=fatal,
1726 note='Downloading player ' + player_id,
1727 errnote='Download of %s failed' % player_url)
1728 return player_id in self._code_cache
1729
def _extract_signature_function(self, video_id, player_url, example_sig):
    """
    Return a callable that descrambles signatures shaped like *example_sig*.

    A previously stored index list from the 'youtube-sigfuncs' cache is
    used when available; otherwise the function is parsed out of the
    player code and its index list is stored for next time.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (
        player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    stored_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if stored_spec is not None:
        def apply_stored_spec(s):
            # Each entry is an index into the scrambled signature
            return ''.join(s[i] for i in stored_spec)
        return apply_stored_spec

    if not self._load_player(video_id, player_url):
        return None

    sig_func = self._parse_sig_js(self._code_cache[player_id])

    # Feed a string whose characters are their own positions, so the output
    # reveals which input index ends up at each output position
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(c) for c in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, index_spec)
    return sig_func
83799698 1752
def _print_sig_code(self, func, example_sig):
    """
    Print Python code equivalent to the signature function *func*.

    *func* is probed with a string whose characters are their own
    positions; the resulting index list is rendered as a sum of
    single-index and slice expressions on ``s``.
    """
    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            starts = str(start) if start != 0 else ''
            stop = end + step
            ends = ':' if stop < 0 else ':%d' % stop
            steps = (':%d' % step) if step != 1 else ''
            return 's[%s%s%s]' % (starts, ends, steps)

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for prev, i in zip(idxs[:-1], idxs[1:]):
            delta = i - prev
            if step is not None:
                # Inside a run: keep going while the stride holds,
                # otherwise emit the finished slice
                if delta != step:
                    yield _genslice(start, prev, step)
                    step = None
                continue
            if delta in (-1, 1):
                step, start = delta, prev
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(c) for c in func(probe)]
    expr_code = ' + '.join(gen_sig_code(index_spec))
    part_lengths = ', '.join(compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    template = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                ' return %s\n')
    code = template % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1791
def _parse_sig_js(self, jscode):
    """
    Locate the signature function in the player code *jscode* and return
    a Python callable that runs it on a single string argument.
    """
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        sig_name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature_function(s):
        # The extracted function takes its arguments as a list
        return initial_function([s])

    return run_signature_function
1815
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""

    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    try:
        # One function per (player, signature shape) pair
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key in self._player_cache:
            func = self._player_cache[cache_key]
        else:
            func = self._extract_signature_function(video_id, player_url, s)
            self._player_cache[cache_key] = func
        if self.get_param('youtube_print_sig_code'):
            self._print_sig_code(func, s)
        return func(s)
    except Exception as e:
        # Any failure is reported as an ExtractorError carrying the traceback
        tb = traceback.format_exc()
        raise ExtractorError(
            'Signature extraction failed: ' + tb, cause=e)
e0df6211 1837
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    ytcfg['STS'] is preferred; otherwise the value is searched for in the
    player code. Without a player_url this raises ExtractorError when
    *fatal* is set, else warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return
    if not self._load_player(video_id, player_url, fatal=fatal):
        return sts
    code = self._code_cache[self._extract_player_info(player_url)]
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
1862
def _mark_watched(self, video_id, player_response):
    """
    Request the playback tracking URL found in *player_response*.

    Does nothing when the response carries no valid URL; a failed request
    is not fatal.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn = ''.join([CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)])

    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)
    query['ver'] = ['2']
    query['cpn'] = [cpn]
    encoded_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(url_parts._replace(query=encoded_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1887
@staticmethod
def _extract_urls(webpage):
    """
    Collect YouTube embeds found in *webpage*.

    The result mixes embed URLs (iframe/embed/object/SWF style markup) with
    bare ids taken from lazyYT and "YouTube Video Importer" markup.
    """
    # Embedded YouTube player
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    entries = [unescapeHTML(mobj.group('url')) for mobj in re.finditer(embed_re, webpage)]

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    entries.extend(unescapeHTML(lazy_id) for lazy_id in lazy_ids)

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    # Each findall hit is a tuple of groups; the id is the last one
    entries.extend(groups[-1] for groups in re.findall(importer_re, webpage))

    return entries
1919
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by _extract_urls, or None if there is none."""
    for url in YoutubeIE._extract_urls(webpage):
        return url
    return None
1924
@classmethod
def extract_id(cls, url):
    """
    Return the video id, i.e. group 2 of cls._VALID_URL matched against *url*.

    Raises ExtractorError when the URL does not match.
    """
    mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
    if mobj is not None:
        return mobj.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1932
def _extract_chapters_from_json(self, data, video_id, duration):
    """
    Build a list of chapter dicts (start_time, end_time, title) from the
    chaptered player bar in *data*.

    A chapter ends where the next one starts; the last chapter ends at
    *duration*. Chapters without a usable start or end time are dropped.
    Returns None when *data* has no chapter list.
    """
    chapters_list = try_get(
        data,
        lambda x: x['playerOverlays']['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']['decoratedPlayerBarRenderer']
                   ['playerBar']['chapteredPlayerBarRenderer']['chapters'],
        list)
    if not chapters_list:
        return

    def chapter_time(chapter):
        # Start offset is given in milliseconds; convert to seconds
        start_millis = try_get(
            chapter, lambda x: x['chapterRenderer']['timeRangeStartMillis'], int)
        return float_or_none(start_millis, scale=1000)

    chapters = []
    chapter_count = len(chapters_list)
    for index, chapter in enumerate(chapters_list):
        start_time = chapter_time(chapter)
        if start_time is None:
            continue
        if index + 1 < chapter_count:
            end_time = chapter_time(chapters_list[index + 1])
        else:
            end_time = duration
        if end_time is None:
            continue
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': try_get(
                chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return chapters
1972
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """
    Find the JSON matched by *regex* in *webpage* and parse it.

    The pattern is tried with the boundary suffix first and bare second;
    when neither matches, '{}' is parsed instead. Parsing is non-fatal.
    """
    bounded_regex = r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE)
    json_string = self._search_regex(
        (bounded_regex, regex), webpage, name, default='{}')
    return self._parse_json(json_string, video_id, fatal=False)
84213ea8 1977
d92f5d5a 1978 @staticmethod
1979 def parse_time_text(time_text):
1980 """
1981 Parse the comment time text
1982 time_text is in the format 'X units ago (edited)'
1983 """
1984 time_text_split = time_text.split(' ')
1985 if len(time_text_split) >= 3:
1986 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1987
a1c5d2ca
M
1988 def _extract_comment(self, comment_renderer, parent=None):
1989 comment_id = comment_renderer.get('commentId')
1990 if not comment_id:
1991 return
fe93e2c4 1992
1993 text = self._get_text(comment_renderer.get('contentText'))
1994
49bd8c66 1995 # note: timestamp is an estimate calculated from the current time and time_text
fe93e2c4 1996 time_text = self._get_text(comment_renderer.get('publishedTimeText')) or ''
1997 time_text_dt = self.parse_time_text(time_text)
1998 if isinstance(time_text_dt, datetime.datetime):
1999 timestamp = calendar.timegm(time_text_dt.timetuple())
2000 author = self._get_text(comment_renderer.get('authorText'))
a1c5d2ca
M
2001 author_id = try_get(comment_renderer,
2002 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
fe93e2c4 2003
49bd8c66 2004 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2005 lambda x: x['likeCount']), compat_str)) or 0
a1c5d2ca
M
2006 author_thumbnail = try_get(comment_renderer,
2007 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2008
2009 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
97524332 2010 is_favorited = 'creatorHeart' in (try_get(
2011 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
a1c5d2ca
M
2012 return {
2013 'id': comment_id,
2014 'text': text,
d92f5d5a 2015 'timestamp': timestamp,
a1c5d2ca
M
2016 'time_text': time_text,
2017 'like_count': votes,
97524332 2018 'is_favorited': is_favorited,
a1c5d2ca
M
2019 'author': author,
2020 'author_id': author_id,
2021 'author_thumbnail': author_thumbnail,
2022 'author_is_uploader': author_is_uploader,
2023 'parent': parent or 'root'
2024 }
2025
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, video_id, parent=None, comment_counts=None):
    """
    Generator over the comments reachable from *root_continuation_data*.

    Yields comment dicts (see _extract_comment) and, for the top-level
    call, possibly an int carrying the estimated total number of comments.
    Replies are fetched by recursing with *parent* set to the id of the
    comment being replied to.

    comment_counts is a shared, mutable list:
    [comments so far, estimated total comments, current comment thread #].
    """

    def extract_header(contents):
        _total_comments = 0
        _continuation = None
        for content in contents:
            comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
            expected_comment_count = parse_count(self._get_text(
                comments_header_renderer, (lambda x: x['countText'], lambda x: x['commentsCount']), max_runs=1))

            if expected_comment_count:
                comment_counts[1] = expected_comment_count
                self.to_screen('Downloading ~%d comments' % expected_comment_count)
                _total_comments = comment_counts[1]
            sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
            comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = sort_menu_item.get('title')
            if isinstance(sort_text, compat_str):
                sort_text = sort_text.lower()
            else:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text)
            break
        return _total_comments, _continuation

    def extract_thread(contents):
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # `dict` is the expected type of the result, not a second getter
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    video_id, parent=comment.get('id'), comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    # YouTube comments have a max depth of 2
    max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
    if max_depth == 1 and parent:
        return
    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    continuation = self._extract_continuation(root_continuation_data)
    if continuation and len(continuation['continuation']) < 27:
        self.write_debug('Detected old API continuation token. Generating new API compatible token.')
        continuation_token = self._generate_comment_continuation(video_id)
        continuation = self._build_api_continuation_query(continuation_token, None)

    visitor_data = None
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    comment_counts[2], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
        if not response:
            break
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

        continuation = None
        if isinstance(continuation_contents, list):
            for continuation_section in continuation_contents:
                if not isinstance(continuation_section, dict):
                    continue
                continuation_items = try_get(
                    continuation_section,
                    (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                     lambda x: x['appendContinuationItemsAction']['continuationItems']),
                    list) or []
                if is_first_continuation:
                    total_comments, continuation = extract_header(continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break
                    continue
                # Number of entries yielded for this section; counting from 1
                # so that a single entry is not mistaken for "nothing received"
                count = 0
                for count, entry in enumerate(extract_thread(continuation_items), start=1):
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break

        # Deprecated response structure
        elif isinstance(continuation_contents, dict):
            known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
            for key, continuation_renderer in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                if not isinstance(continuation_renderer, dict):
                    continue
                if is_first_continuation:
                    header_continuation_items = [continuation_renderer.get('header') or {}]
                    total_comments, continuation = extract_header(header_continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break

                # Sometimes YouTube provides a continuation without any comments
                # In most cases we end up just downloading these with very little comments to come.
                count = 0
                for count, entry in enumerate(
                        extract_thread(continuation_renderer.get('contents') or {}), start=1):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                if count == 0:
                    if not parent:
                        self.report_warning('No comments received - assuming end of comments')
                    continuation = None
                break
a1c5d2ca 2196
2d6659b9 2197 @staticmethod
2198 def _generate_comment_continuation(video_id):
2199 """
2200 Generates initial comment section continuation token from given video id
2201 """
2202 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2203 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2204 new_continuation_intlist = list(itertools.chain.from_iterable(
2205 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2206 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2207
def _extract_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""
    known_entry_comment_renderers = ('itemSectionRenderer',)

    def _iter_comment_items(entries):
        # Only the first known renderer of each entry is followed
        if not isinstance(entries, list):
            return
        for entry in entries:
            for key, renderer in entry.items():
                if key in known_entry_comment_renderers:
                    yield from self._comment_entries(
                        renderer, video_id=video_id, ytcfg=ytcfg,
                        identity_token=self._extract_identity_token(webpage, item_id=video_id),
                        account_syncid=self._extract_account_syncid(ytcfg))
                    break

    comments, estimated_total = [], 0
    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')

    try:
        for item in _iter_comment_items(contents):
            if len(comments) >= max_comments:
                break
            if isinstance(item, int):
                # An int is the estimated total, not a comment
                estimated_total = item
            else:
                comments.append(item)
    except KeyboardInterrupt:
        # Keep whatever has been collected so far
        self.to_screen('Interrupted by user')
    self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
    return {
        'comments': comments,
        'comment_count': len(comments),
    }
2241
109dd3b2 2242 @staticmethod
2243 def _generate_player_context(sts=None):
2244 context = {
2245 'html5Preference': 'HTML5_PREF_WANTS',
2246 }
2247 if sts is not None:
2248 context['signatureTimestamp'] = sts
2249 return {
2250 'playbackContext': {
2251 'contentPlaybackContext': context
2252 }
2253 }
2254
4e6767b5 2255 @staticmethod
c888ffb9 2256 def _get_video_info_params(video_id, client='TVHTML5'):
2257 GVI_CLIENTS = {
2258 'ANDROID': {
2259 'c': 'ANDROID',
2260 'cver': '16.20',
2261 },
2262 'TVHTML5': {
2263 'c': 'TVHTML5',
2264 'cver': '6.20180913',
2265 }
2266 }
2267 query = {
4e6767b5 2268 'video_id': video_id,
2269 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c888ffb9 2270 'html5': '1'
4e6767b5 2271 }
c888ffb9 2272 query.update(GVI_CLIENTS.get(client))
2273 return query
4e6767b5 2274
c5e8d7af 2275 def _real_extract(self, url):
cf7e015f 2276 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 2277 video_id = self._match_id(url)
9297939e 2278
2279 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
2280
545cc85d 2281 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 2282 webpage_url = base_url + 'watch?v=' + video_id
2283 webpage = self._download_webpage(
cce889b9 2284 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 2285
109dd3b2 2286 ytcfg = self._extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2287 identity_token = self._extract_identity_token(webpage, video_id)
314ee305 2288 session_index = self._extract_session_index(ytcfg)
109dd3b2 2289 player_url = self._extract_player_url(ytcfg, webpage)
2290
2d6659b9 2291 player_client = self._configuration_arg('player_client', [''])[0]
4bb6b02f 2292 if player_client not in ('web', 'android', ''):
c888ffb9 2293 self.report_warning(f'Invalid player_client {player_client} given. Falling back to android client.')
2294 force_mobile_client = player_client != 'web'
4bb6b02f 2295 player_skip = self._configuration_arg('player_skip')
fe93e2c4 2296 player_response = None
2297 if webpage:
2298 player_response = self._extract_yt_initial_variable(
2299 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2300 video_id, 'initial player response')
109dd3b2 2301
fe93e2c4 2302 syncid = self._extract_account_syncid(ytcfg, player_response)
2303 headers = self._generate_api_headers(ytcfg, identity_token, syncid, session_index=session_index)
9297939e 2304
2305 ytm_streaming_data = {}
2306 if is_music_url:
109dd3b2 2307 ytm_webpage = None
2308 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2309 if sts and not force_mobile_client and 'configs' not in player_skip:
2310 ytm_webpage = self._download_webpage(
2311 'https://music.youtube.com',
2d6659b9 2312 video_id, fatal=False, note='Downloading remix client config')
109dd3b2 2313
2314 ytm_cfg = self._extract_ytcfg(video_id, ytm_webpage) or {}
2315 ytm_client = 'WEB_REMIX'
2316 if not sts or force_mobile_client:
2317 # Android client already has signature descrambled
2318 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2319 if not sts:
c888ffb9 2320 self.report_warning('Falling back to android remix client for player API.')
109dd3b2 2321 ytm_client = 'ANDROID_MUSIC'
2322 ytm_cfg = {}
2323
2324 ytm_headers = self._generate_api_headers(
2325 ytm_cfg, identity_token, syncid,
314ee305 2326 client=ytm_client, session_index=session_index)
109dd3b2 2327 ytm_query = {'videoId': video_id}
2328 ytm_query.update(self._generate_player_context(sts))
2329
2330 ytm_player_response = self._extract_response(
2331 item_id=video_id, ep='player', query=ytm_query,
2332 ytcfg=ytm_cfg, headers=ytm_headers, fatal=False,
2333 default_client=ytm_client,
c888ffb9 2334 note='Downloading %sremix player API JSON' % ('android ' if force_mobile_client else ''))
2d6659b9 2335 ytm_streaming_data = try_get(ytm_player_response, lambda x: x['streamingData'], dict) or {}
109dd3b2 2336
109dd3b2 2337 if not player_response or force_mobile_client:
2338 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2339 yt_client = 'WEB'
2340 ytpcfg = ytcfg
2341 ytp_headers = headers
2342 if not sts or force_mobile_client:
2343 # Android client already has signature descrambled
2344 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2345 if not sts:
c888ffb9 2346 self.report_warning('Falling back to android client for player API.')
109dd3b2 2347 yt_client = 'ANDROID'
2348 ytpcfg = {}
314ee305 2349 ytp_headers = self._generate_api_headers(ytpcfg, identity_token, syncid,
2350 client=yt_client, session_index=session_index)
109dd3b2 2351
2352 yt_query = {'videoId': video_id}
2353 yt_query.update(self._generate_player_context(sts))
2354 player_response = self._extract_response(
2355 item_id=video_id, ep='player', query=yt_query,
2356 ytcfg=ytpcfg, headers=ytp_headers, fatal=False,
2357 default_client=yt_client,
c888ffb9 2358 note='Downloading %splayer API JSON' % ('android ' if force_mobile_client else '')
2359 ) or player_response
545cc85d 2360
109dd3b2 2361 # Age-gate workarounds
545cc85d 2362 playability_status = player_response.get('playabilityStatus') or {}
109dd3b2 2363 if playability_status.get('reason') in self._AGE_GATE_REASONS:
c888ffb9 2364 gvi_clients = ('ANDROID', 'TVHTML5') if force_mobile_client else ('TVHTML5', 'ANDROID')
2365 for gvi_client in gvi_clients:
2366 pr = self._parse_json(try_get(compat_parse_qs(
2367 self._download_webpage(
2368 base_url + 'get_video_info', video_id,
2369 'Refetching age-gated %s info webpage' % gvi_client.lower(),
2370 'unable to download video info webpage', fatal=False,
2371 query=self._get_video_info_params(video_id, client=gvi_client))),
2372 lambda x: x['player_response'][0],
2373 compat_str) or '{}', video_id)
2374 if pr:
2375 break
109dd3b2 2376 if not pr:
2377 self.report_warning('Falling back to embedded-only age-gate workaround.')
2378 embed_webpage = None
2379 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2380 if sts and not force_mobile_client and 'configs' not in player_skip:
2381 embed_webpage = self._download_webpage(
2382 'https://www.youtube.com/embed/%s?html5=1' % video_id,
2383 video_id=video_id, note='Downloading age-gated embed config')
2384
2385 ytcfg_age = self._extract_ytcfg(video_id, embed_webpage) or {}
2386 # If we extracted the embed webpage, it'll tell us if we can view the video
2387 embedded_pr = self._parse_json(
2388 try_get(ytcfg_age, lambda x: x['PLAYER_VARS']['embedded_player_response'], str) or '{}',
2389 video_id=video_id)
2390 embedded_ps_reason = try_get(embedded_pr, lambda x: x['playabilityStatus']['reason'], str) or ''
2391 if embedded_ps_reason not in self._AGE_GATE_REASONS:
2392 yt_client = 'WEB_EMBEDDED_PLAYER'
2393 if not sts or force_mobile_client:
2394 # Android client already has signature descrambled
2395 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2396 if not sts:
2397 self.report_warning(
c888ffb9 2398 'Falling back to android embedded client for player API (note: some formats may be missing).')
109dd3b2 2399 yt_client = 'ANDROID_EMBEDDED_PLAYER'
2400 ytcfg_age = {}
2401
2402 ytage_headers = self._generate_api_headers(
314ee305 2403 ytcfg_age, identity_token, syncid,
2404 client=yt_client, session_index=session_index)
109dd3b2 2405 yt_age_query = {'videoId': video_id}
2406 yt_age_query.update(self._generate_player_context(sts))
2407 pr = self._extract_response(
2408 item_id=video_id, ep='player', query=yt_age_query,
2409 ytcfg=ytcfg_age, headers=ytage_headers, fatal=False,
2410 default_client=yt_client,
c888ffb9 2411 note='Downloading %sage-gated player API JSON' % ('android ' if force_mobile_client else '')
109dd3b2 2412 ) or {}
2413
545cc85d 2414 if pr:
2415 player_response = pr
2416
2417 trailer_video_id = try_get(
2418 playability_status,
2419 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
2420 compat_str)
2421 if trailer_video_id:
2422 return self.url_result(
2423 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 2424
545cc85d 2425 search_meta = (
2426 lambda x: self._html_search_meta(x, webpage, default=None)) \
2427 if webpage else lambda x: None
dbdaaa23 2428
545cc85d 2429 video_details = player_response.get('videoDetails') or {}
37357d21 2430 microformat = try_get(
545cc85d 2431 player_response,
2432 lambda x: x['microformat']['playerMicroformatRenderer'],
2433 dict) or {}
2434 video_title = video_details.get('title') \
fe93e2c4 2435 or self._get_text(microformat.get('title')) \
545cc85d 2436 or search_meta(['og:title', 'twitter:title', 'title'])
2437 video_description = video_details.get('shortDescription')
cf7e015f 2438
8fe10494 2439 if not smuggled_data.get('force_singlefeed', False):
a06916d9 2440 if not self.get_param('noplaylist'):
8fe10494
S
2441 multifeed_metadata_list = try_get(
2442 player_response,
2443 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 2444 compat_str)
8fe10494
S
2445 if multifeed_metadata_list:
2446 entries = []
2447 feed_ids = []
2448 for feed in multifeed_metadata_list.split(','):
2449 # Unquote should take place before split on comma (,) since textual
2450 # fields may contain comma as well (see
067aa17e 2451 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 2452 feed_data = compat_parse_qs(
2453 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
2454
def feed_entry(name):
    """Look up the first value stored under `name` in the current feed's data.

    Reads `feed_data` (the parsed query-string mapping of the feed being
    processed) from the enclosing scope and delegates the guarded lookup
    and the `compat_str` type check to `try_get`.
    """
    def first_value(data):
        # parsed query strings map each key to a list of values
        return data[name][0]

    return try_get(feed_data, first_value, compat_str)
6b09401b
S
2458
2459 feed_id = feed_entry('id')
2460 if not feed_id:
2461 continue
2462 feed_title = feed_entry('title')
2463 title = video_title
2464 if feed_title:
2465 title += ' (%s)' % feed_title
8fe10494
S
2466 entries.append({
2467 '_type': 'url_transparent',
2468 'ie_key': 'Youtube',
2469 'url': smuggle_url(
545cc85d 2470 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 2471 {'force_singlefeed': True}),
6b09401b 2472 'title': title,
8fe10494 2473 })
6b09401b 2474 feed_ids.append(feed_id)
8fe10494
S
2475 self.to_screen(
2476 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2477 % (', '.join(feed_ids), video_id))
545cc85d 2478 return self.playlist_result(
2479 entries, video_id, video_title, video_description)
8fe10494
S
2480 else:
2481 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 2482
9297939e 2483 formats, itags, stream_ids = [], [], []
cc2db878 2484 itag_qualities = {}
d3fc8074 2485 q = qualities([
60bdb7bd 2486 # "tiny" is the smallest video-only format. But some audio-only formats
2487 # were also labeled "tiny". It is not clear if such formats still exist
d3fc8074 2488 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2489 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2490 ])
9297939e 2491
545cc85d 2492 streaming_data = player_response.get('streamingData') or {}
2493 streaming_formats = streaming_data.get('formats') or []
2494 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
9297939e 2495 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2496 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2497
545cc85d 2498 for fmt in streaming_formats:
2499 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2500 continue
321bf820 2501
cc2db878 2502 itag = str_or_none(fmt.get('itag'))
9297939e 2503 audio_track = fmt.get('audioTrack') or {}
2504 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2505 if stream_id in stream_ids:
2506 continue
2507
cc2db878 2508 quality = fmt.get('quality')
d3fc8074 2509 if quality == 'tiny' or not quality:
2510 quality = fmt.get('audioQuality', '').lower() or quality
cc2db878 2511 if itag and quality:
2512 itag_qualities[itag] = quality
2513 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2514 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2515 # number of fragments that would subsequently be requested (with `&sq=N`)
2516 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2517 continue
2518
545cc85d 2519 fmt_url = fmt.get('url')
2520 if not fmt_url:
2521 sc = compat_parse_qs(fmt.get('signatureCipher'))
2522 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2523 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2524 if not (sc and fmt_url and encrypted_sig):
2525 continue
545cc85d 2526 if not player_url:
201e9eaa 2527 continue
545cc85d 2528 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2529 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2530 fmt_url += '&' + sp + '=' + signature
2531
545cc85d 2532 if itag:
2533 itags.append(itag)
9297939e 2534 stream_ids.append(stream_id)
2535
cc2db878 2536 tbr = float_or_none(
2537 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 2538 dct = {
2539 'asr': int_or_none(fmt.get('audioSampleRate')),
2540 'filesize': int_or_none(fmt.get('contentLength')),
2541 'format_id': itag,
0fb983f6 2542 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
545cc85d 2543 'fps': int_or_none(fmt.get('fps')),
2544 'height': int_or_none(fmt.get('height')),
dca3ff4a 2545 'quality': q(quality),
cc2db878 2546 'tbr': tbr,
545cc85d 2547 'url': fmt_url,
2548 'width': fmt.get('width'),
0fb983f6 2549 'language': audio_track.get('id', '').split('.')[0],
545cc85d 2550 }
60bdb7bd 2551 mime_mobj = re.match(
2552 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2553 if mime_mobj:
2554 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2555 dct.update(parse_codecs(mime_mobj.group(2)))
2556 # The 3gp format in android client has a quality of "small",
2557 # but is actually worse than all other formats
2558 if dct['ext'] == '3gp':
2559 dct['quality'] = q('tiny')
cc2db878 2560 no_audio = dct.get('acodec') == 'none'
2561 no_video = dct.get('vcodec') == 'none'
2562 if no_audio:
2563 dct['vbr'] = tbr
2564 if no_video:
2565 dct['abr'] = tbr
2566 if no_audio or no_video:
545cc85d 2567 dct['downloader_options'] = {
2568 # Youtube throttles chunks >~10M
2569 'http_chunk_size': 10485760,
bf1317d2 2570 }
7c60c33e 2571 if dct.get('ext'):
2572 dct['container'] = dct['ext'] + '_dash'
545cc85d 2573 formats.append(dct)
2574
4bb6b02f 2575 skip_manifests = self._configuration_arg('skip')
5d3a0e79 2576 get_dash = 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
2577 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2578
9297939e 2579 for sd in (streaming_data, ytm_streaming_data):
5d3a0e79 2580 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
9297939e 2581 if hls_manifest_url:
2582 for f in self._extract_m3u8_formats(
2583 hls_manifest_url, video_id, 'mp4', fatal=False):
2584 itag = self._search_regex(
2585 r'/itag/(\d+)', f['url'], 'itag', default=None)
2586 if itag:
2587 f['format_id'] = itag
8d68ab98 2588 formats.append(f)
545cc85d 2589
5d3a0e79 2590 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2591 if dash_manifest_url:
2592 for f in self._extract_mpd_formats(
2593 dash_manifest_url, video_id, fatal=False):
2594 itag = f['format_id']
2595 if itag in itags:
2596 continue
2597 if itag in itag_qualities:
2598 f['quality'] = q(itag_qualities[itag])
2599 filesize = int_or_none(self._search_regex(
2600 r'/clen/(\d+)', f.get('fragment_base_url')
2601 or f['url'], 'file size', default=None))
2602 if filesize:
2603 f['filesize'] = filesize
2604 formats.append(f)
bf1317d2 2605
545cc85d 2606 if not formats:
a06916d9 2607 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
b7da73eb 2608 self.raise_no_formats(
545cc85d 2609 'This video is DRM protected.', expected=True)
2610 pemr = try_get(
2611 playability_status,
2612 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2613 dict) or {}
fe93e2c4 2614 reason = self._get_text(pemr.get('reason')) or playability_status.get('reason')
545cc85d 2615 subreason = pemr.get('subreason')
2616 if subreason:
fe93e2c4 2617 subreason = clean_html(self._get_text(subreason))
545cc85d 2618 if subreason == 'The uploader has not made this video available in your country.':
2619 countries = microformat.get('availableCountries')
2620 if not countries:
2621 regions_allowed = search_meta('regionsAllowed')
2622 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2623 self.raise_geo_restricted(subreason, countries, metadata_available=True)
545cc85d 2624 reason += '\n' + subreason
2625 if reason:
b7da73eb 2626 self.raise_no_formats(reason, expected=True)
bf1317d2 2627
545cc85d 2628 self._sort_formats(formats)
bf1317d2 2629
545cc85d 2630 keywords = video_details.get('keywords') or []
2631 if not keywords and webpage:
2632 keywords = [
2633 unescapeHTML(m.group('content'))
2634 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2635 for keyword in keywords:
2636 if keyword.startswith('yt:stretch='):
201c1459 2637 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2638 if mobj:
2639 # NB: float is intentional for forcing float division
2640 w, h = (float(v) for v in mobj.groups())
2641 if w > 0 and h > 0:
2642 ratio = w / h
2643 for f in formats:
2644 if f.get('vcodec') != 'none':
2645 f['stretched_ratio'] = ratio
2646 break
6449cd80 2647
545cc85d 2648 thumbnails = []
2649 for container in (video_details, microformat):
2650 for thumbnail in (try_get(
2651 container,
2652 lambda x: x['thumbnail']['thumbnails'], list) or []):
2653 thumbnail_url = thumbnail.get('url')
2654 if not thumbnail_url:
bf1317d2 2655 continue
1988fab7 2656 # Sometimes youtube gives a wrong thumbnail URL. See:
2657 # https://github.com/yt-dlp/yt-dlp/issues/233
2658 # https://github.com/ytdl-org/youtube-dl/issues/28023
2659 if 'maxresdefault' in thumbnail_url:
2660 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2661 thumbnails.append({
545cc85d 2662 'url': thumbnail_url,
ff2751ac 2663 'height': int_or_none(thumbnail.get('height')),
545cc85d 2664 'width': int_or_none(thumbnail.get('width')),
ff2751ac 2665 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
545cc85d 2666 })
ff2751ac 2667 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2668 if thumbnail_url:
2669 thumbnails.append({
2670 'url': thumbnail_url,
2671 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2672 })
2673 # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
2674 # See: https://github.com/ytdl-org/youtube-dl/issues/29049
2675 thumbnails.append({
2676 'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
2677 'preference': 1,
2678 })
2679 self._remove_duplicate_formats(thumbnails)
545cc85d 2680
2681 category = microformat.get('category') or search_meta('genre')
2682 channel_id = video_details.get('channelId') \
2683 or microformat.get('externalChannelId') \
2684 or search_meta('channelId')
2685 duration = int_or_none(
2686 video_details.get('lengthSeconds')
2687 or microformat.get('lengthSeconds')) \
2688 or parse_duration(search_meta('duration'))
2689 is_live = video_details.get('isLive')
f6745c49 2690 is_upcoming = video_details.get('isUpcoming')
545cc85d 2691 owner_profile_url = microformat.get('ownerProfileUrl')
2692
2693 info = {
2694 'id': video_id,
2695 'title': self._live_title(video_title) if is_live else video_title,
2696 'formats': formats,
2697 'thumbnails': thumbnails,
2698 'description': video_description,
2699 'upload_date': unified_strdate(
2700 microformat.get('uploadDate')
2701 or search_meta('uploadDate')),
2702 'uploader': video_details['author'],
2703 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2704 'uploader_url': owner_profile_url,
2705 'channel_id': channel_id,
2706 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2707 'duration': duration,
2708 'view_count': int_or_none(
2709 video_details.get('viewCount')
2710 or microformat.get('viewCount')
2711 or search_meta('interactionCount')),
2712 'average_rating': float_or_none(video_details.get('averageRating')),
2713 'age_limit': 18 if (
2714 microformat.get('isFamilySafe') is False
2715 or search_meta('isFamilyFriendly') == 'false'
2716 or search_meta('og:restrictions:age') == '18+') else 0,
2717 'webpage_url': webpage_url,
2718 'categories': [category] if category else None,
2719 'tags': keywords,
2720 'is_live': is_live,
2721 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2722 'was_live': video_details.get('isLiveContent'),
545cc85d 2723 }
b477fc13 2724
545cc85d 2725 pctr = try_get(
2726 player_response,
2727 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2728 subtitles = {}
2729 if pctr:
def process_language(container, base_url, lang_code, sub_name, query):
    """Register one subtitle entry per supported format for a language.

    container: mapping of language code -> list of subtitle entries; the
        list for `lang_code` is created on first use and appended to.
    base_url: caption track URL the per-format URLs are derived from
        (this parameter shadows the outer `base_url`).
    lang_code: key under which the entries are stored.
    sub_name: value stored as each entry's 'name'.
    query: extra URL query parameters; updated in place with the 'fmt'
        of each format in turn.
    """
    entries = container.setdefault(lang_code, [])
    for sub_format in self._SUBTITLE_FORMATS:
        # Same dict is reused for every format; only 'fmt' changes.
        query['fmt'] = sub_format
        sub_url = update_url_query(base_url, query)
        entries.append({
            'ext': sub_format,
            'url': sub_url,
            'name': sub_name,
        })
7e72694b 2741
545cc85d 2742 for caption_track in (pctr.get('captionTracks') or []):
2743 base_url = caption_track.get('baseUrl')
2744 if not base_url:
2745 continue
2746 if caption_track.get('kind') != 'asr':
120916da 2747 lang_code = (
2748 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2749 or caption_track.get('languageCode'))
545cc85d 2750 if not lang_code:
2751 continue
2752 process_language(
774d79cc 2753 subtitles, base_url, lang_code,
2d6659b9 2754 try_get(caption_track, lambda x: x['name']['simpleText']),
774d79cc 2755 {})
545cc85d 2756 continue
2757 automatic_captions = {}
2758 for translation_language in (pctr.get('translationLanguages') or []):
2759 translation_language_code = translation_language.get('languageCode')
2760 if not translation_language_code:
2761 continue
2762 process_language(
2763 automatic_captions, base_url, translation_language_code,
fe93e2c4 2764 self._get_text(translation_language.get('languageName'), max_runs=1),
545cc85d 2765 {'tlang': translation_language_code})
2766 info['automatic_captions'] = automatic_captions
2767 info['subtitles'] = subtitles
7e72694b 2768
545cc85d 2769 parsed_url = compat_urllib_parse_urlparse(url)
2770 for component in [parsed_url.fragment, parsed_url.query]:
2771 query = compat_parse_qs(component)
2772 for k, v in query.items():
2773 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2774 d_k += '_time'
2775 if d_k not in info and k in s_ks:
2776 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2777
2778 # Youtube Music Auto-generated description
822b9d9c 2779 if video_description:
38d70284 2780 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2781 if mobj:
822b9d9c
RA
2782 release_year = mobj.group('release_year')
2783 release_date = mobj.group('release_date')
2784 if release_date:
2785 release_date = release_date.replace('-', '')
2786 if not release_year:
545cc85d 2787 release_year = release_date[:4]
2788 info.update({
2789 'album': mobj.group('album'.strip()),
2790 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2791 'track': mobj.group('track').strip(),
2792 'release_date': release_date,
cc2db878 2793 'release_year': int_or_none(release_year),
545cc85d 2794 })
7e72694b 2795
545cc85d 2796 initial_data = None
2797 if webpage:
2798 initial_data = self._extract_yt_initial_variable(
2799 webpage, self._YT_INITIAL_DATA_RE, video_id,
2800 'yt initial data')
2801 if not initial_data:
109dd3b2 2802 initial_data = self._extract_response(
2803 item_id=video_id, ep='next', fatal=False,
2804 ytcfg=ytcfg, headers=headers, query={'videoId': video_id},
2805 note='Downloading initial data API JSON')
545cc85d 2806
c60ee3a2 2807 try:
2808 # This will error if there is no livechat
2809 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2810 info['subtitles']['live_chat'] = [{
2811 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2812 'video_id': video_id,
2813 'ext': 'json',
f6745c49 2814 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 2815 }]
2816 except (KeyError, IndexError, TypeError):
2817 pass
545cc85d 2818
2819 if initial_data:
2820 chapters = self._extract_chapters_from_json(
2821 initial_data, video_id, duration)
2822 if not chapters:
2823 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2824 contents = try_get(
2825 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2826 list)
2827 if not contents:
2828 continue
2829
def chapter_time(mmlir):
    """Parse the timestamp text of one chapter marker.

    mmlir: a 'macroMarkersListItemRenderer' mapping, or a falsy value
        when the list item carries no such renderer.
    Returns the result of `parse_duration` applied to the text of the
    renderer's 'timeDescription'; the caller skips the chapter when
    that result is None.
    """
    # The renderer of the *next* item (used for end_time) is fetched via
    # try_get without an `or {}` fallback, so it is presumably None when
    # the key is missing. Treat that exactly like the empty renderer the
    # start_time path already passes in, instead of failing on `.get`.
    time_description = (mmlir or {}).get('timeDescription')
    return parse_duration(self._get_text(time_description))
545cc85d 2833
2834 chapters = []
2835 for next_num, content in enumerate(contents, start=1):
2836 mmlir = content.get('macroMarkersListItemRenderer') or {}
2837 start_time = chapter_time(mmlir)
2838 end_time = chapter_time(try_get(
2839 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2840 if next_num < len(contents) else duration
2841 if start_time is None or end_time is None:
2842 continue
2843 chapters.append({
2844 'start_time': start_time,
2845 'end_time': end_time,
fe93e2c4 2846 'title': self._get_text(mmlir.get('title')),
545cc85d 2847 })
2848 if chapters:
2849 break
2850 if chapters:
2851 info['chapters'] = chapters
2852
2853 contents = try_get(
2854 initial_data,
2855 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2856 list) or []
2857 for content in contents:
2858 vpir = content.get('videoPrimaryInfoRenderer')
2859 if vpir:
2860 stl = vpir.get('superTitleLink')
2861 if stl:
fe93e2c4 2862 stl = self._get_text(stl)
545cc85d 2863 if try_get(
2864 vpir,
2865 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2866 info['location'] = stl
2867 else:
2868 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2869 if mobj:
2870 info.update({
2871 'series': mobj.group(1),
2872 'season_number': int(mobj.group(2)),
2873 'episode_number': int(mobj.group(3)),
2874 })
2875 for tlb in (try_get(
2876 vpir,
2877 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2878 list) or []):
2879 tbr = tlb.get('toggleButtonRenderer') or {}
2880 for getter, regex in [(
2881 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2882 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2883 lambda x: x['accessibility'],
2884 lambda x: x['accessibilityData']['accessibilityData'],
2885 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2886 label = (try_get(tbr, getter, dict) or {}).get('label')
2887 if label:
2888 mobj = re.match(regex, label)
2889 if mobj:
2890 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2891 break
2892 sbr_tooltip = try_get(
2893 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2894 if sbr_tooltip:
2895 like_count, dislike_count = sbr_tooltip.split(' / ')
2896 info.update({
2897 'like_count': str_to_int(like_count),
2898 'dislike_count': str_to_int(dislike_count),
2899 })
2900 vsir = content.get('videoSecondaryInfoRenderer')
2901 if vsir:
fe93e2c4 2902 info['channel'] = self._get_text(try_get(
545cc85d 2903 vsir,
2904 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2905 dict))
545cc85d 2906 rows = try_get(
2907 vsir,
2908 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2909 list) or []
2910 multiple_songs = False
2911 for row in rows:
2912 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2913 multiple_songs = True
2914 break
2915 for row in rows:
2916 mrr = row.get('metadataRowRenderer') or {}
2917 mrr_title = mrr.get('title')
2918 if not mrr_title:
2919 continue
fe93e2c4 2920 mrr_title = self._get_text(mrr['title'])
2921 mrr_contents_text = self._get_text(mrr['contents'][0])
545cc85d 2922 if mrr_title == 'License':
2923 info['license'] = mrr_contents_text
2924 elif not multiple_songs:
2925 if mrr_title == 'Album':
2926 info['album'] = mrr_contents_text
2927 elif mrr_title == 'Artist':
2928 info['artist'] = mrr_contents_text
2929 elif mrr_title == 'Song':
2930 info['track'] = mrr_contents_text
2931
2932 fallbacks = {
2933 'channel': 'uploader',
2934 'channel_id': 'uploader_id',
2935 'channel_url': 'uploader_url',
2936 }
2937 for to, frm in fallbacks.items():
2938 if not info.get(to):
2939 info[to] = info.get(frm)
2940
2941 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2942 v = info.get(s_k)
2943 if v:
2944 info[d_k] = v
b84071c0 2945
c224251a
M
2946 is_private = bool_or_none(video_details.get('isPrivate'))
2947 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2948 is_membersonly = None
b28f8d24 2949 is_premium = None
c224251a
M
2950 if initial_data and is_private is not None:
2951 is_membersonly = False
b28f8d24 2952 is_premium = False
47193e02 2953 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
2954 badge_labels = set()
2955 for content in contents:
2956 if not isinstance(content, dict):
2957 continue
2958 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
2959 for badge_label in badge_labels:
2960 if badge_label.lower() == 'members only':
2961 is_membersonly = True
2962 elif badge_label.lower() == 'premium':
2963 is_premium = True
2964 elif badge_label.lower() == 'unlisted':
2965 is_unlisted = True
c224251a 2966
c224251a
M
2967 info['availability'] = self._availability(
2968 is_private=is_private,
b28f8d24 2969 needs_premium=is_premium,
c224251a
M
2970 needs_subscription=is_membersonly,
2971 needs_auth=info['age_limit'] >= 18,
2972 is_unlisted=None if is_private is None else is_unlisted)
2973
06167fbb 2974 # get xsrf for annotations or comments
a06916d9 2975 get_annotations = self.get_param('writeannotations', False)
2976 get_comments = self.get_param('getcomments', False)
06167fbb 2977 if get_annotations or get_comments:
29f7c58a 2978 xsrf_token = None
545cc85d 2979 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2980 if ytcfg:
2981 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2982 if not xsrf_token:
2983 xsrf_token = self._search_regex(
2984 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2985 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2986
2987 # annotations
06167fbb 2988 if get_annotations:
64b6a4e9
RA
2989 invideo_url = try_get(
2990 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2991 if xsrf_token and invideo_url:
29f7c58a 2992 xsrf_field_name = None
2993 if ytcfg:
2994 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2995 if not xsrf_field_name:
2996 xsrf_field_name = self._search_regex(
2997 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2998 webpage, 'xsrf field name',
29f7c58a 2999 group='xsrf_field_name', default='session_token')
8a784c74 3000 info['annotations'] = self._download_webpage(
64b6a4e9
RA
3001 self._proto_relative_url(invideo_url),
3002 video_id, note='Downloading annotations',
3003 errnote='Unable to download video annotations', fatal=False,
3004 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 3005
277d6ff5 3006 if get_comments:
2d6659b9 3007 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage)
4ea3be0a 3008
545cc85d 3009 self.mark_watched(video_id, player_response)
d77ab8e2 3010
545cc85d 3011 return info
c5e8d7af 3012
5f6a1245 3013
8bdd16b4 3014class YoutubeTabIE(YoutubeBaseInfoExtractor):
3015 IE_DESC = 'YouTube.com tab'
70d5c17b 3016 _VALID_URL = r'''(?x)
3017 https?://
3018 (?:\w+\.)?
3019 (?:
3020 youtube(?:kids)?\.com|
3021 invidio\.us
3022 )/
3023 (?:
fe03a6cd 3024 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 3025 (?P<not_channel>
9ba5705a 3026 feed/|hashtag/|
70d5c17b 3027 (?:playlist|watch)\?.*?\blist=
3028 )|
29f7c58a 3029 (?!(?:%s)\b) # Direct URLs
70d5c17b 3030 )
3031 (?P<id>[^/?\#&]+)
3032 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 3033 IE_NAME = 'youtube:tab'
3034
81127aa5 3035 _TESTS = [{
da692b79 3036 'note': 'playlists, multipage',
8bdd16b4 3037 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3038 'playlist_mincount': 94,
3039 'info_dict': {
3040 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3041 'title': 'Игорь Клейнер - Playlists',
3042 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3043 'uploader': 'Игорь Клейнер',
3044 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 3045 },
3046 }, {
da692b79 3047 'note': 'playlists, multipage, different order',
8bdd16b4 3048 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3049 'playlist_mincount': 94,
3050 'info_dict': {
3051 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3052 'title': 'Игорь Клейнер - Playlists',
3053 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3054 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3055 'uploader': 'Игорь Клейнер',
8bdd16b4 3056 },
201c1459 3057 }, {
da692b79 3058 'note': 'playlists, series',
201c1459 3059 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3060 'playlist_mincount': 5,
3061 'info_dict': {
3062 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3063 'title': '3Blue1Brown - Playlists',
3064 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 3065 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3066 'uploader': '3Blue1Brown',
201c1459 3067 },
8bdd16b4 3068 }, {
da692b79 3069 'note': 'playlists, singlepage',
8bdd16b4 3070 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3071 'playlist_mincount': 4,
3072 'info_dict': {
3073 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3074 'title': 'ThirstForScience - Playlists',
3075 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 3076 'uploader': 'ThirstForScience',
3077 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 3078 }
3079 }, {
3080 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3081 'only_matching': True,
3082 }, {
da692b79 3083 'note': 'basic, single video playlist',
0e30a7b9 3084 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3085 'info_dict': {
0e30a7b9 3086 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3087 'uploader': 'Sergey M.',
3088 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3089 'title': 'youtube-dl public playlist',
81127aa5 3090 },
0e30a7b9 3091 'playlist_count': 1,
9291475f 3092 }, {
da692b79 3093 'note': 'empty playlist',
0e30a7b9 3094 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3095 'info_dict': {
0e30a7b9 3096 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3097 'uploader': 'Sergey M.',
3098 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3099 'title': 'youtube-dl empty playlist',
9291475f
PH
3100 },
3101 'playlist_count': 0,
3102 }, {
da692b79 3103 'note': 'Home tab',
8bdd16b4 3104 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3105 'info_dict': {
8bdd16b4 3106 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3107 'title': 'lex will - Home',
3108 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3109 'uploader': 'lex will',
3110 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3111 },
8bdd16b4 3112 'playlist_mincount': 2,
9291475f 3113 }, {
da692b79 3114 'note': 'Videos tab',
8bdd16b4 3115 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3116 'info_dict': {
8bdd16b4 3117 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3118 'title': 'lex will - Videos',
3119 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3120 'uploader': 'lex will',
3121 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3122 },
8bdd16b4 3123 'playlist_mincount': 975,
9291475f 3124 }, {
da692b79 3125 'note': 'Videos tab, sorted by popular',
8bdd16b4 3126 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3127 'info_dict': {
8bdd16b4 3128 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3129 'title': 'lex will - Videos',
3130 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3131 'uploader': 'lex will',
3132 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3133 },
8bdd16b4 3134 'playlist_mincount': 199,
9291475f 3135 }, {
da692b79 3136 'note': 'Playlists tab',
8bdd16b4 3137 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3138 'info_dict': {
8bdd16b4 3139 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3140 'title': 'lex will - Playlists',
3141 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3142 'uploader': 'lex will',
3143 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3144 },
8bdd16b4 3145 'playlist_mincount': 17,
ac7553d0 3146 }, {
da692b79 3147 'note': 'Community tab',
8bdd16b4 3148 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3149 'info_dict': {
8bdd16b4 3150 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3151 'title': 'lex will - Community',
3152 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3153 'uploader': 'lex will',
3154 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3155 },
3156 'playlist_mincount': 18,
87dadd45 3157 }, {
da692b79 3158 'note': 'Channels tab',
8bdd16b4 3159 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3160 'info_dict': {
8bdd16b4 3161 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3162 'title': 'lex will - Channels',
3163 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3164 'uploader': 'lex will',
3165 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3166 },
deaec5af 3167 'playlist_mincount': 12,
cd684175 3168 }, {
3169 'note': 'Search tab',
3170 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3171 'playlist_mincount': 40,
3172 'info_dict': {
3173 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3174 'title': '3Blue1Brown - Search - linear algebra',
3175 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3176 'uploader': '3Blue1Brown',
3177 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3178 },
6b08cdf6 3179 }, {
a0566bbf 3180 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3181 'only_matching': True,
3182 }, {
a0566bbf 3183 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3184 'only_matching': True,
3185 }, {
a0566bbf 3186 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3187 'only_matching': True,
3188 }, {
3189 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3190 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3191 'info_dict': {
3192 'title': '29C3: Not my department',
3193 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3194 'uploader': 'Christiaan008',
3195 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3196 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3197 },
3198 'playlist_count': 96,
3199 }, {
3200 'note': 'Large playlist',
3201 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3202 'info_dict': {
8bdd16b4 3203 'title': 'Uploads from Cauchemar',
3204 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3205 'uploader': 'Cauchemar',
3206 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3207 },
8bdd16b4 3208 'playlist_mincount': 1123,
3209 }, {
da692b79 3210 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3211 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3212 'only_matching': True,
4b7df0d3
JMF
3213 }, {
3214 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3215 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3216 'info_dict': {
acf757f4
PH
3217 'title': 'Uploads from Interstellar Movie',
3218 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3219 'uploader': 'Interstellar Movie',
8bdd16b4 3220 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3221 },
481cc733 3222 'playlist_mincount': 21,
358de58c 3223 }, {
3224 'note': 'Playlist with "show unavailable videos" button',
3225 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3226 'info_dict': {
3227 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3228 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3229 'uploader': 'Phim Siêu Nhân Nhật Bản',
3230 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3231 },
da692b79 3232 'playlist_mincount': 200,
5d342002 3233 }, {
da692b79 3234 'note': 'Playlist with unavailable videos in page 7',
5d342002 3235 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3236 'info_dict': {
3237 'title': 'Uploads from BlankTV',
3238 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3239 'uploader': 'BlankTV',
3240 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3241 },
da692b79 3242 'playlist_mincount': 1000,
8bdd16b4 3243 }, {
da692b79 3244 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3245 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3246 'info_dict': {
3247 'title': 'Data Analysis with Dr Mike Pound',
3248 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3249 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3250 'uploader': 'Computerphile',
deaec5af 3251 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3252 },
3253 'playlist_mincount': 11,
3254 }, {
a0566bbf 3255 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3256 'only_matching': True,
dacb3a86 3257 }, {
da692b79 3258 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3259 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3260 'info_dict': {
3261 'id': 'FqZTN594JQw',
3262 'ext': 'webm',
3263 'title': "Smiley's People 01 detective, Adventure Series, Action",
3264 'uploader': 'STREEM',
3265 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3266 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3267 'upload_date': '20150526',
3268 'license': 'Standard YouTube License',
3269 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3270 'categories': ['People & Blogs'],
3271 'tags': list,
dbdaaa23 3272 'view_count': int,
dacb3a86
S
3273 'like_count': int,
3274 'dislike_count': int,
3275 },
3276 'params': {
3277 'skip_download': True,
3278 },
13a75688 3279 'skip': 'This video is not available.',
dacb3a86 3280 'add_ie': [YoutubeIE.ie_key()],
481cc733 3281 }, {
8bdd16b4 3282 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3283 'only_matching': True,
66b48727 3284 }, {
8bdd16b4 3285 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3286 'only_matching': True,
a0566bbf 3287 }, {
3288 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3289 'info_dict': {
da692b79 3290 'id': 'X1whbWASnNQ', # This will keep changing
a0566bbf 3291 'ext': 'mp4',
deaec5af 3292 'title': compat_str,
a0566bbf 3293 'uploader': 'Sky News',
3294 'uploader_id': 'skynews',
3295 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3296 'upload_date': r're:\d{8}',
3297 'description': compat_str,
a0566bbf 3298 'categories': ['News & Politics'],
3299 'tags': list,
3300 'like_count': int,
3301 'dislike_count': int,
3302 },
3303 'params': {
3304 'skip_download': True,
3305 },
da692b79 3306 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3307 }, {
3308 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3309 'info_dict': {
3310 'id': 'a48o2S1cPoo',
3311 'ext': 'mp4',
3312 'title': 'The Young Turks - Live Main Show',
3313 'uploader': 'The Young Turks',
3314 'uploader_id': 'TheYoungTurks',
3315 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3316 'upload_date': '20150715',
3317 'license': 'Standard YouTube License',
3318 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3319 'categories': ['News & Politics'],
3320 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3321 'like_count': int,
3322 'dislike_count': int,
3323 },
3324 'params': {
3325 'skip_download': True,
3326 },
3327 'only_matching': True,
3328 }, {
3329 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3330 'only_matching': True,
3331 }, {
3332 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3333 'only_matching': True,
09f1580e 3334 }, {
3335 'note': 'A channel that is not live. Should raise error',
3336 'url': 'https://www.youtube.com/user/numberphile/live',
3337 'only_matching': True,
3d3dddc9 3338 }, {
3339 'url': 'https://www.youtube.com/feed/trending',
3340 'only_matching': True,
3341 }, {
3d3dddc9 3342 'url': 'https://www.youtube.com/feed/library',
3343 'only_matching': True,
3344 }, {
3d3dddc9 3345 'url': 'https://www.youtube.com/feed/history',
3346 'only_matching': True,
3347 }, {
3d3dddc9 3348 'url': 'https://www.youtube.com/feed/subscriptions',
3349 'only_matching': True,
3350 }, {
3d3dddc9 3351 'url': 'https://www.youtube.com/feed/watch_later',
3352 'only_matching': True,
3353 }, {
da692b79 3354 'note': 'Recommended - redirects to home page',
3d3dddc9 3355 'url': 'https://www.youtube.com/feed/recommended',
3356 'only_matching': True,
29f7c58a 3357 }, {
da692b79 3358 'note': 'inline playlist with not always working continuations',
29f7c58a 3359 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3360 'only_matching': True,
3361 }, {
3362 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3363 'only_matching': True,
3364 }, {
3365 'url': 'https://www.youtube.com/course',
3366 'only_matching': True,
3367 }, {
3368 'url': 'https://www.youtube.com/zsecurity',
3369 'only_matching': True,
3370 }, {
3371 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3372 'only_matching': True,
3373 }, {
3374 'url': 'https://www.youtube.com/TheYoungTurks/live',
3375 'only_matching': True,
39ed931e 3376 }, {
3377 'url': 'https://www.youtube.com/hashtag/cctv9',
3378 'info_dict': {
3379 'id': 'cctv9',
3380 'title': '#cctv9',
3381 },
3382 'playlist_mincount': 350,
201c1459 3383 }, {
3384 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3385 'only_matching': True,
9297939e 3386 }, {
da692b79 3387 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3388 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3389 'only_matching': True
fe03a6cd 3390 }, {
3391 'note': '/browse/ should redirect to /channel/',
3392 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3393 'only_matching': True
3394 }, {
3395 'note': 'VLPL, should redirect to playlist?list=PL...',
3396 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3397 'info_dict': {
3398 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3399 'uploader': 'NoCopyrightSounds',
3400 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3401 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3402 'title': 'NCS Releases',
3403 },
3404 'playlist_mincount': 166,
18db7548 3405 }, {
3406 'note': 'Topic, should redirect to playlist?list=UU...',
3407 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3408 'info_dict': {
3409 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3410 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3411 'title': 'Uploads from Royalty Free Music - Topic',
3412 'uploader': 'Royalty Free Music - Topic',
3413 },
3414 'expected_warnings': [
3415 'A channel/user page was given',
3416 'The URL does not have a videos tab',
3417 ],
3418 'playlist_mincount': 101,
3419 }, {
3420 'note': 'Topic without a UU playlist',
3421 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3422 'info_dict': {
3423 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3424 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3425 },
3426 'expected_warnings': [
3427 'A channel/user page was given',
3428 'The URL does not have a videos tab',
3429 'Falling back to channel URL',
3430 ],
3431 'playlist_mincount': 9,
abcdd12b 3432 }, {
3433 'note': 'Youtube music Album',
3434 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3435 'info_dict': {
3436 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3437 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3438 },
3439 'playlist_count': 50,
47193e02 3440 }, {
3441 'note': 'unlisted single video playlist',
3442 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3443 'info_dict': {
3444 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3445 'uploader': 'colethedj',
3446 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3447 'title': 'yt-dlp unlisted playlist test',
3448 'availability': 'unlisted'
3449 },
3450 'playlist_count': 1,
29f7c58a 3451 }]
3452
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    URLs that YoutubeIE declares suitable are always rejected here;
    everything else is decided by the parent class implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 3457
3458 def _extract_channel_id(self, webpage):
3459 channel_id = self._html_search_meta(
3460 'channelId', webpage, 'channel id', default=None)
3461 if channel_id:
3462 return channel_id
3463 channel_url = self._html_search_meta(
3464 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3465 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3466 'twitter:app:url:googleplay'), webpage, 'channel url')
3467 return self._search_regex(
3468 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3469 channel_url, 'channel id')
15f6397c 3470
8bdd16b4 3471 @staticmethod
cd7c66cf 3472 def _extract_basic_item_renderer(item):
3473 # Modified from _extract_grid_item_renderer
201c1459 3474 known_basic_renderers = (
3475 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3476 )
3477 for key, renderer in item.items():
201c1459 3478 if not isinstance(renderer, dict):
cd7c66cf 3479 continue
201c1459 3480 elif key in known_basic_renderers:
3481 return renderer
3482 elif key.startswith('grid') and key.endswith('Renderer'):
3483 return renderer
8bdd16b4 3484
def _grid_entries(self, grid_renderer):
    """Yield one result per usable item in grid_renderer['items'].

    Each item is classified, in this order, as a playlist, a video, a
    channel, or a generic navigation endpoint URL.  Items that match none
    of these produce nothing.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        basic = self._extract_basic_item_renderer(grid_item)
        if not isinstance(basic, dict):
            continue
        label = self._get_text(basic.get('title'))

        # playlist
        list_id = basic.get('playlistId')
        if list_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % list_id,
                ie=YoutubeTabIE.ie_key(), video_id=list_id,
                video_title=label)
            continue

        # video
        if basic.get('videoId'):
            yield self._extract_video(basic)
            continue

        # channel
        owner_id = basic.get('channelId')
        if owner_id:
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % owner_id,
                ie=YoutubeTabIE.ie_key(), video_title=label)
            continue

        # generic endpoint URL support
        endpoint_path = try_get(
            basic, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str)
        target = urljoin('https://www.youtube.com/', endpoint_path)
        if not target:
            continue
        # Only the first extractor that accepts the URL is used
        matching_ie = next(
            (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(target)),
            None)
        if matching_ie is not None:
            yield self.url_result(
                target, ie=matching_ie.ie_key(), video_id=matching_ie._match_id(target), video_title=label)
8bdd16b4 3524
3d3dddc9 3525 def _shelf_entries_from_content(self, shelf_renderer):
3526 content = shelf_renderer.get('content')
3527 if not isinstance(content, dict):
8bdd16b4 3528 return
cd7c66cf 3529 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3530 if renderer:
3531 # TODO: add support for nested playlists so each shelf is processed
3532 # as separate playlist
3533 # TODO: this includes only first N items
3534 for entry in self._grid_entries(renderer):
3535 yield entry
3536 renderer = content.get('horizontalListRenderer')
3537 if renderer:
3538 # TODO
3539 pass
8bdd16b4 3540
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf renderer.

    When the shelf has an endpoint URL, a url_result for it is yielded
    first (unless skip_channels is set and the URL contains '/channels?',
    in which case nothing at all is yielded).  Entries extracted from the
    shelf content are yielded afterwards.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skipping links to other channels; note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = self._get_text(shelf_renderer, lambda x: x['title'])
        yield self.url_result(shelf_url, video_title=shelf_title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for content_entry in self._shelf_entries_from_content(shelf_renderer):
        yield content_entry
c5e8d7af 3557
8bdd16b4 3558 def _playlist_entries(self, video_list_renderer):
3559 for content in video_list_renderer['contents']:
3560 if not isinstance(content, dict):
3561 continue
3562 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3563 if not isinstance(renderer, dict):
3564 continue
3565 video_id = renderer.get('videoId')
3566 if not video_id:
3567 continue
3568 yield self._extract_video(renderer)
07aeced6 3569
def _rich_entries(self, rich_grid_renderer):
    """Yield the video found at ['content']['videoRenderer'], if it has a 'videoId'."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3577
8bdd16b4 3578 def _video_entry(self, video_renderer):
3579 video_id = video_renderer.get('videoId')
3580 if video_id:
3581 return self._extract_video(video_renderer)
dacb3a86 3582
def _post_thread_entries(self, post_thread_renderer):
    """Yield entries referenced by a backstage (community) post.

    In order: the attached video, the attached playlist, and every URL in
    the post text that YoutubeIE accepts, except links pointing to the
    attached video itself.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        # Do not repeat the video that was already yielded as attachment
        if video_id == linked_id:
            continue
        yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)
dacb3a86 3618
8bdd16b4 3619 def _post_thread_continuation_entries(self, post_thread_continuation):
3620 contents = post_thread_continuation.get('contents')
3621 if not isinstance(contents, list):
3622 return
3623 for content in contents:
3624 renderer = content.get('backstagePostThreadRenderer')
3625 if not isinstance(renderer, dict):
3626 continue
3627 for entry in self._post_thread_entries(renderer):
3628 yield entry
07aeced6 3629
39ed931e 3630 r''' # unused
3631 def _rich_grid_entries(self, contents):
3632 for content in contents:
3633 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3634 if video_renderer:
3635 entry = self._video_entry(video_renderer)
3636 if entry:
3637 yield entry
3638 '''
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield all entries of *tab*, following continuations page by page.

    The entries embedded in the tab content are yielded first.  After
    that, continuation pages are requested through _extract_response
    until no continuation is left, the response is empty, or the response
    contains nothing this method knows how to handle.
    """
    # One-element list used as a mutable cell shared with extract_entries
    # (Python 2 does not support nonlocal)
    continuation_list = [None]

    section_handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
    }

    def extract_entries(parent_renderer):
        # This needs to be called again for continuation to work with feeds
        for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                rich_item = content.get('richItemRenderer')
                if rich_item:
                    for entry in self._rich_entries(rich_item):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            for isr_content in try_get(is_renderer, lambda x: x['contents'], list) or []:
                if not isinstance(isr_content, dict):
                    continue
                for key, renderer in isr_content.items():
                    handler = section_handlers.get(key)
                    if handler is None:
                        continue
                    for entry in handler(renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    # Only the first known renderer of the item is used
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    visitor_data = None

    # Handlers for responses carrying 'continuationContents'
    continuation_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Handlers for responses carrying appended continuation items; the
    # second element is the key the item list is wrapped under
    item_handlers = {
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep the previous visitor data when the response has none
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in continuation_handlers:
                continue
            continuation_renderer = value
            continuation_list[0] = None
            for entry in continuation_handlers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The type of the first item selects the handler for the whole list
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in item_handlers:
                continue
            handler, wrapper_key = item_handlers[key]
            video_items_renderer = {wrapper_key: continuation_items}
            continuation_list[0] = None
            for entry in handler(video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if not video_items_renderer:
            break
9558dcec 3753
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the tab whose 'selected' value is True.

    Both 'tabRenderer' and 'expandableTabRenderer' are considered.
    Raises ExtractorError when no tab is selected.
    """
    for candidate in tabs:
        tab_renderer = dict_get(candidate, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    raise ExtractorError('Unable to find selected tab')
b82f815f 3762
@classmethod
def _extract_uploader(cls, data):
    """Return uploader fields read from the playlist sidebar.

    The result may contain 'uploader', 'uploader_id' and 'uploader_url';
    fields whose value is None are left out.  An empty dict is returned
    when no owner information is found.
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}
    fields = {
        'uploader': owner.get('text'),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)),
    }
    return dict((name, value) for name, value in fields.items() if value is not None)
8bdd16b4 3777
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a tabbed page (channel, playlist, hashtag...).

    Metadata comes from 'channelMetadataRenderer' when present and from
    'playlistMetadataRenderer' otherwise; the entries come from the
    selected tab via _entries.
    """
    channel_name = channel_url = channel_id = None
    playlist_id = title = description = None
    raw_thumbnails = tags = []

    selected_tab = self._extract_selected_tab(tabs)

    meta = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if meta:
        channel_name = meta.get('title')
        channel_url = meta.get('channelUrl')
        channel_id = meta.get('externalId')
    else:
        meta = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if meta:
        title = meta.get('title')
        description = meta.get('description', '')
        playlist_id = channel_id
        tags = meta.get('keywords', '').split()
        # Channel avatar first, playlist sidebar thumbnail as fallback
        raw_thumbnails = (
            try_get(meta, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for raw in raw_thumbnails:
        if not isinstance(raw, dict):
            continue
        thumbnail_url = url_or_none(raw.get('url'))
        if thumbnail_url:
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(raw.get('width')),
                'height': int_or_none(raw.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        title = (
            try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
            or playlist_id)
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the final uploader_* values
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    ytcfg = self._extract_ytcfg(item_id, webpage)
    identity_token = self._extract_identity_token(webpage, item_id)
    account_syncid = self._extract_account_syncid(ytcfg, data)
    return self.playlist_result(
        self._entries(selected_tab, playlist_id, identity_token, account_syncid, ytcfg),
        **metadata)
73c4ac2c 3852
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a mix playlist, requesting further pages as needed.

    Each page is requested from the 'next' endpoint starting at the last
    video seen so far.  Iteration ends when a page has no videos, has no
    videos after the last one already seen, repeats the very first video,
    or when the response carries no playlist.
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video of the previous page, or from
        # the beginning when that video is not part of this page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item may lack a watch endpoint; fall back to an empty
        # dict so the defaults below apply instead of failing on None.get
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        # Without a playlist in the response there is nothing left to
        # iterate; _playlist_entries would fail on a missing 'contents'
        if not playlist:
            return
cd7c66cf 3888
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """
    Build the result for a playlist panel found on a watch page.

    If the panel links to a playlist URL different from the current one, the
    extraction is delegated to that URL; otherwise the panel is treated as a
    Mix and its entries are extracted here.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    target_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, target_path)
    if not playlist_url or playlist_url == url:
        mix_entries = self._extract_mix_playlist(playlist, playlist_id, data, webpage)
        return self.playlist_result(
            mix_entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3906
def _extract_availability(self, data):
    """
    Determine the availability of a playlist/tab from its sidebar.
    Nothing is assumed to be public unless the response says so explicitly.
    @param data: response
    """
    is_private = is_unlisted = None
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Own playlists (when logged in) show a visibility dropdown rather than a badge;
    # the label of the selected dropdown entry is treated like a badge
    dropdown_entries = try_get(
        sidebar,
        lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'],
        list) or []
    for entry in dropdown_entries:
        if not try_get(entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        label_data = try_get(entry, lambda x: x['privacyDropdownItemRenderer']['label'], dict) or []
        label = self._get_text(label_data)
        if not label:
            continue
        labels.add(label.lower())
        break

    for label in labels:
        if label == 'public':
            is_unlisted = is_private = False
        elif label == 'private':
            is_private = True
        elif label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)
3939
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` entry of the playlist sidebar
    that has the expected type, or None if there is none.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(entry, lambda x: x[info_renderer], expected_type)
        for entry in sidebar_items)
    return next((found for found in candidates if found), None)
3948
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Request the playlist again, this time including unavailable videos.
    The browse endpoint is taken from the 'show unavailable videos' menu button
    when it exists; otherwise default values are used for the request.
    Returns None if there is no primary sidebar renderer.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_endpoint = {}
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_renderer = menu_item.get('menuNavigationItemRenderer')
        button_text = try_get(
            nav_renderer, lambda x: x['text']['simpleText'], compat_str)
        if button_text and button_text.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break
    browse_id = browse_endpoint.get('browseId')
    params = browse_endpoint.get('params')

    ytcfg = self._extract_ytcfg(item_id, webpage)
    account_syncid = self._extract_account_syncid(ytcfg, data)
    identity_token = self._extract_identity_token(webpage, item_id=item_id)
    visitor_data = try_get(
        self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=account_syncid,
        identity_token=identity_token, visitor_data=visitor_data)
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 3987
cd7c66cf 3988 def _extract_webpage(self, url, item_id):
a06916d9 3989 retries = self.get_param('extractor_retries', 3)
62bff2c1 3990 count = -1
c705177d 3991 last_error = 'Incomplete yt initial data recieved'
14fdfea9 3992 while count < retries:
62bff2c1 3993 count += 1
14fdfea9 3994 # Sometimes youtube returns a webpage with incomplete ytInitialData
62bff2c1 3995 # See: https://github.com/yt-dlp/yt-dlp/issues/116
3996 if count:
c705177d 3997 self.report_warning('%s. Retrying ...' % last_error)
5ef7d9bd 3998 webpage = self._download_webpage(
3999 url, item_id,
cd7c66cf 4000 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
14fdfea9 4001 data = self._extract_yt_initial_data(item_id, webpage)
14fdfea9 4002 if data.get('contents') or data.get('currentVideoEndpoint'):
4003 break
95c01b6c 4004 # Extract alerts here only when there is error
4005 self._extract_and_report_alerts(data)
c705177d 4006 if count >= retries:
6a39ee13 4007 raise ExtractorError(last_error)
cd7c66cf 4008 return webpage, data
4009
9297939e 4010 @staticmethod
4011 def _smuggle_data(entries, data):
4012 for entry in entries:
4013 if data:
4014 entry['url'] = smuggle_url(entry['url'], data)
4015 yield entry
4016
def _real_extract(self, url):
    """
    Entry point: unpack smuggled data, flag music URLs, run the actual
    extraction and pass the smuggled data on to every resulting entry.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True
    info_dict = self.__real_extract(url, smuggled_data)
    entries = info_dict.get('entries')
    if entries:
        info_dict['entries'] = self._smuggle_data(entries, smuggled_data)
    return info_dict
4025
# Splits a valid URL into the part matched by _VALID_URL ("pre"), an optional
# tab (only looked for when the "channel_type" group matched) and the rest ("post")
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)

def __real_extract(self, url, smuggled_data):
    """
    Normalize the URL, download the page and dispatch to the tab, playlist
    or single-video handling depending on what the initial data contains.
    """
    item_id = self._match_id(url)
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])
    redirect_allowed = 'no-youtube-channel-redirect' not in compat_opts
    playlist_url_tmpl = 'https://www.youtube.com/playlist?list=%s'

    def get_mobj(url):
        # Unmatched groups are mapped to '' so that string methods can be used on them
        matched = self._url_re.match(url).groupdict()
        return dict((k, '' if v is None else v) for k, v in matched.items())

    def get_tabs(response):
        return try_get(
            response, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)

    mobj = get_mobj(url)
    pre, post = mobj['pre'], mobj['post']
    # Youtube returns incomplete data if tabname is not lower case
    tab = mobj['tab'].lower()
    is_channel = not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':
            # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre, tab, post, is_channel = playlist_url_tmpl % item_id, '', '', False
        elif id_prefix == 'MP':
            # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
            music_webpage = self._download_webpage(
                'https://music.youtube.com/channel/%s' % item_id, item_id)
            item_id = self._search_regex(
                r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                music_webpage, 'playlist id')
            pre, tab, post, is_channel = playlist_url_tmpl % item_id, '', '', False
        elif mobj['channel_type'] == 'browse':
            # Youtube music /browse/ should be changed to /channel/
            pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and redirect_allowed:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = playlist_url_tmpl % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = get_tabs(data)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if redirect_allowed:
            requested_tab = mobj['tab']
            if requested_tab == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if requested_tab == '/videos' and tab_name.lower() != requested_tab[1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (requested_tab[1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            if alert_type == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        # Only switch over once the playlist has proven to be usable
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (requested_tab[1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)

    tabs = get_tabs(data)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    if mobj['tab'] != '/live':  # live tab is expected to redirect to video
        self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
    return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 4142
c5e8d7af 4143
class YoutubePlaylistIE(InfoExtractor):
    """Matches bare playlist ids and playlist URLs and hands them over to YoutubeTabIE"""
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE handles and URLs that carry a video id"""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        if has_video_id:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite the input as a /playlist URL and delegate it to YoutubeTabIE"""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query if there is one; otherwise only the playlist id is known
        query = parse_qs(url) or {'list': playlist_id}
        url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4226
4227
class YoutubeYtBeIE(InfoExtractor):
    """Turns youtu.be short links that carry a playlist id into regular watch URLs"""
    IE_DESC = 'youtu.be'
    _VALID_URL = (
        r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)'
        % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE})
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent watch URL (video + playlist) and delegate it to YoutubeTabIE"""
        matched = re.match(self._VALID_URL, url)
        video_id, playlist_id = matched.group('id'), matched.group('playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4266
4267
class YoutubeYtUserIE(InfoExtractor):
    """Expands the "ytuser:<name>" shorthand into the user's page URL"""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4281
b05654f0 4282
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Maps the ":ytfav" shortcut to the "LL" playlist URL"""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
4300
4301
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor for the "ytsearch" keyword, backed by the "search" API endpoint"""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to `n` video results for `query`, following continuations page by page"""
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        continuation = {}
        for page_num in itertools.count(1):
            data.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                return
            # First getter: initial response; second getter: continuation response
            sections = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for section in sections:
                continuation = continuation or self._extract_continuation({'contents': [section]})

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for item in items or []:
                    video = item.get('videoRenderer') if isinstance(item, dict) else None
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            if not continuation:
                return

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 4369
c9ae7b95 4370
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE that sends the 'CAI%3D' search params (newest videos first)"""
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4376
c9ae7b95 4377
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Handles /results URLs by reading the query and the "sp" filter from the URL"""
    IE_NAME = '%s_url' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own _VALID_URL unchanged"""
        return cls._VALID_URL

    def _real_extract(self, url):
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        query_values = params.get('search_query') or params.get('q')
        # The "sp" parameter of the URL is forwarded as the search params
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query_values[0], self._MAX_RESULTS)
3462ffa8 4403
4404
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed extractors.
    Every subclass has to provide a _FEED_NAME; it is used for both
    the extractor name and the feed URL.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4421
4422
class YoutubeWatchLaterIE(InfoExtractor):
    """Maps the ":ytwatchlater" shortcut to the "WL" playlist URL"""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4435
4436
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'recommended' feed; also matches the bare youtube.com home URL"""
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4452
1ed5b5c9 4453
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'subscriptions' feed"""
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4465
1ed5b5c9 4466
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'history' feed"""
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4475
4476
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """
    Catches watch/attribution_link URLs that carry no video id and explains
    the likely cause (an unquoted "&" in the shell) instead of extracting.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raises an expected ExtractorError telling the user to quote the URL"""
        # The example commands name this program (yt-dlp) so that they can be copied as-is
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4524
4525
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catches watch URLs whose video id has 1 to 10 characters and reports them as truncated"""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raises an expected ExtractorError naming the incomplete id"""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)