]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[youtube] Sanity check `chapters` (and refactor related code)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
2d6659b9 5import base64
d92f5d5a 6import calendar
109dd3b2 7import copy
fe93e2c4 8import datetime
a5c56234 9import hashlib
0ca96d48 10import itertools
c5e8d7af 11import json
c4417ddb 12import os.path
d77ab8e2 13import random
c5e8d7af 14import re
8a784c74 15import time
e0df6211 16import traceback
c5e8d7af 17
b05654f0 18from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 19from ..compat import (
edf3e38e 20 compat_chr,
29f7c58a 21 compat_HTTPError,
c5e8d7af 22 compat_parse_qs,
545cc85d 23 compat_str,
7fd002c0 24 compat_urllib_parse_unquote_plus,
15707c7e 25 compat_urllib_parse_urlencode,
7c80519c 26 compat_urllib_parse_urlparse,
7c61bd36 27 compat_urlparse,
4bb4a188 28)
545cc85d 29from ..jsinterp import JSInterpreter
4bb4a188 30from ..utils import (
c224251a 31 bool_or_none,
2d6659b9 32 bytes_to_intlist,
c5e8d7af 33 clean_html,
26fe8ffe 34 dict_get,
d92f5d5a 35 datetime_from_str,
358de58c 36 error_to_compat_str,
c5e8d7af 37 ExtractorError,
b60419c5 38 format_field,
2d30521a 39 float_or_none,
dd27fd17 40 int_or_none,
2d6659b9 41 intlist_to_bytes,
94278f72 42 mimetype2ext,
6310acf5 43 parse_codecs,
49bd8c66 44 parse_count,
7c80519c 45 parse_duration,
dca3ff4a 46 qualities,
3995d37d 47 remove_start,
cf7e015f 48 smuggle_url,
dbdaaa23 49 str_or_none,
c93d53f5 50 str_to_int,
7c365c21 51 traverse_obj,
556dbe7f 52 try_get,
c5e8d7af
PH
53 unescapeHTML,
54 unified_strdate,
cf7e015f 55 unsmuggle_url,
8bdd16b4 56 update_url_query,
21c340b8 57 url_or_none,
6e6bc8da 58 urlencode_postdata,
fe93e2c4 59 urljoin,
7c365c21 60 variadic,
c5e8d7af
PH
61)
62
5f6a1245 63
def parse_qs(url):
    """Parse the query component of `url` with compat_urlparse.parse_qs."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
66
67
de7f3446 68class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b
JMF
69 """Provide base functions for Youtube extractors"""
# Google account endpoints
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

_LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
_CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
# '{0}' is a str.format placeholder
_TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

# Regex alternation of path names
# NOTE(review): presumably the names that must not be treated as channel names - confirm at the usage sites
_RESERVED_NAMES = '|'.join((
    'channel', 'c', 'user', 'browse', 'playlist', 'watch', 'w', 'v', 'embed', 'e', 'watch_popup', 'shorts',
    'movies', 'results', 'shared', 'hashtag', 'trending', 'feed', 'feeds', 'oembed', 'get_video_info',
    'storefront', 'oops', 'index', 'account', 'reporthistory', 't/terms', 'about', 'upload', 'signin', 'logout',
))

_NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False

_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 87
def _login(self):
    """
    Attempt to log in to YouTube.

    Logging in with username and password is broken, so no login request
    is made; authentication has to be provided through cookies instead.

    True is returned if login was skipped (no username was provided).
    False is returned if a username was provided, since the login cannot be performed.

    If _LOGIN_REQUIRED is set and no 'cookiefile' param was provided,
    raise_login_required is called.
    """
    # username+password login is broken
    if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
        self.raise_login_required(
            'Login details are needed to download this content', method='cookies')

    username, _ = self._get_login_info()
    if username:
        self.report_warning(
            'Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
        return False
    return True
b2e8bc1b 284
def _initialize_consent(self):
    """
    Set an accepting CONSENT cookie on .youtube.com.

    Nothing is done when a '__Secure-3PSID' cookie exists or when the
    current CONSENT cookie already contains 'YES'. The id from a
    'PENDING+<id>' CONSENT cookie is reused; otherwise a random
    three-digit id is generated.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return

    pending_id = None
    consent_cookie = jar.get('CONSENT')
    if consent_cookie:
        if 'YES' in consent_cookie.value:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', consent_cookie.value, 'consent', default=None)

    consent_id = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 299
def _real_initialize(self):
    """Initialize the consent cookie, then attempt login if a downloader is attached."""
    self._initialize_consent()
    if self._downloader is not None:
        self._login()
c5e8d7af 306
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

# Default ytcfg per Innertube client. Every entry has the same shape, so the
# table below only lists (client name, client version, API key, context client name)
_YT_DEFAULT_YTCFGS = {
    client_name: {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': client_name,
        'INNERTUBE_CLIENT_VERSION': client_version,
        'INNERTUBE_API_KEY': api_key,
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': client_name,
                'clientVersion': client_version,
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': context_client_name
    }
    for client_name, client_version, api_key, context_client_name in (
        ('WEB', '2.20210622.10.00', 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', 1),
        ('WEB_REMIX', '1.20210621.00.00', 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30', 67),
        ('WEB_EMBEDDED_PLAYER', '1.20210620.0.1', 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', 56),
        ('ANDROID', '16.20', 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', 3),
        ('ANDROID_EMBEDDED_PLAYER', '16.20', 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', 55),
        ('ANDROID_MUSIC', '4.32', 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30', 21),
    )
}

# API hostname per client; 'DIRECT' is an extra entry that is not a client name
_YT_DEFAULT_INNERTUBE_HOSTS = {
    'DIRECT': 'youtubei.googleapis.com',
    'WEB': 'www.youtube.com',
    'WEB_REMIX': 'music.youtube.com',
    'ANDROID_MUSIC': 'music.youtube.com'
}
404
def _get_default_ytcfg(self, client='WEB'):
    """Return a deep copy of the default ytcfg of `client`, or of 'WEB' if `client` is unknown."""
    defaults = self._YT_DEFAULT_YTCFGS
    key = client
    if key not in defaults:
        self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
        key = 'WEB'
    return copy.deepcopy(defaults[key])
410
def _get_innertube_host(self, client='WEB'):
    """Look up the API host of `client` in _YT_DEFAULT_INNERTUBE_HOSTS, trying 'WEB' as the second key."""
    lookup_order = (client, 'WEB')
    return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, lookup_order)
413
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
    """
    try_get on `ytcfg`, falling back to the default ytcfg of `default_client`
    when the result is falsy.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
418
def _extract_client_name(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_NAME from `ytcfg`, or from the defaults of `default_client`."""
    def client_name(cfg):
        return cfg['INNERTUBE_CLIENT_NAME']

    return self._ytcfg_get_safe(ytcfg, client_name, compat_str, default_client)
421
@staticmethod
def _extract_session_index(ytcfg):
    """Return ytcfg['SESSION_INDEX'] passed through int_or_none."""
    raw_index = try_get(ytcfg, lambda x: x['SESSION_INDEX'])
    return int_or_none(raw_index)
425
def _extract_client_version(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_VERSION from `ytcfg`, or from the defaults of `default_client`."""
    def client_version(cfg):
        return cfg['INNERTUBE_CLIENT_VERSION']

    return self._ytcfg_get_safe(ytcfg, client_version, compat_str, default_client)
428
def _extract_api_key(self, ytcfg=None, default_client='WEB'):
    """Return INNERTUBE_API_KEY from `ytcfg`, or from the defaults of `default_client`."""
    def api_key(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, api_key, compat_str, default_client)
431
def _extract_context(self, ytcfg=None, default_client='WEB'):
    """
    Return the Innertube context to send with API requests.

    The INNERTUBE_CONTEXT of `ytcfg` is returned as-is when present.
    Otherwise the default context of `default_client` is used; if a
    `ytcfg` was given, its client name, client version and VISITOR_DATA
    are written into that default context.
    """
    def context_of(cfg):
        return try_get(cfg, lambda x: x['INNERTUBE_CONTEXT'], dict)

    supplied = context_of(ytcfg)
    if supplied:
        return supplied

    fallback = context_of(self._get_default_ytcfg(default_client))
    if not ytcfg:
        return fallback

    # Recreate the client context (required)
    client_context = fallback['client']
    client_version = self._extract_client_version(ytcfg, default_client)
    client_name = self._extract_client_name(ytcfg, default_client)
    client_context.update(clientVersion=client_version, clientName=client_name)

    visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
    if visitor_data:
        client_context['visitorData'] = visitor_data
    return fallback
451
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """
    Build the 'SAPISIDHASH <time>_<sha1>' authorization value for `origin`.

    Returns None when neither a __Secure-3PAPISID nor a SAPISID cookie is found.
    """
    # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
    # See: https://github.com/yt-dlp/yt-dlp/issues/393
    cookies = self._get_cookies('https://www.youtube.com')
    sapisid = dict_get(cookies, ('__Secure-3PAPISID', 'SAPISID'))
    if sapisid is None:
        return None

    timestamp = round(time.time())
    # SAPISID cookie is required if not already present
    if not cookies.get('SAPISID'):
        self._set_cookie(
            '.youtube.com', 'SAPISID', sapisid.value, secure=True, expire_time=timestamp + 3600)

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    payload = f'{timestamp} {sapisid.value} {origin}'
    digest = hashlib.sha1(payload.encode('utf-8')).hexdigest()
    return f'SAPISIDHASH {timestamp}_{digest}'
a5c56234
M
469
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='WEB'):
    """
    POST `query` as JSON to the Innertube endpoint `ep` and return the result of _download_json.

    @param ep              endpoint name appended to https://<host>/youtubei/v1/
    @param query           dict merged into the request body on top of 'context'
    @param video_id        forwarded to _download_json
    @param fatal           forwarded to _download_json
    @param headers         extra headers; they override the generated ones
    @param note, errnote   forwarded to _download_json
    @param context         Innertube context; defaults to the context of default_client
    @param api_key         API key; defaults to the key of default_client
    @param api_hostname    API host; defaults to the host of default_client
    @param default_client  client whose defaults fill in whatever was not given
    """
    data = {'context': context or self._extract_context(default_client=default_client)}
    data.update(query)

    real_headers = self._generate_api_headers(client=default_client)
    real_headers.update({'content-type': 'application/json'})
    if headers:
        real_headers.update(headers)

    hostname = api_hostname or self._get_innertube_host(default_client)
    # The fallback key must belong to default_client as well: the clients do not
    # all share one key, and context, headers and host already follow default_client
    key = api_key or self._extract_api_key(default_client=default_client)
    return self._download_json(
        'https://%s/youtubei/v1/%s' % (hostname, ep),
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(data).encode('utf8'), headers=real_headers,
        query={'key': key})
485
def _extract_yt_initial_data(self, video_id, webpage):
    """Find the ytInitialData JSON object in `webpage` and return it parsed."""
    # Prefer the match that is followed by a known boundary; fall back to the bare pattern
    patterns = (
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE)
    raw_json = self._search_regex(patterns, webpage, 'yt initial data')
    return self._parse_json(raw_json, video_id)
0c148415 492
a1c5d2ca
M
def _extract_identity_token(self, webpage, item_id):
    """Return ID_TOKEN from the page's ytcfg, else from a regex search of `webpage`, else None."""
    ytcfg = self._extract_ytcfg(item_id, webpage)
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
502
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'])
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated_sid:
            return delegated_sid

        datasync_id = try_get(source, datasync_getters, compat_str) or ''
        parts = datasync_id.split("||")
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]
a1c5d2ca 521
def _extract_ytcfg(self, video_id, webpage):
    """Return the dict passed to ytcfg.set(...) in `webpage`, or {} if it is missing or unparsable."""
    if not webpage:
        return {}
    raw_json = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', default='{}')
    ytcfg = self._parse_json(raw_json, video_id, fatal=False)
    return ytcfg or {}
529
def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None,
                          visitor_data=None, api_hostname=None, client='WEB', session_index=None):
    """
    Build the HTTP headers for an Innertube API request.

    Values that are not passed explicitly (visitor data, session index)
    are looked up in `ytcfg` when one is given. Authorization and X-Origin
    are only added when a SAPISIDHASH could be generated.
    """
    origin = 'https://' + (api_hostname or self._get_innertube_host(client))
    context_client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=client)
    headers = {
        'X-YouTube-Client-Name': compat_str(context_client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, client),
        'Origin': origin
    }

    if ytcfg and not visitor_data:
        visitor_data = try_get(
            self._extract_context(ytcfg, client), lambda x: x['client']['visitorData'], compat_str)
    if identity_token:
        headers['X-Youtube-Identity-Token'] = identity_token
    if account_syncid:
        headers['X-Goog-PageId'] = account_syncid

    if ytcfg and session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if session_index is not None:
        headers['X-Goog-AuthUser'] = session_index
    elif account_syncid:
        # A delegated account without a known session index uses index 0
        headers['X-Goog-AuthUser'] = 0

    if visitor_data:
        headers['X-Goog-Visitor-Id'] = visitor_data

    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers['Authorization'] = auth
        headers['X-Origin'] = origin
    return headers
29f7c58a 557
@staticmethod
def _build_api_continuation_query(continuation, ctp=None):
    """Return the query dict for a continuation token, with clickTracking added when `ctp` is truthy."""
    # TODO: Inconsistency with clickTrackingParams.
    # Currently we have a fixed ctp contained within context (from ytcfg)
    # and a ctp in root query for continuation.
    if not ctp:
        return {'continuation': continuation}
    return {
        'continuation': continuation,
        'clickTracking': {'clickTrackingParams': ctp},
    }
569
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the nextContinuationData or
    reloadContinuationData of `renderer`; returns None if there is no token.
    """
    getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'])
    continuation_data = try_get(renderer, getters, dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))
2d6659b9 582
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.

    Returns None if `continuation_ep` is not a dict or has no continuationCommand token.
    """
    if not isinstance(continuation_ep, dict):
        return
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))
2d6659b9 592
@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query of `renderer`, or None if there is none.

    The renderer's own continuation data is tried first, then the
    continuationItemRenderer entries of its 'contents' and 'items' lists.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query

    entries = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'])
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        query = cls._extract_continuation_ep_data(try_get(entry, endpoint_getters, dict))
        if query:
            return query
613
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) for every alert in data['alerts'].

    Entries that are not dicts, alerts without a 'type' and alerts
    without any text are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Guard the inner value the same way as the outer one, so that
            # malformed data is skipped instead of raising AttributeError
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert.get('text'))
            if message:
                yield alert_type, message
626
def _report_alerts(self, alerts, expected=True):
    """
    Report the (alert_type, message) pairs in `alerts`.

    Non-error alerts, then all errors but the last, are reported as
    warnings. The last error is raised as an ExtractorError.
    """
    errors, warnings = [], []
    for alert_type, alert_message in alerts:
        bucket = errors if alert_type.lower() == 'error' else warnings
        bucket.append([alert_type, alert_message])

    for alert_type, alert_message in warnings + errors[:-1]:
        self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
    if errors:
        _, last_message = errors[-1]
        raise ExtractorError('YouTube said: %s' % last_message, expected=expected)
640
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts of `data` and pass them, with any extra arguments, to _report_alerts."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
643
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadataBadgeRenderer labels in renderer['badges']."""
    badge_list = try_get(renderer, lambda x: x['badges'], list) or []
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in badge_list)
    return {label.lower() for label in labels if label}
651
@staticmethod
def _get_text(data, getter=None, max_runs=None):
    """
    Return the first non-empty text found in `data`, or None.

    @param data      text object, or a container that `getter` is applied to
    @param getter    getter or sequence of getters tried in order; None uses `data` itself
    @param max_runs  limit on how many 'runs' are joined
    """
    for get in variadic(getter):
        node = data if get is None else try_get(data, get)

        simple_text = try_get(node, lambda x: x['simpleText'], compat_str)
        if simple_text:
            return simple_text

        runs = try_get(node, lambda x: x['runs'], list) or []
        # A bare list is treated as the list of runs
        if not runs and isinstance(node, list):
            runs = node

        limit = min(len(runs), max_runs or len(runs))
        joined = ''.join(
            try_get(run, lambda x: x['text'], compat_str) or ''
            for run in runs[:limit])
        if joined:
            return joined
47193e02 670
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='WEB'):
    """
    Call the API endpoint `ep`, retrying on HTTP 500/503/404 and on incomplete data.

    The number of retries is the 'extractor_retries' param (default 3).
    A response counts as complete when `check_get_keys` is empty or
    dict_get finds one of those keys in it. When `fatal` is false,
    failures are reported as warnings and None is returned.
    """
    retries = self.get_param('extractor_retries', 3)
    required_keys = [] if check_get_keys is None else check_get_keys
    response = None
    last_error = None
    attempt = -1
    while attempt < retries:
        attempt += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % attempt if attempt else ''))
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                last_error = 'HTTP Error %s' % e.cause.code
                if attempt < retries:
                    continue
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return

        # Only reached when the API call succeeded
        # Youtube may send alerts if there was an issue with the continuation page
        try:
            self._extract_and_report_alerts(response, expected=False)
        except ExtractorError as e:
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return

        if not required_keys or dict_get(response, required_keys):
            break

        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        last_error = 'Incomplete data received'
        if attempt >= retries:
            if fatal:
                raise ExtractorError(last_error)
            self.report_warning(last_error)
            return
    return response
726
@staticmethod
def is_music_url(url):
    """Return True if `url` starts with http(s)://music.youtube.com/."""
    return bool(re.match(r'https?://music\.youtube\.com/', url))
730
def _extract_video(self, renderer):
    """Build a 'url' type result for YoutubeIE from a video renderer dict."""
    video_id = renderer.get('videoId')

    def text_of(key):
        return self._get_text(renderer.get(key))

    # Whitespace is stripped before matching, so only the leading number is kept
    view_count_text = text_of('viewCountText') or ''
    view_count = str_to_int(self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
        'view count', default=None))

    uploader = self._get_text(renderer, (lambda x: x['ownerText'], lambda x: x['shortBylineText']))

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
        'title': text_of('title'),
        'description': text_of('descriptionSnippet'),
        'duration': parse_duration(text_of('lengthText')),
        'view_count': view_count,
        'uploader': uploader,
    }
754
0c148415 755
360e1ca5 756class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 757 IE_DESC = 'YouTube.com'
bc2ca1bb 758 _INVIDIOUS_SITES = (
759 # invidious-redirect websites
760 r'(?:www\.)?redirect\.invidious\.io',
761 r'(?:(?:www|dev)\.)?invidio\.us',
762 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
763 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 764 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 765 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 766 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
bc2ca1bb 767 # youtube-dl invidious instances list
768 r'(?:(?:www|no)\.)?invidiou\.sh',
769 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
770 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 771 r'(?:www\.)?invidious\.mastodon\.host',
772 r'(?:www\.)?invidious\.zapashcanon\.fr',
ed807c18 773 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
201c1459 774 r'(?:www\.)?invidious\.tinfoil-hat\.net',
775 r'(?:www\.)?invidious\.himiko\.cloud',
776 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 777 r'(?:www\.)?invidious\.tube',
778 r'(?:www\.)?invidiou\.site',
779 r'(?:www\.)?invidious\.site',
780 r'(?:www\.)?invidious\.xyz',
781 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 782 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 783 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 784 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 785 r'(?:www\.)?tube\.poal\.co',
786 r'(?:www\.)?tube\.connect\.cafe',
787 r'(?:www\.)?vid\.wxzm\.sx',
788 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 789 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 790 r'(?:www\.)?yewtu\.be',
791 r'(?:www\.)?yt\.elukerio\.org',
792 r'(?:www\.)?yt\.lelux\.fi',
793 r'(?:www\.)?invidious\.ggc-project\.de',
794 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 795 r'(?:www\.)?ytprivate\.com',
796 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 797 r'(?:www\.)?invidious\.toot\.koeln',
798 r'(?:www\.)?invidious\.fdn\.fr',
799 r'(?:www\.)?watch\.nettohikari\.com',
ed807c18 800 r'(?:www\.)?invidious\.namazso\.eu',
801 r'(?:www\.)?invidious\.silkky\.cloud',
802 r'(?:www\.)?invidious\.exonip\.de',
803 r'(?:www\.)?invidious\.riverside\.rocks',
804 r'(?:www\.)?invidious\.blamefran\.net',
805 r'(?:www\.)?invidious\.moomoo\.de',
806 r'(?:www\.)?ytb\.trom\.tf',
807 r'(?:www\.)?yt\.cyberhost\.uk',
bc2ca1bb 808 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
809 r'(?:www\.)?qklhadlycap4cnod\.onion',
810 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
811 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
812 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
813 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
814 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
815 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
ed807c18 816 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
817 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
818 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
819 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
bc2ca1bb 820 )
# Verbose-mode (?x) regex for a single YouTube video reference.
# It accepts either a URL on one of the listed hosts (youtube.com and its
# -nocookie/kids variants, several mirror sites, youtu.be-style short hosts
# and the Invidious instances) or a bare 11-character video ID, which is
# captured in the named group `id`.
# Capturing group 1 is the optional URL prefix: the conditional `(?(1).+)?`
# only permits trailing text after the ID when that prefix matched, so a
# naked ID must be followed directly by `#` or end of string.
# `%(invidious)s` is replaced (twice) by the `|`-joined _INVIDIOUS_SITES
# alternation via the %-formatting at the end of the statement.
cb7dfeea 821 _VALID_URL = r"""(?x)^
c5e8d7af 822 (
edb53e2d 823 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 824 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
825 (?:www\.)?deturl\.com/www\.youtube\.com|
826 (?:www\.)?pwnyoutube\.com|
827 (?:www\.)?hooktube\.com|
828 (?:www\.)?yourepeat\.com|
829 tube\.majestyc\.net|
830 %(invidious)s|
831 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
832 (?:.*?\#/)? # handle anchor (#/) redirect urls
833 (?: # the various things that can precede the ID:
ac7553d0 834 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 835 |(?: # or the v= param in all its forms
f7000f3a 836 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 837 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 838 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
839 v=
840 )
f4b05232 841 ))
cbaed4bb
S
842 |(?:
843 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
844 vid\.plus| # or vid.plus/xxxx
845 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 846 %(invidious)s
cbaed4bb 847 )/
edb53e2d 848 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 849 )
c5e8d7af 850 )? # all until now is optional -> you can pass the naked ID
201c1459 851 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 852 (?(1).+)? # if we found the ID, everything can follow
9297939e 853 (?:\#|$)""" % {
bc2ca1bb 854 'invidious': '|'.join(_INVIDIOUS_SITES),
855 }
# Regex patterns that each capture a player identifier in the named group
# `id` from a player JS URL/path: the `/s/player/<id>/player` layout, the
# `<id>/player_ias.vflset/.../base.js` and `player-plasma-ias-*` layouts, and
# the older `vfl...js` naming.
# NOTE(review): presumably tried in order by the player-id lookup code, which
# is outside this chunk - confirm against the caller.
e40c758c 856 _PLAYER_INFO_RE = (
cc2db878 857 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
858 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 859 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 860 )
# Static per-format metadata keyed by itag (as a string), plus the special
# '_rtmp' entry. Each value is a partial format dict holding whichever of
# 'ext', 'width', 'height', 'acodec', 'vcodec', 'abr', 'fps', 'container',
# 'format_note', 'preference' and 'protocol' are fixed for that itag; fields
# that vary between videos (e.g. the height of itag 36 or 138) are omitted.
# NOTE(review): how these defaults are merged with data from the player
# response is decided by code outside this chunk - confirm there.
2c62dc26 861 _formats = {
c2d3cb4c 862 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
863 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
864 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
865 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
866 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
867 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
868 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
869 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 870 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 871 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
872 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
873 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
874 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
875 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
876 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 877 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 878 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
879 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 880
881
882 # 3D videos
c2d3cb4c 883 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
884 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
885 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
886 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 887 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
888 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
889 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 890
96fb5605 891 # Apple HTTP Live Streaming
11f12195 892 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 893 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
894 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
895 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
896 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
897 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 898 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
899 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
900
901 # DASH mp4 video
d23028a8
S
902 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
903 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
904 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
905 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
906 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 907 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
908 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
909 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
910 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
911 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
912 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
913 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 914
f6f1fc92 915 # DASH mp4 audio
d23028a8
S
916 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
917 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
918 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
919 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
920 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
921 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
922 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
923
924 # DASH webm video
d23028a8
S
925 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
926 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
927 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
928 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
929 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
930 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
931 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
932 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
933 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
934 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
935 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
936 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
937 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
938 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
939 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 940 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
941 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
942 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
943 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
944 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
945 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
946 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
947
948 # DASH webm audio (vorbis)
d23028a8
S
949 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
950 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 951
0857baad 952 # DASH webm audio with opus inside
d23028a8
S
953 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
954 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
955 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 956
ce6b9a2d
PH
957 # RTMP (unnamed)
958 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
959
960 # av01 video only formats sometimes served with "unknown" codecs
961 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
962 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
963 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
964 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 965 }
# Names of the subtitle formats this extractor knows about.
# NOTE(review): presumably each one is requested when building subtitle
# URLs - the consumer is outside this chunk, confirm there.
29f7c58a 966 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 967

# Messages associated with age-restricted videos.
# NOTE(review): presumably compared against the playability "reason" text
# to detect an age gate - confirm against the extraction code.
109dd3b2 968 _AGE_GATE_REASONS = (
969 'Sign in to confirm your age',
970 'This video may be inappropriate for some users.',
971 'Sorry, this content is age-restricted.')
972

# NOTE(review): presumably overrides the base extractor's geo-bypass default
# to turn it off for YouTube - confirm against InfoExtractor.
fd5c4aab
S
973 _GEO_BYPASS = False
974

# Name under which this extractor is registered/reported.
78caa52a 975 IE_NAME = 'youtube'
2eb88d95
PH
976 _TESTS = [
977 {
2d3d2997 978 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
979 'info_dict': {
980 'id': 'BaW_jenozKc',
981 'ext': 'mp4',
3867038a 982 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
983 'uploader': 'Philipp Hagemeister',
984 'uploader_id': 'phihag',
ec85ded8 985 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
986 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
987 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 988 'upload_date': '20121002',
3867038a 989 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 990 'categories': ['Science & Technology'],
3867038a 991 'tags': ['youtube-dl'],
556dbe7f 992 'duration': 10,
dbdaaa23 993 'view_count': int,
3e7c1224
PH
994 'like_count': int,
995 'dislike_count': int,
7c80519c 996 'start_time': 1,
297a564b 997 'end_time': 9,
2eb88d95 998 }
0e853ca4 999 },
fccd3771 1000 {
4bc3a23e
PH
1001 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1002 'note': 'Embed-only video (#1746)',
1003 'info_dict': {
1004 'id': 'yZIXLfi8CZQ',
1005 'ext': 'mp4',
1006 'upload_date': '20120608',
1007 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1008 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1009 'uploader': 'SET India',
94bfcd23 1010 'uploader_id': 'setindia',
ec85ded8 1011 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1012 'age_limit': 18,
545cc85d 1013 },
1014 'skip': 'Private video',
fccd3771 1015 },
11b56058 1016 {
8bdd16b4 1017 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1018 'note': 'Use the first video ID in the URL',
1019 'info_dict': {
1020 'id': 'BaW_jenozKc',
1021 'ext': 'mp4',
3867038a 1022 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1023 'uploader': 'Philipp Hagemeister',
1024 'uploader_id': 'phihag',
ec85ded8 1025 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 1026 'upload_date': '20121002',
3867038a 1027 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 1028 'categories': ['Science & Technology'],
3867038a 1029 'tags': ['youtube-dl'],
556dbe7f 1030 'duration': 10,
dbdaaa23 1031 'view_count': int,
11b56058
PM
1032 'like_count': int,
1033 'dislike_count': int,
34a7de29
S
1034 },
1035 'params': {
1036 'skip_download': True,
1037 },
11b56058 1038 },
dd27fd17 1039 {
2d3d2997 1040 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1041 'note': '256k DASH audio (format 141) via DASH manifest',
1042 'info_dict': {
1043 'id': 'a9LDPn-MO4I',
1044 'ext': 'm4a',
1045 'upload_date': '20121002',
1046 'uploader_id': '8KVIDEO',
ec85ded8 1047 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1048 'description': '',
1049 'uploader': '8KVIDEO',
1050 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1051 },
4bc3a23e
PH
1052 'params': {
1053 'youtube_include_dash_manifest': True,
1054 'format': '141',
4919603f 1055 },
de3c7fe0 1056 'skip': 'format 141 not served anymore',
dd27fd17 1057 },
8bdd16b4 1058 # DASH manifest with encrypted signature
1059 {
1060 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1061 'info_dict': {
1062 'id': 'IB3lcPjvWLA',
1063 'ext': 'm4a',
1064 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1065 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1066 'duration': 244,
1067 'uploader': 'AfrojackVEVO',
1068 'uploader_id': 'AfrojackVEVO',
1069 'upload_date': '20131011',
cc2db878 1070 'abr': 129.495,
8bdd16b4 1071 },
1072 'params': {
1073 'youtube_include_dash_manifest': True,
1074 'format': '141/bestaudio[ext=m4a]',
1075 },
1076 },
aa79ac0c
PH
1077 # Controversy video
1078 {
1079 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
1080 'info_dict': {
1081 'id': 'T4XJQO3qol8',
1082 'ext': 'mp4',
556dbe7f 1083 'duration': 219,
aa79ac0c 1084 'upload_date': '20100909',
4fe54c12 1085 'uploader': 'Amazing Atheist',
aa79ac0c 1086 'uploader_id': 'TheAmazingAtheist',
ec85ded8 1087 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 1088 'title': 'Burning Everyone\'s Koran',
545cc85d 1089 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 1090 }
c522adb1 1091 },
dd2d55f1 1092 # Normal age-gate video (embed allowed)
c522adb1 1093 {
2d3d2997 1094 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1095 'info_dict': {
1096 'id': 'HtVdAasjOgU',
1097 'ext': 'mp4',
1098 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1099 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1100 'duration': 142,
c522adb1
JMF
1101 'uploader': 'The Witcher',
1102 'uploader_id': 'WitcherGame',
ec85ded8 1103 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1104 'upload_date': '20140605',
34952f09 1105 'age_limit': 18,
c522adb1
JMF
1106 },
1107 },
8bdd16b4 1108 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1109 # YouTube Red ad is not captured for creator
1110 {
1111 'url': '__2ABJjxzNo',
1112 'info_dict': {
1113 'id': '__2ABJjxzNo',
1114 'ext': 'mp4',
1115 'duration': 266,
1116 'upload_date': '20100430',
1117 'uploader_id': 'deadmau5',
1118 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1119 'creator': 'deadmau5',
1120 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1121 'uploader': 'deadmau5',
1122 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1123 'alt_title': 'Some Chords',
8bdd16b4 1124 },
1125 'expected_warnings': [
1126 'DASH manifest missing',
1127 ]
1128 },
067aa17e 1129 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1130 {
1131 'url': 'lqQg6PlCWgI',
1132 'info_dict': {
1133 'id': 'lqQg6PlCWgI',
1134 'ext': 'mp4',
556dbe7f 1135 'duration': 6085,
90227264 1136 'upload_date': '20150827',
cbe2bd91 1137 'uploader_id': 'olympic',
ec85ded8 1138 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1139 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 1140 'uploader': 'Olympic',
cbe2bd91
PH
1141 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1142 },
1143 'params': {
1144 'skip_download': 'requires avconv',
e52a40ab 1145 }
cbe2bd91 1146 },
6271f1ca
PH
1147 # Non-square pixels
1148 {
1149 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1150 'info_dict': {
1151 'id': '_b-2C3KPAM0',
1152 'ext': 'mp4',
1153 'stretched_ratio': 16 / 9.,
556dbe7f 1154 'duration': 85,
6271f1ca
PH
1155 'upload_date': '20110310',
1156 'uploader_id': 'AllenMeow',
ec85ded8 1157 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1158 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1159 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1160 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1161 },
06b491eb
S
1162 },
1163 # url_encoded_fmt_stream_map is empty string
1164 {
1165 'url': 'qEJwOuvDf7I',
1166 'info_dict': {
1167 'id': 'qEJwOuvDf7I',
f57b7835 1168 'ext': 'webm',
06b491eb
S
1169 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1170 'description': '',
1171 'upload_date': '20150404',
1172 'uploader_id': 'spbelect',
1173 'uploader': 'Наблюдатели Петербурга',
1174 },
1175 'params': {
1176 'skip_download': 'requires avconv',
e323cf3f
S
1177 },
1178 'skip': 'This live event has ended.',
06b491eb 1179 },
067aa17e 1180 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1181 {
1182 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1183 'info_dict': {
1184 'id': 'FIl7x6_3R5Y',
eb6793ba 1185 'ext': 'webm',
da77d856
S
1186 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1187 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1188 'duration': 220,
da77d856
S
1189 'upload_date': '20150625',
1190 'uploader_id': 'dorappi2000',
ec85ded8 1191 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1192 'uploader': 'dorappi2000',
eb6793ba 1193 'formats': 'mincount:31',
da77d856 1194 },
eb6793ba 1195 'skip': 'not actual anymore',
2ee8f5d8 1196 },
8a1a26ce
YCH
1197 # DASH manifest with segment_list
1198 {
1199 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1200 'md5': '8ce563a1d667b599d21064e982ab9e31',
1201 'info_dict': {
1202 'id': 'CsmdDsKjzN8',
1203 'ext': 'mp4',
17ee98e1 1204 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1205 'uploader': 'Airtek',
1206 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1207 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1208 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1209 },
1210 'params': {
1211 'youtube_include_dash_manifest': True,
1212 'format': '135', # bestvideo
be49068d
S
1213 },
1214 'skip': 'This live event has ended.',
2ee8f5d8 1215 },
cf7e015f
S
1216 {
1217 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1218 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1219 'info_dict': {
545cc85d 1220 'id': 'jvGDaLqkpTg',
1221 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1222 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1223 },
1224 'playlist': [{
1225 'info_dict': {
545cc85d 1226 'id': 'jvGDaLqkpTg',
cf7e015f 1227 'ext': 'mp4',
545cc85d 1228 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1229 'description': 'md5:e03b909557865076822aa169218d6a5d',
1230 'duration': 10643,
1231 'upload_date': '20161111',
1232 'uploader': 'Team PGP',
1233 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1234 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1235 },
1236 }, {
1237 'info_dict': {
545cc85d 1238 'id': '3AKt1R1aDnw',
cf7e015f 1239 'ext': 'mp4',
545cc85d 1240 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1241 'description': 'md5:e03b909557865076822aa169218d6a5d',
1242 'duration': 10991,
1243 'upload_date': '20161111',
1244 'uploader': 'Team PGP',
1245 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1246 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1247 },
1248 }, {
1249 'info_dict': {
545cc85d 1250 'id': 'RtAMM00gpVc',
cf7e015f 1251 'ext': 'mp4',
545cc85d 1252 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1253 'description': 'md5:e03b909557865076822aa169218d6a5d',
1254 'duration': 10995,
1255 'upload_date': '20161111',
1256 'uploader': 'Team PGP',
1257 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1258 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1259 },
1260 }, {
1261 'info_dict': {
545cc85d 1262 'id': '6N2fdlP3C5U',
cf7e015f 1263 'ext': 'mp4',
545cc85d 1264 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1265 'description': 'md5:e03b909557865076822aa169218d6a5d',
1266 'duration': 10990,
1267 'upload_date': '20161111',
1268 'uploader': 'Team PGP',
1269 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1270 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1271 },
1272 }],
1273 'params': {
1274 'skip_download': True,
1275 },
cbaed4bb 1276 },
f9f49d87 1277 {
067aa17e 1278 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1279 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1280 'info_dict': {
1281 'id': 'gVfLd0zydlo',
1282 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1283 },
1284 'playlist_count': 2,
be49068d 1285 'skip': 'Not multifeed anymore',
f9f49d87 1286 },
cbaed4bb 1287 {
2d3d2997 1288 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1289 'only_matching': True,
0e49d9a6 1290 },
6d4fc66b 1291 {
2d3d2997 1292 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1293 'only_matching': True,
1294 },
0e49d9a6 1295 {
067aa17e 1296 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1297 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1298 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1299 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1300 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1301 'info_dict': {
1302 'id': 'lsguqyKfVQg',
1303 'ext': 'mp4',
1304 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 1305 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 1306 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1307 'duration': 133,
0e49d9a6
LL
1308 'upload_date': '20151119',
1309 'uploader_id': 'IronSoulElf',
ec85ded8 1310 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1311 'uploader': 'IronSoulElf',
eb6793ba
S
1312 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1313 'track': 'Dark Walk - Position Music',
1314 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 1315 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1316 },
1317 'params': {
1318 'skip_download': True,
1319 },
1320 },
61f92af1 1321 {
067aa17e 1322 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1323 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1324 'only_matching': True,
1325 },
313dfc45
LL
1326 {
1327 # Video with yt:stretch=17:0
1328 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1329 'info_dict': {
1330 'id': 'Q39EVAstoRM',
1331 'ext': 'mp4',
1332 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1333 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1334 'upload_date': '20151107',
1335 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1336 'uploader': 'CH GAMER DROID',
1337 },
1338 'params': {
1339 'skip_download': True,
1340 },
be49068d 1341 'skip': 'This video does not exist.',
313dfc45 1342 },
201c1459 1343 {
1344 # Video with incomplete 'yt:stretch=16:'
1345 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1346 'only_matching': True,
1347 },
7caf9830
S
1348 {
1349 # Video licensed under Creative Commons
1350 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1351 'info_dict': {
1352 'id': 'M4gD1WSo5mA',
1353 'ext': 'mp4',
1354 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1355 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1356 'duration': 721,
7caf9830
S
1357 'upload_date': '20150127',
1358 'uploader_id': 'BerkmanCenter',
ec85ded8 1359 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1360 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1361 'license': 'Creative Commons Attribution license (reuse allowed)',
1362 },
1363 'params': {
1364 'skip_download': True,
1365 },
1366 },
fd050249
S
1367 {
1368 # Channel-like uploader_url
1369 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1370 'info_dict': {
1371 'id': 'eQcmzGIKrzg',
1372 'ext': 'mp4',
1373 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1374 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1375 'duration': 4060,
fd050249 1376 'upload_date': '20151119',
eb6793ba 1377 'uploader': 'Bernie Sanders',
fd050249 1378 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1379 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1380 'license': 'Creative Commons Attribution license (reuse allowed)',
1381 },
1382 'params': {
1383 'skip_download': True,
1384 },
1385 },
040ac686
S
1386 {
1387 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1388 'only_matching': True,
7f29cf54
S
1389 },
1390 {
067aa17e 1391 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1392 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1393 'only_matching': True,
6496ccb4
S
1394 },
1395 {
1396 # Rental video preview
1397 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1398 'info_dict': {
1399 'id': 'uGpuVWrhIzE',
1400 'ext': 'mp4',
1401 'title': 'Piku - Trailer',
1402 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1403 'upload_date': '20150811',
1404 'uploader': 'FlixMatrix',
1405 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1406 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1407 'license': 'Standard YouTube License',
1408 },
1409 'params': {
1410 'skip_download': True,
1411 },
eb6793ba 1412 'skip': 'This video is not available.',
022a5d66 1413 },
12afdc2a
S
1414 {
1415 # YouTube Red video with episode data
1416 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1417 'info_dict': {
1418 'id': 'iqKdEhx-dD4',
1419 'ext': 'mp4',
1420 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1421 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1422 'duration': 2085,
12afdc2a
S
1423 'upload_date': '20170118',
1424 'uploader': 'Vsauce',
1425 'uploader_id': 'Vsauce',
1426 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1427 'series': 'Mind Field',
1428 'season_number': 1,
1429 'episode_number': 1,
1430 },
1431 'params': {
1432 'skip_download': True,
1433 },
1434 'expected_warnings': [
1435 'Skipping DASH manifest',
1436 ],
1437 },
c7121fa7
S
1438 {
1439 # The following content has been identified by the YouTube community
1440 # as inappropriate or offensive to some audiences.
1441 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1442 'info_dict': {
1443 'id': '6SJNVb0GnPI',
1444 'ext': 'mp4',
1445 'title': 'Race Differences in Intelligence',
1446 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1447 'duration': 965,
1448 'upload_date': '20140124',
1449 'uploader': 'New Century Foundation',
1450 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1451 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1452 },
1453 'params': {
1454 'skip_download': True,
1455 },
545cc85d 1456 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1457 },
022a5d66
S
1458 {
1459 # itag 212
1460 'url': '1t24XAntNCY',
1461 'only_matching': True,
fd5c4aab
S
1462 },
1463 {
1464 # geo restricted to JP
1465 'url': 'sJL6WA-aGkQ',
1466 'only_matching': True,
1467 },
cd5a74a2
S
1468 {
1469 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1470 'only_matching': True,
1471 },
bc2ca1bb 1472 {
1473 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1474 'only_matching': True,
1475 },
1476 {
1477 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1478 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1479 'only_matching': True,
1480 },
825cd268
RA
1481 {
1482 # DRM protected
1483 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1484 'only_matching': True,
4fe54c12
S
1485 },
1486 {
1487 # Video with unsupported adaptive stream type formats
1488 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1489 'info_dict': {
1490 'id': 'Z4Vy8R84T1U',
1491 'ext': 'mp4',
1492 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1493 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1494 'duration': 433,
1495 'upload_date': '20130923',
1496 'uploader': 'Amelia Putri Harwita',
1497 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1498 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1499 'formats': 'maxcount:10',
1500 },
1501 'params': {
1502 'skip_download': True,
1503 'youtube_include_dash_manifest': False,
1504 },
5429d6a9 1505 'skip': 'not actual anymore',
5caabd3c 1506 },
1507 {
822b9d9c 1508 # Youtube Music Auto-generated description
5caabd3c 1509 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1510 'info_dict': {
1511 'id': 'MgNrAu2pzNs',
1512 'ext': 'mp4',
1513 'title': 'Voyeur Girl',
1514 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1515 'upload_date': '20190312',
5429d6a9
S
1516 'uploader': 'Stephen - Topic',
1517 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1518 'artist': 'Stephen',
1519 'track': 'Voyeur Girl',
1520 'album': 'it\'s too much love to know my dear',
1521 'release_date': '20190313',
1522 'release_year': 2019,
1523 },
1524 'params': {
1525 'skip_download': True,
1526 },
1527 },
66b48727
RA
1528 {
1529 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1530 'only_matching': True,
1531 },
011e75e6
S
1532 {
1533 # invalid -> valid video id redirection
1534 'url': 'DJztXj2GPfl',
1535 'info_dict': {
1536 'id': 'DJztXj2GPfk',
1537 'ext': 'mp4',
1538 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1539 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1540 'upload_date': '20090125',
1541 'uploader': 'Prochorowka',
1542 'uploader_id': 'Prochorowka',
1543 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1544 'artist': 'Panjabi MC',
1545 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1546 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1547 },
1548 'params': {
1549 'skip_download': True,
1550 },
545cc85d 1551 'skip': 'Video unavailable',
ea74e00b
DP
1552 },
1553 {
1554 # empty description results in an empty string
1555 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1556 'info_dict': {
1557 'id': 'x41yOUIvK2k',
1558 'ext': 'mp4',
1559 'title': 'IMG 3456',
1560 'description': '',
1561 'upload_date': '20170613',
1562 'uploader_id': 'ElevageOrVert',
1563 'uploader': 'ElevageOrVert',
1564 },
1565 'params': {
1566 'skip_download': True,
1567 },
1568 },
a0566bbf 1569 {
29f7c58a 1570 # with '};' inside yt initial data (see [1])
1571 # see [2] for an example with '};' inside ytInitialPlayerResponse
1572 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1573 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1574 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1575 'info_dict': {
1576 'id': 'CHqg6qOn4no',
1577 'ext': 'mp4',
1578 'title': 'Part 77 Sort a list of simple types in c#',
1579 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1580 'upload_date': '20130831',
1581 'uploader_id': 'kudvenkat',
1582 'uploader': 'kudvenkat',
1583 },
1584 'params': {
1585 'skip_download': True,
1586 },
1587 },
29f7c58a 1588 {
1589 # another example of '};' in ytInitialData
1590 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1591 'only_matching': True,
1592 },
1593 {
1594 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1595 'only_matching': True,
1596 },
545cc85d 1597 {
cc2db878 1598 # https://github.com/ytdl-org/youtube-dl/pull/28094
1599 'url': 'OtqTfy26tG0',
1600 'info_dict': {
1601 'id': 'OtqTfy26tG0',
1602 'ext': 'mp4',
1603 'title': 'Burn Out',
1604 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1605 'upload_date': '20141120',
1606 'uploader': 'The Cinematic Orchestra - Topic',
1607 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1608 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1609 'artist': 'The Cinematic Orchestra',
1610 'track': 'Burn Out',
1611 'album': 'Every Day',
1612 'release_data': None,
1613 'release_year': None,
1614 },
1615 'params': {
1616 'skip_download': True,
1617 },
545cc85d 1618 },
bc2ca1bb 1619 {
1620 # controversial video, only works with bpctr when authenticated with cookies
1621 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1622 'only_matching': True,
1623 },
f7ad7160 1624 {
1625 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1626 'url': 'cBvYw8_A0vQ',
1627 'info_dict': {
1628 'id': 'cBvYw8_A0vQ',
1629 'ext': 'mp4',
1630 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1631 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1632 'upload_date': '20201120',
1633 'uploader': 'Walk around Japan',
1634 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1635 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1636 },
1637 'params': {
1638 'skip_download': True,
1639 },
0fb983f6 1640 }, {
1641 # Has multiple audio streams
1642 'url': 'WaOKSUlf4TM',
1643 'only_matching': True
9297939e 1644 }, {
1645 # Requires Premium: has format 141 when requested using YTM url
1646 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1647 'only_matching': True
1648 }, {
120916da 1649 # multiple subtitles with same lang_code
1650 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1651 'only_matching': True,
109dd3b2 1652 }, {
1653 # Force use android client fallback
1654 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1655 'info_dict': {
1656 'id': 'YOelRv7fMxY',
1657 'title': 'Digging a Secret Tunnel from my Workshop',
1658 'ext': '3gp',
1659 'upload_date': '20210624',
1660 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1661 'uploader': 'colinfurze',
1662 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1663 'description': 'md5:ecb672623246d98c6c562eed6ae798c3'
1664 },
1665 'params': {
1666 'format': '17', # 3gp format available on android
1667 'extractor_args': {'youtube': {'player_client': ['android']}},
1668 },
120916da 1669 },
109dd3b2 1670 {
1671 # Skip download of additional client configs (remix client config in this case)
1672 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1673 'only_matching': True,
1674 'params': {
1675 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1676 },
1677 }
2eb88d95
PH
1678 ]
1679
@classmethod
def suitable(cls, url):
    """Reject URLs whose query string has a non-empty `list` parameter.

    Every other URL is judged by the parent class' `suitable`.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_values = parse_qs(url).get('list', [None])
    return False if list_values[0] else super(YoutubeIE, cls).suitable(url)
1689
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create two empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player code
    # _player_cache: (player URL, signature cache id) -> signature function
    self._code_cache, self._player_cache = {}, {}
e0df6211 1694
def _extract_player_url(self, ytcfg=None, webpage=None):
    """Return the absolute URL of the JS player, or None when it cannot be found.

    The URL is taken from ytcfg['PLAYER_JS_URL'] when present; otherwise the
    webpage is searched for a "PLAYER_JS_URL"/"jsUrl" JSON entry.
    Protocol-relative ('//...') and site-relative URLs are made absolute.
    """
    player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
    # Only search the webpage when there is one: its download is non-fatal
    # in _real_extract, so it may be missing
    if not player_url and webpage:
        player_url = self._search_regex(
            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
            webpage, 'player URL', fatal=False)
    if not player_url:
        # Nothing found: report that to the caller instead of failing on
        # None.startswith below
        return None
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)
    return player_url
1707
60064c53
PH
1708 def _signature_cache_id(self, example_sig):
1709 """ Return a string representation of a signature """
78caa52a 1710 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
60064c53 1711
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the player id captured by the first matching `_PLAYER_INFO_RE` pattern.

    Raises ExtractorError when none of the patterns matches player_url.
    """
    for pattern in cls._PLAYER_INFO_RE:
        found = re.search(pattern, player_url)
        if found:
            return found.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)
e40c758c 1721
109dd3b2 1722 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1723 player_id = self._extract_player_info(player_url)
1724 if player_id not in self._code_cache:
1725 self._code_cache[player_id] = self._download_webpage(
1726 player_url, video_id, fatal=fatal,
1727 note='Downloading player ' + player_id,
1728 errnote='Download of %s failed' % player_url)
1729 return player_id in self._code_cache
1730
e40c758c 1731 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 1732 player_id = self._extract_player_info(player_url)
e0df6211 1733
c4417ddb 1734 # Read from filesystem cache
545cc85d 1735 func_id = 'js_%s_%s' % (
1736 player_id, self._signature_cache_id(example_sig))
c4417ddb 1737 assert os.path.basename(func_id) == func_id
a0e07d31 1738
69ea8ca4 1739 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1740 if cache_spec is not None:
78caa52a 1741 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1742
109dd3b2 1743 if self._load_player(video_id, player_url):
1744 code = self._code_cache[player_id]
1745 res = self._parse_sig_js(code)
e0df6211 1746
109dd3b2 1747 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1748 cache_res = res(test_string)
1749 cache_spec = [ord(c) for c in cache_res]
83799698 1750
109dd3b2 1751 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1752 return res
83799698 1753
60064c53 1754 def _print_sig_code(self, func, example_sig):
edf3e38e
PH
1755 def gen_sig_code(idxs):
1756 def _genslice(start, end, step):
78caa52a 1757 starts = '' if start == 0 else str(start)
8bcc8756 1758 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
69ea8ca4 1759 steps = '' if step == 1 else (':%d' % step)
78caa52a 1760 return 's[%s%s%s]' % (starts, ends, steps)
edf3e38e
PH
1761
1762 step = None
7af808a5
PH
1763 # Quelch pyflakes warnings - start will be set when step is set
1764 start = '(Never used)'
edf3e38e
PH
1765 for i, prev in zip(idxs[1:], idxs[:-1]):
1766 if step is not None:
1767 if i - prev == step:
1768 continue
1769 yield _genslice(start, prev, step)
1770 step = None
1771 continue
1772 if i - prev in [-1, 1]:
1773 step = i - prev
1774 start = prev
1775 continue
1776 else:
78caa52a 1777 yield 's[%d]' % prev
edf3e38e 1778 if step is None:
78caa52a 1779 yield 's[%d]' % i
edf3e38e
PH
1780 else:
1781 yield _genslice(start, i, step)
1782
78caa52a 1783 test_string = ''.join(map(compat_chr, range(len(example_sig))))
c705320f 1784 cache_res = func(test_string)
edf3e38e 1785 cache_spec = [ord(c) for c in cache_res]
78caa52a 1786 expr_code = ' + '.join(gen_sig_code(cache_spec))
60064c53
PH
1787 signature_id_tuple = '(%s)' % (
1788 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
69ea8ca4 1789 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
78caa52a 1790 ' return %s\n') % (signature_id_tuple, expr_code)
69ea8ca4 1791 self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1792
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and wrap it for Python use.

    The function name is the `sig` group of the first matching pattern below.
    The returned callable takes the signature string and passes it to the
    interpreted JS function as its single argument.
    """
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        sig_name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature(s):
        # The interpreted function takes its arguments as a list
        return initial_function([s])
    return run_signature
1816
545cc85d 1817 def _decrypt_signature(self, s, video_id, player_url):
257a2501 1818 """Turn the encrypted s field into a working signature"""
6b37f0be 1819
c8bf86d5 1820 if player_url is None:
69ea8ca4 1821 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1822
c8bf86d5 1823 try:
62af3a0e 1824 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1825 if player_id not in self._player_cache:
1826 func = self._extract_signature_function(
60064c53 1827 video_id, player_url, s
c8bf86d5
PH
1828 )
1829 self._player_cache[player_id] = func
1830 func = self._player_cache[player_id]
a06916d9 1831 if self.get_param('youtube_print_sig_code'):
60064c53 1832 self._print_sig_code(func, s)
c8bf86d5
PH
1833 return func(s)
1834 except Exception as e:
1835 tb = traceback.format_exc()
1836 raise ExtractorError(
78caa52a 1837 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1838
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    ytcfg['STS'] is used when it gives a truthy value; otherwise the value
    is searched for in the player code. Without a player_url this raises
    ExtractorError if fatal, else warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if not fatal:
            self.report_warning(error_msg)
            return
        raise ExtractorError(error_msg)

    if not self._load_player(video_id, player_url, fatal=fatal):
        # Player code unavailable: hand back whatever ytcfg gave us
        return sts

    code = self._code_cache[self._extract_player_info(player_url)]
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
1863
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL from player_response to mark the video watched.

    Does nothing when player_response has no valid videostats playback URL.
    The request itself is non-fatal.
    """
    base_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not base_url:
        return

    url_parts = compat_urlparse.urlparse(base_url)
    qs = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)]

    qs['ver'] = ['2']
    qs['cpn'] = [''.join(cpn_chars)]
    new_query = compat_urllib_parse_urlencode(qs, True)
    playback_url = compat_urlparse.urlunparse(url_parts._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1888
66c9fa36
S
1889 @staticmethod
1890 def _extract_urls(webpage):
1891 # Embedded YouTube player
1892 entries = [
1893 unescapeHTML(mobj.group('url'))
1894 for mobj in re.finditer(r'''(?x)
1895 (?:
1896 <iframe[^>]+?src=|
1897 data-video-url=|
1898 <embed[^>]+?src=|
1899 embedSWF\(?:\s*|
1900 <object[^>]+data=|
1901 new\s+SWFObject\(
1902 )
1903 (["\'])
1904 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
f2332f18 1905 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
66c9fa36
S
1906 \1''', webpage)]
1907
1908 # lazyYT YouTube embed
1909 entries.extend(list(map(
1910 unescapeHTML,
1911 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1912
1913 # Wordpress "YouTube Video Importer" plugin
1914 matches = re.findall(r'''(?x)<div[^>]+
1915 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1916 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1917 entries.extend(m[-1] for m in matches)
1918
1919 return entries
1920
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by `_extract_urls`, or None when there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1925
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return group 2 of `_VALID_URL` matched (verbose mode) against url.

    Raises ExtractorError when the URL does not match.
    """
    matched = re.match(cls._VALID_URL, url, re.VERBOSE)
    if matched is not None:
        return matched.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1933
def _extract_chapters_from_json(self, data, duration):
    """Build the chapter list from the player-bar chapters found in data.

    Start times come from `timeRangeStartMillis` (converted to seconds) and
    titles from the chapter renderer's `simpleText`.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )

    def start_seconds(chapter):
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_text(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        traverse_obj(data, chapters_path, expected_type=list),
        chapter_time=start_seconds,
        chapter_title=title_text,
        duration=duration)
1948
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build the chapter list from the macro-marker engagement panels in data.

    Every panel's marker list is tried in turn; the first one that gives a
    non-empty chapter list wins. Returns [] when no panel gives chapters.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list)
    chapter_time = lambda chapter: parse_duration(self._get_text(chapter.get('timeDescription')))
    chapter_title = lambda chapter: self._get_text(chapter.get('title'))

    # content_list may be None when nothing matched (presumably traverse_obj's
    # default); iterating it directly would raise TypeError
    for contents in content_list or []:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
1964
1965 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
84213ea8 1966 chapters = []
7c365c21 1967 last_chapter = {'start_time': 0}
1968 for idx, chapter in enumerate(chapter_list or []):
1969 title = chapter_title(chapter)
84213ea8
S
1970 start_time = chapter_time(chapter)
1971 if start_time is None:
1972 continue
7c365c21 1973 last_chapter['end_time'] = start_time
1974 if start_time < last_chapter['start_time']:
1975 if idx == 1:
1976 chapters.pop()
1977 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
1978 else:
1979 self.report_warning(f'Invalid start time for chapter "{title}"')
1980 continue
1981 last_chapter = {'start_time': start_time, 'title': title}
1982 chapters.append(last_chapter)
1983 last_chapter['end_time'] = duration
84213ea8
S
1984 return chapters
1985
545cc85d 1986 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1987 return self._parse_json(self._search_regex(
1988 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1989 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 1990
d92f5d5a 1991 @staticmethod
1992 def parse_time_text(time_text):
1993 """
1994 Parse the comment time text
1995 time_text is in the format 'X units ago (edited)'
1996 """
1997 time_text_split = time_text.split(' ')
1998 if len(time_text_split) >= 3:
1999 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2000
a1c5d2ca
M
def _extract_comment(self, comment_renderer, parent=None):
    """Convert one comment renderer into a comment dict.

    Returns None when the renderer has no `commentId`. `parent` is the id of
    the comment being replied to; it is reported as 'root' when falsy.
    `timestamp` is None when the published time text cannot be parsed.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer.get('contentText'))

    # note: timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer.get('publishedTimeText')) or ''
    time_text_dt = self.parse_time_text(time_text)
    # Must be bound even when the time text is unparsable, otherwise building
    # the result below fails with UnboundLocalError
    timestamp = None
    if isinstance(time_text_dt, datetime.datetime):
        timestamp = calendar.timegm(time_text_dt.timetuple())
    author = self._get_text(comment_renderer.get('authorText'))
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                   lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_favorited = 'creatorHeart' in (try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
2038
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, video_id, parent=None, comment_counts=None):
    """Generate the comments reachable from root_continuation_data.

    Yields comment dicts (as built by `_extract_comment`) and, when a
    comments header is found, one int holding the estimated total.
    Reply threads are walked by calling this generator again with `parent`
    set to the parent comment's id. `comment_counts` is a shared mutable
    list: [comments so far, estimated total, current thread number].
    """

    def extract_header(contents):
        # Reads the estimated total and the continuation for the chosen sort
        _total_comments = 0
        _continuation = None
        for content in contents:
            comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
            expected_comment_count = parse_count(self._get_text(
                comments_header_renderer, (lambda x: x['countText'], lambda x: x['commentsCount']), max_runs=1))

            if expected_comment_count:
                comment_counts[1] = expected_comment_count
                self.to_screen('Downloading ~%d comments' % expected_comment_count)
                _total_comments = comment_counts[1]
            sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
            comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = sort_menu_item.get('title')
            if isinstance(sort_text, compat_str):
                sort_text = sort_text.lower()
            else:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text)
            break
        return _total_comments, _continuation

    def extract_thread(contents):
        # Yields every comment in contents, each followed by its replies
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # `dict` is the expected type here. It used to sit inside the
            # getter tuple, where it acted as a second getter and returned
            # a copy of the container instead of the comment renderer.
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    video_id, parent=comment.get('id'), comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    # YouTube comments have a max depth of 2
    max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
    if max_depth == 1 and parent:
        return
    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    continuation = self._extract_continuation(root_continuation_data)
    if continuation and len(continuation['continuation']) < 27:
        self.write_debug('Detected old API continuation token. Generating new API compatible token.')
        continuation_token = self._generate_comment_continuation(video_id)
        continuation = self._build_api_continuation_query(continuation_token, None)

    visitor_data = None
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    comment_counts[2], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
        if not response:
            break
        # Keep the previous visitor data when the response carries none
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

        continuation = None
        if isinstance(continuation_contents, list):
            for continuation_section in continuation_contents:
                if not isinstance(continuation_section, dict):
                    continue
                continuation_items = try_get(
                    continuation_section,
                    (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                     lambda x: x['appendContinuationItemsAction']['continuationItems']),
                    list) or []
                if is_first_continuation:
                    total_comments, continuation = extract_header(continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break
                    continue
                # Count from 1 so that 0 really means "nothing was yielded";
                # counting from 0 made a page with exactly one entry look empty
                count = 0
                for count, entry in enumerate(extract_thread(continuation_items), 1):
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break

        # Deprecated response structure
        elif isinstance(continuation_contents, dict):
            known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
            for key, continuation_renderer in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                if not isinstance(continuation_renderer, dict):
                    continue
                if is_first_continuation:
                    header_continuation_items = [continuation_renderer.get('header') or {}]
                    total_comments, continuation = extract_header(header_continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break

                # Sometimes YouTube provides a continuation without any comments
                # In most cases we end up just downloading these with very little comments to come.
                count = 0
                for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {}), 1):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                if count == 0:
                    if not parent:
                        self.report_warning('No comments received - assuming end of comments')
                    continuation = None
                break
a1c5d2ca 2209
2d6659b9 2210 @staticmethod
2211 def _generate_comment_continuation(video_id):
2212 """
2213 Generates initial comment section continuation token from given video id
2214 """
2215 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2216 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2217 new_continuation_intlist = list(itertools.chain.from_iterable(
2218 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2219 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2220
def _extract_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""
    known_entry_comment_renderers = ('itemSectionRenderer',)

    def _real_comment_extract(contents):
        # Anything but a list holds no comment sections
        if not isinstance(contents, list):
            return
        for entry in contents:
            for key, renderer in entry.items():
                if key in known_entry_comment_renderers:
                    yield from self._comment_entries(
                        renderer, video_id=video_id, ytcfg=ytcfg,
                        identity_token=self._extract_identity_token(webpage, item_id=video_id),
                        account_syncid=self._extract_account_syncid(ytcfg))
                    break

    collected = []
    estimated_total = 0
    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')

    try:
        for item in _real_comment_extract(contents):
            if len(collected) >= max_comments:
                break
            if isinstance(item, int):
                # An int is the estimated total, not a comment
                estimated_total = item
            else:
                collected.append(item)
    except KeyboardInterrupt:
        # Keep whatever was downloaded before the interruption
        self.to_screen('Interrupted by user')
    self.to_screen('Downloaded %d/%d comments' % (len(collected), estimated_total))
    return {
        'comments': collected,
        'comment_count': len(collected),
    }
2254
109dd3b2 2255 @staticmethod
2256 def _generate_player_context(sts=None):
2257 context = {
2258 'html5Preference': 'HTML5_PREF_WANTS',
2259 }
2260 if sts is not None:
2261 context['signatureTimestamp'] = sts
2262 return {
2263 'playbackContext': {
2264 'contentPlaybackContext': context
2265 }
2266 }
2267
4e6767b5 2268 @staticmethod
c888ffb9 2269 def _get_video_info_params(video_id, client='TVHTML5'):
2270 GVI_CLIENTS = {
2271 'ANDROID': {
2272 'c': 'ANDROID',
2273 'cver': '16.20',
2274 },
2275 'TVHTML5': {
2276 'c': 'TVHTML5',
2277 'cver': '6.20180913',
2278 }
2279 }
2280 query = {
4e6767b5 2281 'video_id': video_id,
2282 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c888ffb9 2283 'html5': '1'
4e6767b5 2284 }
c888ffb9 2285 query.update(GVI_CLIENTS.get(client))
2286 return query
4e6767b5 2287
def _real_extract(self, url):
    """Extract formats and metadata for a single video URL.

    Returns the info dict for the video. Two early exits exist: a
    ``url_result`` pointing at the trailer when the playability status
    carries a trailer video ID, and a ``playlist_result`` of per-feed
    entries when the player response lists multiple camera feeds and
    neither ``force_singlefeed`` (smuggled) nor ``noplaylist`` is set.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id
    webpage = self._download_webpage(
        webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

    ytcfg = self._extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
    identity_token = self._extract_identity_token(webpage, video_id)
    session_index = self._extract_session_index(ytcfg)
    player_url = self._extract_player_url(ytcfg, webpage)

    player_client = self._configuration_arg('player_client', [''])[0]
    if player_client not in ('web', 'android', ''):
        self.report_warning(f'Invalid player_client {player_client} given. Falling back to android client.')
    # Anything other than an explicit 'web' (including the default '') uses the android client
    force_mobile_client = player_client != 'web'
    player_skip = self._configuration_arg('player_skip')
    player_response = None
    if webpage:
        player_response = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    syncid = self._extract_account_syncid(ytcfg, player_response)
    headers = self._generate_api_headers(ytcfg, identity_token, syncid, session_index=session_index)

    # Extra streaming data from the music (remix) client, for music URLs only
    ytm_streaming_data = {}
    if is_music_url:
        ytm_webpage = None
        sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
        if sts and not force_mobile_client and 'configs' not in player_skip:
            ytm_webpage = self._download_webpage(
                'https://music.youtube.com',
                video_id, fatal=False, note='Downloading remix client config')

        ytm_cfg = self._extract_ytcfg(video_id, ytm_webpage) or {}
        ytm_client = 'WEB_REMIX'
        if not sts or force_mobile_client:
            # Android client already has signature descrambled
            # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
            if not sts:
                self.report_warning('Falling back to android remix client for player API.')
            ytm_client = 'ANDROID_MUSIC'
            ytm_cfg = {}

        ytm_headers = self._generate_api_headers(
            ytm_cfg, identity_token, syncid,
            client=ytm_client, session_index=session_index)
        ytm_query = {'videoId': video_id}
        ytm_query.update(self._generate_player_context(sts))

        ytm_player_response = self._extract_response(
            item_id=video_id, ep='player', query=ytm_query,
            ytcfg=ytm_cfg, headers=ytm_headers, fatal=False,
            default_client=ytm_client,
            note='Downloading %sremix player API JSON' % ('android ' if force_mobile_client else ''))
        ytm_streaming_data = try_get(ytm_player_response, lambda x: x['streamingData'], dict) or {}

    # Query the player API when the webpage gave no response or the mobile client is wanted
    if not player_response or force_mobile_client:
        sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
        yt_client = 'WEB'
        ytpcfg = ytcfg
        ytp_headers = headers
        if not sts or force_mobile_client:
            # Android client already has signature descrambled
            # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
            if not sts:
                self.report_warning('Falling back to android client for player API.')
            yt_client = 'ANDROID'
            ytpcfg = {}
            ytp_headers = self._generate_api_headers(ytpcfg, identity_token, syncid,
                                                     client=yt_client, session_index=session_index)

        yt_query = {'videoId': video_id}
        yt_query.update(self._generate_player_context(sts))
        player_response = self._extract_response(
            item_id=video_id, ep='player', query=yt_query,
            ytcfg=ytpcfg, headers=ytp_headers, fatal=False,
            default_client=yt_client,
            note='Downloading %splayer API JSON' % ('android ' if force_mobile_client else '')
        ) or player_response

    # Both the webpage and the player API request are non-fatal, so the
    # response can still be None here; normalise it so the `.get` calls
    # below cannot raise AttributeError
    player_response = player_response or {}

    # Age-gate workarounds
    playability_status = player_response.get('playabilityStatus') or {}
    if playability_status.get('reason') in self._AGE_GATE_REASONS:
        gvi_clients = ('ANDROID', 'TVHTML5') if force_mobile_client else ('TVHTML5', 'ANDROID')
        for gvi_client in gvi_clients:
            pr = self._parse_json(try_get(compat_parse_qs(
                self._download_webpage(
                    base_url + 'get_video_info', video_id,
                    'Refetching age-gated %s info webpage' % gvi_client.lower(),
                    'unable to download video info webpage', fatal=False,
                    query=self._get_video_info_params(video_id, client=gvi_client))),
                lambda x: x['player_response'][0],
                compat_str) or '{}', video_id)
            if pr:
                break
        if not pr:
            self.report_warning('Falling back to embedded-only age-gate workaround.')
            embed_webpage = None
            sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
            if sts and not force_mobile_client and 'configs' not in player_skip:
                embed_webpage = self._download_webpage(
                    'https://www.youtube.com/embed/%s?html5=1' % video_id,
                    video_id=video_id, note='Downloading age-gated embed config')

            ytcfg_age = self._extract_ytcfg(video_id, embed_webpage) or {}
            # If we extracted the embed webpage, it'll tell us if we can view the video
            embedded_pr = self._parse_json(
                try_get(ytcfg_age, lambda x: x['PLAYER_VARS']['embedded_player_response'], str) or '{}',
                video_id=video_id)
            embedded_ps_reason = try_get(embedded_pr, lambda x: x['playabilityStatus']['reason'], str) or ''
            if embedded_ps_reason not in self._AGE_GATE_REASONS:
                yt_client = 'WEB_EMBEDDED_PLAYER'
                if not sts or force_mobile_client:
                    # Android client already has signature descrambled
                    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
                    if not sts:
                        self.report_warning(
                            'Falling back to android embedded client for player API (note: some formats may be missing).')
                    yt_client = 'ANDROID_EMBEDDED_PLAYER'
                    ytcfg_age = {}

                ytage_headers = self._generate_api_headers(
                    ytcfg_age, identity_token, syncid,
                    client=yt_client, session_index=session_index)
                yt_age_query = {'videoId': video_id}
                yt_age_query.update(self._generate_player_context(sts))
                pr = self._extract_response(
                    item_id=video_id, ep='player', query=yt_age_query,
                    ytcfg=ytcfg_age, headers=ytage_headers, fatal=False,
                    default_client=yt_client,
                    note='Downloading %sage-gated player API JSON' % ('android ' if force_mobile_client else '')
                ) or {}

        if pr:
            player_response = pr

    # Early exit: redirect to the trailer named by the (original) playability status
    trailer_video_id = try_get(
        playability_status,
        lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
        compat_str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Without a webpage there are no <meta> tags to search
    search_meta = (
        lambda x: self._html_search_meta(x, webpage, default=None)) \
        if webpage else lambda x: None

    video_details = player_response.get('videoDetails') or {}
    microformat = try_get(
        player_response,
        lambda x: x['microformat']['playerMicroformatRenderer'],
        dict) or {}
    video_title = video_details.get('title') \
        or self._get_text(microformat.get('title')) \
        or search_meta(['og:title', 'twitter:title', 'title'])
    video_description = video_details.get('shortDescription')

    # Early exit: multi-camera videos become a playlist of single-feed entries
    if not smuggled_data.get('force_singlefeed', False):
        if not self.get_param('noplaylist'):
            multifeed_metadata_list = try_get(
                player_response,
                lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
                compat_str)
            if multifeed_metadata_list:
                entries = []
                feed_ids = []
                for feed in multifeed_metadata_list.split(','):
                    # Unquote should take place before split on comma (,) since textual
                    # fields may contain comma as well (see
                    # https://github.com/ytdl-org/youtube-dl/issues/8536)
                    feed_data = compat_parse_qs(
                        compat_urllib_parse_unquote_plus(feed))

                    def feed_entry(name):
                        return try_get(
                            feed_data, lambda x: x[name][0], compat_str)

                    feed_id = feed_entry('id')
                    if not feed_id:
                        continue
                    feed_title = feed_entry('title')
                    title = video_title
                    if feed_title:
                        title += ' (%s)' % feed_title
                    entries.append({
                        '_type': 'url_transparent',
                        'ie_key': 'Youtube',
                        'url': smuggle_url(
                            base_url + 'watch?v=' + feed_data['id'][0],
                            {'force_singlefeed': True}),
                        'title': title,
                    })
                    feed_ids.append(feed_id)
                self.to_screen(
                    'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                    % (', '.join(feed_ids), video_id))
                return self.playlist_result(
                    entries, video_id, video_title, video_description)
        else:
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

    formats, itags, stream_ids = [], [], []
    itag_qualities = {}
    q = qualities([
        # "tiny" is the smallest video-only format. But some audio-only formats
        # was also labeled "tiny". It is not clear if such formats still exist
        'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])

    streaming_data = player_response.get('streamingData') or {}
    streaming_formats = streaming_data.get('formats') or []
    streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
    streaming_formats.extend(ytm_streaming_data.get('formats') or [])
    streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        # The same itag may appear once per audio track, so de-duplicate on both
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        if itag and quality:
            itag_qualities[itag] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No direct URL: rebuild it from the signature cipher, which needs the player JS
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        if itag:
            itags.append(itag)
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
            'fps': int_or_none(fmt.get('fps')),
            'height': int_or_none(fmt.get('height')),
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': fmt.get('width'),
            'language': audio_track.get('id', '').split('.')[0],
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
            # The 3gp format in android client has a quality of "small",
            # but is actually worse than all other formats
            if dct['ext'] == '3gp':
                dct['quality'] = q('tiny')
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        formats.append(dct)

    skip_manifests = self._configuration_arg('skip')
    get_dash = 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
    get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

    for sd in (streaming_data, ytm_streaming_data):
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(
                    hls_manifest_url, video_id, 'mp4', fatal=False):
                itag = self._search_regex(
                    r'/itag/(\d+)', f['url'], 'itag', default=None)
                if itag:
                    f['format_id'] = itag
                formats.append(f)

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(
                    dash_manifest_url, video_id, fatal=False):
                itag = f['format_id']
                # Already collected from the streaming data above
                if itag in itags:
                    continue
                if itag in itag_qualities:
                    f['quality'] = q(itag_qualities[itag])
                filesize = int_or_none(self._search_regex(
                    r'/clen/(\d+)', f.get('fragment_base_url')
                    or f['url'], 'file size', default=None))
                if filesize:
                    f['filesize'] = filesize
                formats.append(f)

    # Nothing playable was found: report the most specific reason available
    if not formats:
        if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
            self.raise_no_formats(
                'This video is DRM protected.', expected=True)
        pemr = try_get(
            playability_status,
            lambda x: x['errorScreen']['playerErrorMessageRenderer'],
            dict) or {}
        reason = self._get_text(pemr.get('reason')) or playability_status.get('reason')
        subreason = pemr.get('subreason')
        if subreason:
            subreason = clean_html(self._get_text(subreason))
            if subreason == 'The uploader has not made this video available in your country.':
                countries = microformat.get('availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # `reason` may be None and the cleaned `subreason` may be empty;
            # join only the parts that exist instead of concatenating blindly
            reason = '\n'.join(filter(None, (reason, subreason))) or None
        if reason:
            self.raise_no_formats(reason, expected=True)

    self._sort_formats(formats)

    keywords = video_details.get('keywords') or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    # A `yt:stretch=W:H` keyword sets the stretch ratio on every format that has video
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break

    category = microformat.get('category') or search_meta('genre')
    channel_id = video_details.get('channelId') \
        or microformat.get('externalChannelId') \
        or search_meta('channelId')
    duration = int_or_none(
        video_details.get('lengthSeconds')
        or microformat.get('lengthSeconds')) \
        or parse_duration(search_meta('duration'))
    is_live = video_details.get('isLive')
    is_upcoming = video_details.get('isUpcoming')
    owner_profile_url = microformat.get('ownerProfileUrl')

    thumbnails = []
    for container in (video_details, microformat):
        for thumbnail in (try_get(
                container,
                lambda x: x['thumbnail']['thumbnails'], list) or []):
            thumbnail_url = thumbnail.get('url')
            if not thumbnail_url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in thumbnail_url:
                thumbnail_url = thumbnail_url.split('?')[0]
            thumbnails.append({
                'url': thumbnail_url,
                'height': int_or_none(thumbnail.get('height')),
                'width': int_or_none(thumbnail.get('width')),
            })
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
    guaranteed_thumbnail_names = [
        'hqdefault', 'hq1', 'hq2', 'hq3', '0',
        'mqdefault', 'mq1', 'mq2', 'mq3',
        'default', '1', '2', '3'
    ]
    thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
    n_thumbnail_names = len(thumbnail_names)

    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
        '_test_url': name in hq_thumbnail_names,
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    # Earlier names in `thumbnail_names` get a higher preference; webp beats jpg of the same name
    for thumb in thumbnails:
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)

    info = {
        'id': video_id,
        'title': self._live_title(video_title) if is_live else video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        'description': video_description,
        'upload_date': unified_strdate(
            microformat.get('uploadDate')
            or search_meta('uploadDate')),
        # `video_details` may be empty, so do not index it directly
        'uploader': video_details.get('author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
        'duration': duration,
        'view_count': int_or_none(
            video_details.get('viewCount')
            or microformat.get('viewCount')
            or search_meta('interactionCount')),
        'average_rating': float_or_none(video_details.get('averageRating')),
        'age_limit': 18 if (
            microformat.get('isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'is_live': is_live,
        'playable_in_embed': playability_status.get('playableInEmbed'),
        'was_live': video_details.get('isLiveContent'),
    }

    pctr = try_get(
        player_response,
        lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
    subtitles = {}
    if pctr:
        def process_language(container, base_url, lang_code, sub_name, query):
            # Adds one entry per subtitle format for `lang_code` to `container`
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': update_url_query(base_url, query),
                    'name': sub_name,
                })

        for caption_track in (pctr.get('captionTracks') or []):
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            if caption_track.get('kind') != 'asr':
                lang_code = (
                    remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
                    or caption_track.get('languageCode'))
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code,
                    try_get(caption_track, lambda x: x['name']['simpleText']),
                    {})
                continue
            # The 'asr' track is offered in every translation language as automatic captions
            automatic_captions = {}
            for translation_language in (pctr.get('translationLanguages') or []):
                translation_language_code = translation_language.get('languageCode')
                if not translation_language_code:
                    continue
                process_language(
                    automatic_captions, base_url, translation_language_code,
                    self._get_text(translation_language.get('languageName'), max_runs=1),
                    {'tlang': translation_language_code})
            info['automatic_captions'] = automatic_captions
    info['subtitles'] = subtitles

    # start/end times from the URL; the fragment takes precedence over the query string
    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(query[k][0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # Strip the matched text (not the group name), like 'track' below
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_DATA_RE, video_id,
            'yt initial data')
    if not initial_data:
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=ytcfg, headers=headers, query={'videoId': video_id},
            note='Downloading initial data API JSON')

    try:
        # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
        info['subtitles']['live_chat'] = [{
            'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]
    except (KeyError, IndexError, TypeError):
        pass

    # Bound up front: the comments post-extractor at the end references
    # `contents` even when no initial data could be obtained
    contents = []
    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or None)

        contents = try_get(
            initial_data,
            lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
            list) or []
        for content in contents:
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = self._get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                # Like/dislike counts from the accessibility labels of the toggle buttons
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    like_count, dislike_count = sbr_tooltip.split(' / ')
                    info.update({
                        'like_count': str_to_int(like_count),
                        'dislike_count': str_to_int(dislike_count),
                    })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                info['channel'] = self._get_text(try_get(
                    vsir,
                    lambda x: x['owner']['videoOwnerRenderer']['title'],
                    dict))
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                # A divider line marks several songs; album/artist/track are then skipped
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = self._get_text(mrr['title'])
                    mrr_contents_text = self._get_text(mrr['contents'][0])
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

    # Fill missing channel fields from the corresponding uploader fields
    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }
    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = bool_or_none(video_details.get('isPrivate'))
    is_unlisted = bool_or_none(microformat.get('isUnlisted'))
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    # get xsrf for annotations or comments
    get_annotations = self.get_param('writeannotations', False)
    get_comments = self.get_param('getcomments', False)
    if get_annotations or get_comments:
        xsrf_token = None
        ytcfg = self._extract_ytcfg(video_id, webpage)
        if ytcfg:
            xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
        if not xsrf_token:
            xsrf_token = self._search_regex(
                r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
                webpage, 'xsrf token', group='xsrf_token', fatal=False)

    # annotations
    if get_annotations:
        invideo_url = try_get(
            player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
        if xsrf_token and invideo_url:
            xsrf_field_name = None
            if ytcfg:
                xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
            if not xsrf_field_name:
                xsrf_field_name = self._search_regex(
                    r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
                    webpage, 'xsrf field name',
                    group='xsrf_field_name', default='session_token')
            info['annotations'] = self._download_webpage(
                self._proto_relative_url(invideo_url),
                video_id, note='Downloading annotations',
                errnote='Unable to download video annotations', fatal=False,
                data=urlencode_postdata({xsrf_field_name: xsrf_token}))

    # Comments are fetched by a deferred callable stored on the info dict
    if get_comments:
        info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_response)

    return info
c5e8d7af 3010
5f6a1245 3011
8bdd16b4 3012class YoutubeTabIE(YoutubeBaseInfoExtractor):
3013 IE_DESC = 'YouTube.com tab'
70d5c17b 3014 _VALID_URL = r'''(?x)
3015 https?://
3016 (?:\w+\.)?
3017 (?:
3018 youtube(?:kids)?\.com|
3019 invidio\.us
3020 )/
3021 (?:
fe03a6cd 3022 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 3023 (?P<not_channel>
9ba5705a 3024 feed/|hashtag/|
70d5c17b 3025 (?:playlist|watch)\?.*?\blist=
3026 )|
29f7c58a 3027 (?!(?:%s)\b) # Direct URLs
70d5c17b 3028 )
3029 (?P<id>[^/?\#&]+)
3030 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 3031 IE_NAME = 'youtube:tab'
3032
81127aa5 3033 _TESTS = [{
da692b79 3034 'note': 'playlists, multipage',
8bdd16b4 3035 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3036 'playlist_mincount': 94,
3037 'info_dict': {
3038 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3039 'title': 'Игорь Клейнер - Playlists',
3040 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3041 'uploader': 'Игорь Клейнер',
3042 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 3043 },
3044 }, {
da692b79 3045 'note': 'playlists, multipage, different order',
8bdd16b4 3046 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3047 'playlist_mincount': 94,
3048 'info_dict': {
3049 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3050 'title': 'Игорь Клейнер - Playlists',
3051 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3052 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3053 'uploader': 'Игорь Клейнер',
8bdd16b4 3054 },
201c1459 3055 }, {
da692b79 3056 'note': 'playlists, series',
201c1459 3057 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3058 'playlist_mincount': 5,
3059 'info_dict': {
3060 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3061 'title': '3Blue1Brown - Playlists',
3062 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 3063 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3064 'uploader': '3Blue1Brown',
201c1459 3065 },
8bdd16b4 3066 }, {
da692b79 3067 'note': 'playlists, singlepage',
8bdd16b4 3068 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3069 'playlist_mincount': 4,
3070 'info_dict': {
3071 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3072 'title': 'ThirstForScience - Playlists',
3073 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 3074 'uploader': 'ThirstForScience',
3075 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 3076 }
3077 }, {
3078 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3079 'only_matching': True,
3080 }, {
da692b79 3081 'note': 'basic, single video playlist',
0e30a7b9 3082 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3083 'info_dict': {
0e30a7b9 3084 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3085 'uploader': 'Sergey M.',
3086 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3087 'title': 'youtube-dl public playlist',
81127aa5 3088 },
0e30a7b9 3089 'playlist_count': 1,
9291475f 3090 }, {
da692b79 3091 'note': 'empty playlist',
0e30a7b9 3092 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3093 'info_dict': {
0e30a7b9 3094 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3095 'uploader': 'Sergey M.',
3096 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3097 'title': 'youtube-dl empty playlist',
9291475f
PH
3098 },
3099 'playlist_count': 0,
3100 }, {
da692b79 3101 'note': 'Home tab',
8bdd16b4 3102 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3103 'info_dict': {
8bdd16b4 3104 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3105 'title': 'lex will - Home',
3106 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3107 'uploader': 'lex will',
3108 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3109 },
8bdd16b4 3110 'playlist_mincount': 2,
9291475f 3111 }, {
da692b79 3112 'note': 'Videos tab',
8bdd16b4 3113 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3114 'info_dict': {
8bdd16b4 3115 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3116 'title': 'lex will - Videos',
3117 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3118 'uploader': 'lex will',
3119 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3120 },
8bdd16b4 3121 'playlist_mincount': 975,
9291475f 3122 }, {
da692b79 3123 'note': 'Videos tab, sorted by popular',
8bdd16b4 3124 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3125 'info_dict': {
8bdd16b4 3126 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3127 'title': 'lex will - Videos',
3128 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3129 'uploader': 'lex will',
3130 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3131 },
8bdd16b4 3132 'playlist_mincount': 199,
9291475f 3133 }, {
da692b79 3134 'note': 'Playlists tab',
8bdd16b4 3135 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3136 'info_dict': {
8bdd16b4 3137 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3138 'title': 'lex will - Playlists',
3139 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3140 'uploader': 'lex will',
3141 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3142 },
8bdd16b4 3143 'playlist_mincount': 17,
ac7553d0 3144 }, {
da692b79 3145 'note': 'Community tab',
8bdd16b4 3146 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3147 'info_dict': {
8bdd16b4 3148 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3149 'title': 'lex will - Community',
3150 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3151 'uploader': 'lex will',
3152 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3153 },
3154 'playlist_mincount': 18,
87dadd45 3155 }, {
da692b79 3156 'note': 'Channels tab',
8bdd16b4 3157 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3158 'info_dict': {
8bdd16b4 3159 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3160 'title': 'lex will - Channels',
3161 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3162 'uploader': 'lex will',
3163 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3164 },
deaec5af 3165 'playlist_mincount': 12,
cd684175 3166 }, {
3167 'note': 'Search tab',
3168 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3169 'playlist_mincount': 40,
3170 'info_dict': {
3171 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3172 'title': '3Blue1Brown - Search - linear algebra',
3173 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3174 'uploader': '3Blue1Brown',
3175 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3176 },
6b08cdf6 3177 }, {
a0566bbf 3178 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3179 'only_matching': True,
3180 }, {
a0566bbf 3181 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3182 'only_matching': True,
3183 }, {
a0566bbf 3184 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3185 'only_matching': True,
3186 }, {
3187 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3188 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3189 'info_dict': {
3190 'title': '29C3: Not my department',
3191 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3192 'uploader': 'Christiaan008',
3193 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3194 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3195 },
3196 'playlist_count': 96,
3197 }, {
3198 'note': 'Large playlist',
3199 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3200 'info_dict': {
8bdd16b4 3201 'title': 'Uploads from Cauchemar',
3202 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3203 'uploader': 'Cauchemar',
3204 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3205 },
8bdd16b4 3206 'playlist_mincount': 1123,
3207 }, {
da692b79 3208 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3209 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3210 'only_matching': True,
4b7df0d3
JMF
3211 }, {
3212 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3213 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3214 'info_dict': {
acf757f4
PH
3215 'title': 'Uploads from Interstellar Movie',
3216 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3217 'uploader': 'Interstellar Movie',
8bdd16b4 3218 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3219 },
481cc733 3220 'playlist_mincount': 21,
358de58c 3221 }, {
3222 'note': 'Playlist with "show unavailable videos" button',
3223 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3224 'info_dict': {
3225 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3226 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3227 'uploader': 'Phim Siêu Nhân Nhật Bản',
3228 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3229 },
da692b79 3230 'playlist_mincount': 200,
5d342002 3231 }, {
da692b79 3232 'note': 'Playlist with unavailable videos in page 7',
5d342002 3233 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3234 'info_dict': {
3235 'title': 'Uploads from BlankTV',
3236 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3237 'uploader': 'BlankTV',
3238 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3239 },
da692b79 3240 'playlist_mincount': 1000,
8bdd16b4 3241 }, {
da692b79 3242 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3243 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3244 'info_dict': {
3245 'title': 'Data Analysis with Dr Mike Pound',
3246 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3247 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3248 'uploader': 'Computerphile',
deaec5af 3249 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3250 },
3251 'playlist_mincount': 11,
3252 }, {
a0566bbf 3253 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3254 'only_matching': True,
dacb3a86 3255 }, {
da692b79 3256 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3257 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3258 'info_dict': {
3259 'id': 'FqZTN594JQw',
3260 'ext': 'webm',
3261 'title': "Smiley's People 01 detective, Adventure Series, Action",
3262 'uploader': 'STREEM',
3263 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3264 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3265 'upload_date': '20150526',
3266 'license': 'Standard YouTube License',
3267 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3268 'categories': ['People & Blogs'],
3269 'tags': list,
dbdaaa23 3270 'view_count': int,
dacb3a86
S
3271 'like_count': int,
3272 'dislike_count': int,
3273 },
3274 'params': {
3275 'skip_download': True,
3276 },
13a75688 3277 'skip': 'This video is not available.',
dacb3a86 3278 'add_ie': [YoutubeIE.ie_key()],
481cc733 3279 }, {
8bdd16b4 3280 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3281 'only_matching': True,
66b48727 3282 }, {
8bdd16b4 3283 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3284 'only_matching': True,
a0566bbf 3285 }, {
3286 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3287 'info_dict': {
da692b79 3288 'id': 'X1whbWASnNQ', # This will keep changing
a0566bbf 3289 'ext': 'mp4',
deaec5af 3290 'title': compat_str,
a0566bbf 3291 'uploader': 'Sky News',
3292 'uploader_id': 'skynews',
3293 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3294 'upload_date': r're:\d{8}',
3295 'description': compat_str,
a0566bbf 3296 'categories': ['News & Politics'],
3297 'tags': list,
3298 'like_count': int,
3299 'dislike_count': int,
3300 },
3301 'params': {
3302 'skip_download': True,
3303 },
da692b79 3304 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3305 }, {
3306 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3307 'info_dict': {
3308 'id': 'a48o2S1cPoo',
3309 'ext': 'mp4',
3310 'title': 'The Young Turks - Live Main Show',
3311 'uploader': 'The Young Turks',
3312 'uploader_id': 'TheYoungTurks',
3313 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3314 'upload_date': '20150715',
3315 'license': 'Standard YouTube License',
3316 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3317 'categories': ['News & Politics'],
3318 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3319 'like_count': int,
3320 'dislike_count': int,
3321 },
3322 'params': {
3323 'skip_download': True,
3324 },
3325 'only_matching': True,
3326 }, {
3327 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3328 'only_matching': True,
3329 }, {
3330 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3331 'only_matching': True,
09f1580e 3332 }, {
3333 'note': 'A channel that is not live. Should raise error',
3334 'url': 'https://www.youtube.com/user/numberphile/live',
3335 'only_matching': True,
3d3dddc9 3336 }, {
3337 'url': 'https://www.youtube.com/feed/trending',
3338 'only_matching': True,
3339 }, {
3d3dddc9 3340 'url': 'https://www.youtube.com/feed/library',
3341 'only_matching': True,
3342 }, {
3d3dddc9 3343 'url': 'https://www.youtube.com/feed/history',
3344 'only_matching': True,
3345 }, {
3d3dddc9 3346 'url': 'https://www.youtube.com/feed/subscriptions',
3347 'only_matching': True,
3348 }, {
3d3dddc9 3349 'url': 'https://www.youtube.com/feed/watch_later',
3350 'only_matching': True,
3351 }, {
da692b79 3352 'note': 'Recommended - redirects to home page',
3d3dddc9 3353 'url': 'https://www.youtube.com/feed/recommended',
3354 'only_matching': True,
29f7c58a 3355 }, {
da692b79 3356 'note': 'inline playlist with not always working continuations',
29f7c58a 3357 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3358 'only_matching': True,
3359 }, {
3360 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3361 'only_matching': True,
3362 }, {
3363 'url': 'https://www.youtube.com/course',
3364 'only_matching': True,
3365 }, {
3366 'url': 'https://www.youtube.com/zsecurity',
3367 'only_matching': True,
3368 }, {
3369 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3370 'only_matching': True,
3371 }, {
3372 'url': 'https://www.youtube.com/TheYoungTurks/live',
3373 'only_matching': True,
39ed931e 3374 }, {
3375 'url': 'https://www.youtube.com/hashtag/cctv9',
3376 'info_dict': {
3377 'id': 'cctv9',
3378 'title': '#cctv9',
3379 },
3380 'playlist_mincount': 350,
201c1459 3381 }, {
3382 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3383 'only_matching': True,
9297939e 3384 }, {
da692b79 3385 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3386 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3387 'only_matching': True
fe03a6cd 3388 }, {
3389 'note': '/browse/ should redirect to /channel/',
3390 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3391 'only_matching': True
3392 }, {
3393 'note': 'VLPL, should redirect to playlist?list=PL...',
3394 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3395 'info_dict': {
3396 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3397 'uploader': 'NoCopyrightSounds',
3398 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3399 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3400 'title': 'NCS Releases',
3401 },
3402 'playlist_mincount': 166,
18db7548 3403 }, {
3404 'note': 'Topic, should redirect to playlist?list=UU...',
3405 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3406 'info_dict': {
3407 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3408 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3409 'title': 'Uploads from Royalty Free Music - Topic',
3410 'uploader': 'Royalty Free Music - Topic',
3411 },
3412 'expected_warnings': [
3413 'A channel/user page was given',
3414 'The URL does not have a videos tab',
3415 ],
3416 'playlist_mincount': 101,
3417 }, {
3418 'note': 'Topic without a UU playlist',
3419 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3420 'info_dict': {
3421 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3422 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3423 },
3424 'expected_warnings': [
3425 'A channel/user page was given',
3426 'The URL does not have a videos tab',
3427 'Falling back to channel URL',
3428 ],
3429 'playlist_mincount': 9,
abcdd12b 3430 }, {
3431 'note': 'Youtube music Album',
3432 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3433 'info_dict': {
3434 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3435 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3436 },
3437 'playlist_count': 50,
47193e02 3438 }, {
3439 'note': 'unlisted single video playlist',
3440 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3441 'info_dict': {
3442 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3443 'uploader': 'colethedj',
3444 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3445 'title': 'yt-dlp unlisted playlist test',
3446 'availability': 'unlisted'
3447 },
3448 'playlist_count': 1,
29f7c58a 3449 }]
3450
@classmethod
def suitable(cls, url):
    """Report whether this extractor should handle `url`.

    URLs that YoutubeIE accepts are always declined here; everything else
    is decided by the inherited `suitable` check.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 3455
3456 def _extract_channel_id(self, webpage):
3457 channel_id = self._html_search_meta(
3458 'channelId', webpage, 'channel id', default=None)
3459 if channel_id:
3460 return channel_id
3461 channel_url = self._html_search_meta(
3462 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3463 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3464 'twitter:app:url:googleplay'), webpage, 'channel url')
3465 return self._search_regex(
3466 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3467 channel_url, 'channel id')
15f6397c 3468
8bdd16b4 3469 @staticmethod
cd7c66cf 3470 def _extract_basic_item_renderer(item):
3471 # Modified from _extract_grid_item_renderer
201c1459 3472 known_basic_renderers = (
3473 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3474 )
3475 for key, renderer in item.items():
201c1459 3476 if not isinstance(renderer, dict):
cd7c66cf 3477 continue
201c1459 3478 elif key in known_basic_renderers:
3479 return renderer
3480 elif key.startswith('grid') and key.endswith('Renderer'):
3481 return renderer
8bdd16b4 3482
def _grid_entries(self, grid_renderer):
    """Yield one entry per recognised element of `grid_renderer['items']`.

    Each item is reduced to its renderer dict and then classified, in this
    order: playlist (`playlistId`), video (`videoId`), channel
    (`channelId`), or a generic navigation endpoint URL that one of the
    YouTube extractors declares itself suitable for. Items matching none
    of these produce nothing.
    """
    for item in grid_renderer['items']:
        if not isinstance(item, dict):
            continue
        renderer = self._extract_basic_item_renderer(item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer.get('title'))

        playlist_id = renderer.get('playlistId')
        if playlist_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif renderer.get('videoId'):
            # video
            yield self._extract_video(renderer)
        elif renderer.get('channelId'):
            # channel
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % renderer.get('channelId'),
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support
            endpoint_path = try_get(
                renderer,
                lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str)
            ep_url = urljoin('https://www.youtube.com/', endpoint_path)
            if not ep_url:
                continue
            # The first extractor (in this fixed order) that accepts the URL wins
            handler = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE)
                 if ie.suitable(ep_url)),
                None)
            if handler is not None:
                yield self.url_result(
                    ep_url, ie=handler.ie_key(),
                    video_id=handler._match_id(ep_url), video_title=title)
8bdd16b4 3522
3d3dddc9 3523 def _shelf_entries_from_content(self, shelf_renderer):
3524 content = shelf_renderer.get('content')
3525 if not isinstance(content, dict):
8bdd16b4 3526 return
cd7c66cf 3527 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3528 if renderer:
3529 # TODO: add support for nested playlists so each shelf is processed
3530 # as separate playlist
3531 # TODO: this includes only first N items
3532 for entry in self._grid_entries(renderer):
3533 yield entry
3534 renderer = content.get('horizontalListRenderer')
3535 if renderer:
3536 # TODO
3537 pass
8bdd16b4 3538
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf.

    When the shelf exposes an endpoint URL, a URL result for it is yielded
    (titled with the shelf title). Afterwards the entries embedded in the
    shelf content are yielded as well.

    @param skip_channels  If true, a shelf whose URL contains '/channels?'
                          produces no entries at all
    """
    shelf_url = urljoin('https://www.youtube.com', try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str))
    if shelf_url:
        # Links to other channels are detected by URL; note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url,
            video_title=self._get_text(shelf_renderer, lambda x: x['title']))
    # Shelf may not contain shelf URL, fallback to extraction from content
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 3555
8bdd16b4 3556 def _playlist_entries(self, video_list_renderer):
3557 for content in video_list_renderer['contents']:
3558 if not isinstance(content, dict):
3559 continue
3560 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3561 if not isinstance(renderer, dict):
3562 continue
3563 video_id = renderer.get('videoId')
3564 if not video_id:
3565 continue
3566 yield self._extract_video(renderer)
07aeced6 3567
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in a rich item's `content.videoRenderer`, if it has a `videoId`."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
    if video_renderer and video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3575
8bdd16b4 3576 def _video_entry(self, video_renderer):
3577 video_id = video_renderer.get('videoId')
3578 if video_id:
3579 return self._extract_video(video_renderer)
dacb3a86 3580
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by one community post.

    In order: the attached video (if any), the attached playlist (if any),
    then every link in the post text that YoutubeIE accepts, except a link
    to the already-yielded attached video.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not (ep_url and YoutubeIE.suitable(ep_url)):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # Do not repeat the attached video when the text links to it too
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 3616
8bdd16b4 3617 def _post_thread_continuation_entries(self, post_thread_continuation):
3618 contents = post_thread_continuation.get('contents')
3619 if not isinstance(contents, list):
3620 return
3621 for content in contents:
3622 renderer = content.get('backstagePostThreadRenderer')
3623 if not isinstance(renderer, dict):
3624 continue
3625 for entry in self._post_thread_entries(renderer):
3626 yield entry
07aeced6 3627
39ed931e 3628 r''' # unused
3629 def _rich_grid_entries(self, contents):
3630 for content in contents:
3631 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3632 if video_renderer:
3633 entry = self._video_entry(video_renderer)
3634 if entry:
3635 yield entry
3636 '''
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield all entries of a tab, following continuations page by page.

    The entries of the tab's initial `sectionListRenderer` /
    `richGridRenderer` are yielded first. While a continuation is known,
    further pages are requested through `_extract_response` and their
    entries yielded; iteration stops when no continuation is left, the
    response is empty, or the response contains nothing recognised.
    """
    # One-element list used as a mutable cell shared with extract_entries
    # (Python 2 does not support nonlocal)
    continuation_list = [None]

    # Renderers that may appear inside an itemSectionRenderer
    section_item_handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
    }

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
            if not isinstance(content, dict):
                continue
            section = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not section:
                rich_item = content.get('richItemRenderer')
                if rich_item:
                    for entry in self._rich_entries(rich_item):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            for isr_content in try_get(section, lambda x: x['contents'], list) or []:
                if not isinstance(isr_content, dict):
                    continue
                # Only the first recognised renderer of each item is processed
                for key, renderer in isr_content.items():
                    if key not in section_item_handlers:
                        continue
                    for entry in section_item_handlers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(section)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # Handlers for the `continuationContents` response shape
    continuation_content_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Handlers for the `appendContinuationItemsAction` response shape:
    # (entry generator, key under which the item list is passed to it)
    continuation_item_handlers = {
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    visitor_data = None

    page_num = 0
    while continuation:
        page_num += 1
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep the previous visitor data when the response carries none
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in continuation_content_handlers:
                continue
            continuation_renderer = value
            # Fresh cell so a continuation stored by extract_entries is
            # distinguishable from a stale one
            continuation_list = [None]
            for entry in continuation_content_handlers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The type of the first item decides how the whole list is handled
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in continuation_item_handlers:
                continue
            handler, items_key = continuation_item_handlers[key]
            video_items_renderer = {items_key: continuation_items}
            continuation_list = [None]
            for entry in handler(video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if not video_items_renderer:
            break
9558dcec 3751
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the tab whose `selected` flag is exactly True.

    Both `tabRenderer` and `expandableTabRenderer` are considered.
    Raises ExtractorError when no tab is selected.
    """
    for tab in tabs:
        candidate = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if candidate.get('selected') is True:
            return candidate
    raise ExtractorError('Unable to find selected tab')
b82f815f 3760
@classmethod
def _extract_uploader(cls, data):
    """Build uploader metadata from the playlist sidebar's video owner.

    Returns a dict with any of `uploader`, `uploader_id`, `uploader_url`
    whose value could be determined; an empty dict when the sidebar has no
    owner.
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    def browse_field(name):
        return try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint'][name], compat_str)

    fields = (
        ('uploader', owner.get('text')),
        ('uploader_id', browse_field('browseId')),
        ('uploader_url', urljoin('https://www.youtube.com/', browse_field('canonicalBaseUrl'))),
    )
    # Unknown values are left out rather than stored as None
    return {key: value for key, value in fields if value is not None}
8bdd16b4 3775
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a tabbed page (channel, playlist, hashtag, ...).

    Metadata is read from `channelMetadataRenderer` when present, otherwise
    from `playlistMetadataRenderer`; the entries come from the selected tab
    via `_entries`.
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    raw_thumbnails = []
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        # Channel avatar first, playlist sidebar thumbnail as fallback
        raw_thumbnails = (
            try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for thumb in raw_thumbnails:
        thumbnail_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumbnail_url:
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag = try_get(
            data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag or playlist_id
    # Append the tab name (and the expanded text, e.g. of a search tab)
    for field in ('title', 'expandedText'):
        title += format_field(selected_tab, field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        # No channel metadata: fall back to the playlist owner
        metadata.update(self._extract_uploader(data))
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    ytcfg = self._extract_ytcfg(item_id, webpage)
    identity_token = self._extract_identity_token(webpage, item_id)
    account_syncid = self._extract_account_syncid(ytcfg, data)
    entries = self._entries(
        selected_tab, playlist_id, identity_token, account_syncid, ytcfg)
    return self.playlist_result(entries, **metadata)
73c4ac2c 3850
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """
    Lazily yield the entries of a Mix playlist, requesting further pages as needed.

    Iteration stops when a page has no entries, when a page adds nothing after
    the last entry already yielded, when the first entry shows up again, or
    when the next page cannot be located in the API response.

    @param playlist: playlist renderer dict of the current page
    @param playlist_id: ID of the Mix; used in the API query and progress notes
    @param data: initial page data, used to extract the account sync ID
    @param webpage: page HTML, used to extract ytcfg and the identity token
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        # (index 0 when that video is not part of this page)
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item is not guaranteed to carry a watch endpoint. Fall back to
        # an empty dict so the defaults below apply instead of calling .get() on None
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # No further playlist page in the response; nothing more to iterate
            return
cd7c66cf 3886
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """
    Build the result for a watch page that carries a playlist panel.

    When the playlist advertises its own URL (different from the current one),
    extraction is handed over to that URL; otherwise the playlist is extracted
    here as a Mix.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_url)
    if not playlist_url or playlist_url == url:
        mix_entries = self._extract_mix_playlist(playlist, playlist_id, data, webpage)
        return self.playlist_result(
            mix_entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3904
def _extract_availability(self, data):
    """
    Work out the availability of a playlist/tab from its badges and,
    if present, the privacy dropdown of the primary sidebar renderer.
    Note: the playlist is never assumed public unless YouTube says so explicitly
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for entry in dropdown_entries:
        selected = try_get(
            entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
        if selected:
            selected_label = self._get_text(
                try_get(entry, lambda x: x['privacyDropdownItemRenderer']['label'], dict) or [])
            if selected_label:
                labels.add(selected_label.lower())
                break

    is_private = None
    is_unlisted = None
    for label in labels:
        if label == 'unlisted':
            is_unlisted = True
            continue
        if label == 'private':
            is_private = True
            continue
        if label == 'public':
            is_private = False
            is_unlisted = False
    return self._availability(is_private, False, False, False, is_unlisted)
3937
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first non-empty `info_renderer` value (of `expected_type`)
    found among the playlist sidebar items of `data`, or None if there is none.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next((found for found in candidates if found), None)
3946
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Re-request the playlist including unavailable videos.
    The browse parameters are taken from the 'show unavailable videos' menu
    button when it exists; otherwise fixed fallbacks are used.
    Returns None when the page has no primary sidebar renderer.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_endpoint = {}
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        label = try_get(
            nav_item, lambda x: x['text']['simpleText'], compat_str)
        if label and label.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    ytcfg = self._extract_ytcfg(item_id, webpage)
    account_syncid = self._extract_account_syncid(ytcfg, data)
    identity_token = self._extract_identity_token(webpage, item_id=item_id)
    visitor_data = try_get(
        self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=account_syncid,
        identity_token=identity_token,
        visitor_data=visitor_data)
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 3985
def _extract_webpage(self, url, item_id):
    """
    Download the webpage and extract its initial data, retrying when the
    data is incomplete.

    The number of retries is taken from the 'extractor_retries' param (default 3).

    @returns (webpage, data) tuple
    @raises ExtractorError if every attempt yields incomplete data
    """
    retries = self.get_param('extractor_retries', 3)
    count = -1
    last_error = 'Incomplete yt initial data received'
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if count:
            self.report_warning('%s. Retrying ...' % last_error)
        webpage = self._download_webpage(
            url, item_id,
            'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
        data = self._extract_yt_initial_data(item_id, webpage)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        if count >= retries:
            raise ExtractorError(last_error)
    return webpage, data
4007
9297939e 4008 @staticmethod
4009 def _smuggle_data(entries, data):
4010 for entry in entries:
4011 if data:
4012 entry['url'] = smuggle_url(entry['url'], data)
4013 yield entry
4014
def _real_extract(self, url):
    """
    Extract `url`, then re-attach the smuggled data (plus the music-URL flag,
    when applicable) to the URL of every resulting entry.
    """
    plain_url, smuggled = unsmuggle_url(url, {})
    if self.is_music_url(plain_url):
        smuggled['is_music_url'] = True
    result = self.__real_extract(plain_url, smuggled)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled)
    return result
4023
# _VALID_URL extended to also capture the part before the tab name ('pre'),
# the tab itself ('tab', only attempted when the 'channel_type' group matched)
# and whatever follows ('post')
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)

def __real_extract(self, url, smuggled_data):
    """
    Normalize the URL, download the page and dispatch to the matching extraction routine.

    @param url: URL to extract (already unsmuggled)
    @param smuggled_data: dict of smuggled data; only 'is_music_url' is read here
    @returns result of _extract_from_tabs, _extract_from_playlist or a
             url_result pointing at a single video
    @raises ExtractorError if the page cannot be recognized, or if a /live
            tab was requested but the tabs page was returned instead
    """
    item_id = self._match_id(url)
    # Force the canonical host regardless of which host the URL was given with
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Match against _url_re and replace unmatched (None) groups with ''
        # so that the values can be used in string operations directly
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    # NOTE(review): the 'not_channel' group is defined in _VALID_URL, outside this view
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                item_id = self._search_regex(
                    r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                    self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                    'playlist id')
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    # is_channel is re-checked because the music URL handling above may have cleared it
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from the (possibly rewritten) parts and re-parse it
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            # /videos was requested but a differently named tab came back selected
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            if alert_type == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        # Switch over to the playlist only once it is known to be usable
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        # Keep the current data when the reload yields nothing
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)
    # data may have been replaced above, so look the tabs up again
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    # Neither tabs nor a playlist: fall back to a single video, preferring
    # the ID reported by the page over the one from the URL query
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 4140
c5e8d7af 4141
class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist IDs as well as URLs carrying a list= parameter
    # and hands them over to YoutubeTabIE as a canonical /playlist URL
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE handles or that carry a video ID (v=)."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        video_ids = parse_qs(url).get('v', [None])
        if video_ids[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite the URL to a /playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Carry over the original query; fall back to just the playlist ID
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4224
4225
class YoutubeYtBeIE(InfoExtractor):
    # youtu.be short links that carry both a video ID and a playlist ID
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4264
4265
class YoutubeYtUserIE(InfoExtractor):
    # "ytuser:<name>" shorthand for a user's page
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Map the keyword onto the user's page URL and delegate to YoutubeTabIE."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4279
b05654f0 4280
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # ":ytfav" style keywords, resolved to the "LL" playlist
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed 'LL' playlist URL."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
4298
4299
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to `n` video results for `query`, following continuations page by page."""
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS
        found = 0
        continuation = {}
        for page_num in itertools.count(1):
            request.update(continuation)
            response = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not response:
                return
            # The first page and the continuation pages nest the sections differently
            sections = try_get(
                response,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for section in sections:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [section]})

                section_items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for item in section_items or []:
                    renderer = item.get('videoRenderer') if isinstance(item, dict) else None
                    if not isinstance(renderer, dict) or not renderer.get('videoId'):
                        continue

                    yield self._extract_video(renderer)
                    found += 1
                    if found == n:
                        return

            if not continuation:
                return

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        search_entries = self._entries(query, n)
        return self.playlist_result(search_entries, query)
75dff0ee 4367
c9ae7b95 4368
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same as YoutubeSearchIE, but with a fixed search 'params' value
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4374
c9ae7b95 4375
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_NAME = '%s_url' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Use this class's own _VALID_URL as the URL pattern."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run the search described by the query string of a /results URL."""
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = params.get('search_query') or params.get('q')
        query = terms[0]
        # The 'sp' parameter of the URL, if any, becomes the search params
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4401
4402
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors
    Each subclass has to provide the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        # Every feed lives under /feed/<name>; YoutubeTabIE does the actual work
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4419
4420
class YoutubeWatchLaterIE(InfoExtractor):
    # ":ytwatchlater" keyword, resolved to the "WL" playlist
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed 'WL' playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4433
4434
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # The 'recommended' feed; also matches the bare youtube.com home page URL
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4450
1ed5b5c9 4451
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # The 'subscriptions' feed, reachable through the ":ytsubs" keyword family
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4463
1ed5b5c9 4464
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # The 'history' feed, reachable through ":ythis" / ":ythistory"
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4473
4474
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    # Matches watch/attribution URLs that end right after a non-video parameter,
    # i.e. that contain no video ID, and always fails with a hint for the user
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise: a URL matching this extractor carries no video ID."""
        # The suggested command names this program (yt-dlp), not its predecessor
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4522
4523
class YoutubeTruncatedIDIE(InfoExtractor):
    # Matches watch URLs whose v= value is 1 to 10 characters long and ends the URL
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise, reporting the incomplete ID back to the user."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)