]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[utils] Fix LazyList for Falsey values
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
2d6659b9 5import base64
d92f5d5a 6import calendar
109dd3b2 7import copy
fe93e2c4 8import datetime
a5c56234 9import hashlib
0ca96d48 10import itertools
c5e8d7af 11import json
c4417ddb 12import os.path
d77ab8e2 13import random
c5e8d7af 14import re
8a784c74 15import time
e0df6211 16import traceback
c5e8d7af 17
b05654f0 18from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 19from ..compat import (
edf3e38e 20 compat_chr,
29f7c58a 21 compat_HTTPError,
c5e8d7af 22 compat_parse_qs,
545cc85d 23 compat_str,
7fd002c0 24 compat_urllib_parse_unquote_plus,
15707c7e 25 compat_urllib_parse_urlencode,
7c80519c 26 compat_urllib_parse_urlparse,
7c61bd36 27 compat_urlparse,
4bb4a188 28)
545cc85d 29from ..jsinterp import JSInterpreter
4bb4a188 30from ..utils import (
c224251a 31 bool_or_none,
2d6659b9 32 bytes_to_intlist,
c5e8d7af 33 clean_html,
26fe8ffe 34 dict_get,
d92f5d5a 35 datetime_from_str,
358de58c 36 error_to_compat_str,
c5e8d7af 37 ExtractorError,
b60419c5 38 format_field,
2d30521a 39 float_or_none,
dd27fd17 40 int_or_none,
2d6659b9 41 intlist_to_bytes,
94278f72 42 mimetype2ext,
6310acf5 43 parse_codecs,
49bd8c66 44 parse_count,
7c80519c 45 parse_duration,
dca3ff4a 46 qualities,
3995d37d 47 remove_start,
cf7e015f 48 smuggle_url,
dbdaaa23 49 str_or_none,
c93d53f5 50 str_to_int,
7c365c21 51 traverse_obj,
556dbe7f 52 try_get,
c5e8d7af
PH
53 unescapeHTML,
54 unified_strdate,
cf7e015f 55 unsmuggle_url,
8bdd16b4 56 update_url_query,
21c340b8 57 url_or_none,
6e6bc8da 58 urlencode_postdata,
fe93e2c4 59 urljoin,
7c365c21 60 variadic,
c5e8d7af
PH
61)
62
5f6a1245 63
def parse_qs(url):
    """Return the query component of *url* parsed by compat_urlparse.parse_qs."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
66
67
de7f3446 68class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b
JMF
69 """Provide base functions for Youtube extractors"""
# Google account sign-in endpoints
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

_LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
_CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
# {0} is a str.format placeholder for the value of the TL query parameter
_TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

# Regex alternation of path names
# NOTE(review): presumably paths that must not be treated as channel/user names - confirm against users
_RESERVED_NAMES = (
    r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
    r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
    r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

_NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False

# Regex matching a playlist ID: either one of the listed prefixes followed by
# at least 10 ID characters, or one of the bare IDs RDMM, WL, LL, LM
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 87
b2e8bc1b 88 def _login(self):
83317f69 89 """
90 Attempt to log in to YouTube.
91 True is returned if successful or skipped.
92 False is returned if login failed.
93
94 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
95 """
9d5d4d64 96
97 def warn(message):
98 self.report_warning(message)
99
100 # username+password login is broken
101 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
102 self.raise_login_required(
103 'Login details are needed to download this content', method='cookies')
68217024 104 username, password = self._get_login_info()
9d5d4d64 105 if username:
106 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
107 return
9d5d4d64 108
2d6659b9 109 # Everything below this is broken!
110 r'''
b2e8bc1b
JMF
111 # No authentication to be performed
112 if username is None:
a06916d9 113 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
69ea8ca4 114 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
a06916d9 115 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
545cc85d 116 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
83317f69 117 return True
b2e8bc1b 118
7cc3570e
PH
119 login_page = self._download_webpage(
120 self._LOGIN_URL, None,
69ea8ca4
PH
121 note='Downloading login page',
122 errnote='unable to fetch login page', fatal=False)
7cc3570e
PH
123 if login_page is False:
124 return
b2e8bc1b 125
1212e997 126 login_form = self._hidden_inputs(login_page)
c5e8d7af 127
e00eb564
S
128 def req(url, f_req, note, errnote):
129 data = login_form.copy()
130 data.update({
131 'pstMsg': 1,
132 'checkConnection': 'youtube',
133 'checkedDomains': 'youtube',
134 'hl': 'en',
135 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
3995d37d 136 'f.req': json.dumps(f_req),
e00eb564
S
137 'flowName': 'GlifWebSignIn',
138 'flowEntry': 'ServiceLogin',
baf67a60
S
139 # TODO: reverse actual botguard identifier generation algo
140 'bgRequest': '["identifier",""]',
041bc3ad 141 })
e00eb564
S
142 return self._download_json(
143 url, None, note=note, errnote=errnote,
144 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
145 fatal=False,
146 data=urlencode_postdata(data), headers={
147 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
148 'Google-Accounts-XSRF': 1,
149 })
150
3995d37d
S
151 lookup_req = [
152 username,
153 None, [], None, 'US', None, None, 2, False, True,
154 [
155 None, None,
156 [2, 1, None, 1,
157 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
158 None, [], 4],
159 1, [None, None, []], None, None, None, True
160 ],
161 username,
162 ]
163
e00eb564 164 lookup_results = req(
3995d37d 165 self._LOOKUP_URL, lookup_req,
e00eb564
S
166 'Looking up account info', 'Unable to look up account info')
167
168 if lookup_results is False:
169 return False
041bc3ad 170
3995d37d
S
171 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
172 if not user_hash:
173 warn('Unable to extract user hash')
174 return False
175
176 challenge_req = [
177 user_hash,
178 None, 1, None, [1, None, None, None, [password, None, True]],
179 [
180 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
181 1, [None, None, []], None, None, None, True
182 ]]
83317f69 183
3995d37d
S
184 challenge_results = req(
185 self._CHALLENGE_URL, challenge_req,
186 'Logging in', 'Unable to log in')
83317f69 187
3995d37d 188 if challenge_results is False:
e00eb564 189 return
83317f69 190
3995d37d
S
191 login_res = try_get(challenge_results, lambda x: x[0][5], list)
192 if login_res:
193 login_msg = try_get(login_res, lambda x: x[5], compat_str)
194 warn(
195 'Unable to login: %s' % 'Invalid password'
196 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
197 return False
198
199 res = try_get(challenge_results, lambda x: x[0][-1], list)
200 if not res:
201 warn('Unable to extract result entry')
202 return False
203
9a6628aa
S
204 login_challenge = try_get(res, lambda x: x[0][0], list)
205 if login_challenge:
206 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
207 if challenge_str == 'TWO_STEP_VERIFICATION':
3995d37d
S
208 # SEND_SUCCESS - TFA code has been successfully sent to phone
209 # QUOTA_EXCEEDED - reached the limit of TFA codes
9a6628aa 210 status = try_get(login_challenge, lambda x: x[5], compat_str)
3995d37d
S
211 if status == 'QUOTA_EXCEEDED':
212 warn('Exceeded the limit of TFA codes, try later')
213 return False
214
215 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
216 if not tl:
217 warn('Unable to extract TL')
218 return False
219
220 tfa_code = self._get_tfa_info('2-step verification code')
221
222 if not tfa_code:
223 warn(
224 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
225 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
226 return False
227
228 tfa_code = remove_start(tfa_code, 'G-')
229
230 tfa_req = [
231 user_hash, None, 2, None,
232 [
233 9, None, None, None, None, None, None, None,
234 [None, tfa_code, True, 2]
235 ]]
236
237 tfa_results = req(
238 self._TFA_URL.format(tl), tfa_req,
239 'Submitting TFA code', 'Unable to submit TFA code')
240
241 if tfa_results is False:
242 return False
243
244 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
245 if tfa_res:
246 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
247 warn(
248 'Unable to finish TFA: %s' % 'Invalid TFA code'
249 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
250 return False
251
252 check_cookie_url = try_get(
253 tfa_results, lambda x: x[0][-1][2], compat_str)
9a6628aa
S
254 else:
255 CHALLENGES = {
256 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
257 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
258 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
259 }
260 challenge = CHALLENGES.get(
261 challenge_str,
262 '%s returned error %s.' % (self.IE_NAME, challenge_str))
263 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
264 return False
3995d37d
S
265 else:
266 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
267
268 if not check_cookie_url:
269 warn('Unable to extract CheckCookie URL')
270 return False
e00eb564
S
271
272 check_cookie_results = self._download_webpage(
3995d37d
S
273 check_cookie_url, None, 'Checking cookie', fatal=False)
274
275 if check_cookie_results is False:
276 return False
e00eb564 277
3995d37d
S
278 if 'https://myaccount.google.com/' not in check_cookie_results:
279 warn('Unable to log in')
b2e8bc1b 280 return False
e00eb564 281
b2e8bc1b 282 return True
2d6659b9 283 '''
b2e8bc1b 284
cce889b9 285 def _initialize_consent(self):
286 cookies = self._get_cookies('https://www.youtube.com/')
287 if cookies.get('__Secure-3PSID'):
288 return
289 consent_id = None
290 consent = cookies.get('CONSENT')
291 if consent:
292 if 'YES' in consent.value:
293 return
294 consent_id = self._search_regex(
295 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
296 if not consent_id:
297 consent_id = random.randint(100, 999)
298 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 299
b2e8bc1b 300 def _real_initialize(self):
cce889b9 301 self._initialize_consent()
b2e8bc1b
JMF
302 if self._downloader is None:
303 return
b2e8bc1b
JMF
304 if not self._login():
305 return
c5e8d7af 306
# Matches an assignment to ytInitialData (plain or via window["ytInitialData"])
# and captures the assigned {...} object up to the first following ';' (non-greedy)
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
# Same as above for ytInitialPlayerResponse
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
# Text expected right after the terminating ';'. Appended to the patterns above
# so that the non-greedy capture does not stop at a ';' inside the object
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 310
# Fallback ytcfg values for each known Innertube client, keyed by client name.
# Each entry holds the API version, the client name and version, the API key,
# a minimal INNERTUBE_CONTEXT and the numeric INNERTUBE_CONTEXT_CLIENT_NAME
# (sent as the X-YouTube-Client-Name header by _generate_api_headers).
# Always access through _get_default_ytcfg, which hands out deep copies.
_YT_DEFAULT_YTCFGS = {
    'WEB': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'WEB',
        'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'WEB_REMIX': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
        'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67
    },
    'WEB_EMBEDDED_PLAYER': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
        'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'ANDROID': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'ANDROID',
        'INNERTUBE_CLIENT_VERSION': '16.20',
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3
    },
    'ANDROID_EMBEDDED_PLAYER': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
        'INNERTUBE_CLIENT_VERSION': '16.20',
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55
    },
    'ANDROID_MUSIC': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
        'INNERTUBE_CLIENT_VERSION': '4.32',
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21
    }
}
397
# Hostname used to build the Innertube API URL, keyed by client name.
# _get_innertube_host looks up the client first and then 'WEB', so clients
# without an entry here are expected to end up on the 'WEB' host.
_YT_DEFAULT_INNERTUBE_HOSTS = {
    'DIRECT': 'youtubei.googleapis.com',
    'WEB': 'www.youtube.com',
    'WEB_REMIX': 'music.youtube.com',
    'ANDROID_MUSIC': 'music.youtube.com'
}
404
405 def _get_default_ytcfg(self, client='WEB'):
406 if client in self._YT_DEFAULT_YTCFGS:
407 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
408 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
409 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
410
def _get_innertube_host(self, client='WEB'):
    """Return the Innertube hostname for *client*, trying the 'WEB' entry as second choice."""
    lookup_order = (client, 'WEB')
    return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, lookup_order)
413
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
    """
    try_get on *ytcfg*, with fallback to the default ytcfg of *default_client*.

    The default ytcfg is consulted whenever the value taken from *ytcfg* is falsy.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
418
def _extract_client_name(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_NAME from *ytcfg*, or from the defaults of *default_client*."""
    def client_name(cfg):
        return cfg['INNERTUBE_CLIENT_NAME']

    return self._ytcfg_get_safe(ytcfg, client_name, compat_str, default_client)
421
@staticmethod
def _extract_session_index(ytcfg):
    """Return SESSION_INDEX from *ytcfg* passed through int_or_none."""
    raw_index = try_get(ytcfg, lambda cfg: cfg['SESSION_INDEX'])
    return int_or_none(raw_index)
425
def _extract_client_version(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_VERSION from *ytcfg*, or from the defaults of *default_client*."""
    def client_version(cfg):
        return cfg['INNERTUBE_CLIENT_VERSION']

    return self._ytcfg_get_safe(ytcfg, client_version, compat_str, default_client)
428
def _extract_api_key(self, ytcfg=None, default_client='WEB'):
    """Return INNERTUBE_API_KEY from *ytcfg*, or from the defaults of *default_client*."""
    def api_key(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, api_key, compat_str, default_client)
431
def _extract_context(self, ytcfg=None, default_client='WEB'):
    """
    Return the Innertube context dict to send with API requests.

    The INNERTUBE_CONTEXT of *ytcfg* is returned as-is when present.
    Otherwise the default context of *default_client* is used; if a ytcfg
    was given, its client name/version and VISITOR_DATA are applied on top.
    """
    def context_of(cfg):
        return try_get(cfg, lambda x: x['INNERTUBE_CONTEXT'], dict)

    own_context = context_of(ytcfg)
    if own_context:
        return own_context

    default_context = context_of(self._get_default_ytcfg(default_client))
    if ytcfg:
        client_context = default_context['client']
        # Recreate the client context (required)
        client_context.update(
            clientVersion=self._extract_client_version(ytcfg, default_client),
            clientName=self._extract_client_name(ytcfg, default_client))
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            client_context['visitorData'] = visitor_data
    return default_context
451
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """
    Build the "SAPISIDHASH <time>_<sha1>" authorization value for *origin*.

    Returns None when neither a __Secure-3PAPISID nor a SAPISID cookie is
    available for youtube.com. A missing SAPISID cookie is set from the
    value that was found, expiring one hour from now.
    """
    # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
    # See: https://github.com/yt-dlp/yt-dlp/issues/393
    jar = self._get_cookies('https://www.youtube.com')
    source_cookie = dict_get(jar, ('__Secure-3PAPISID', 'SAPISID'))
    if source_cookie is None:
        return None

    timestamp = round(time.time())
    if not jar.get('SAPISID'):
        # SAPISID cookie is required if not already present
        self._set_cookie(
            '.youtube.com', 'SAPISID', source_cookie.value, secure=True, expire_time=timestamp + 3600)

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    payload = '{} {} {}'.format(timestamp, source_cookie.value, origin)
    digest = hashlib.sha1(payload.encode('utf-8')).hexdigest()
    return 'SAPISIDHASH {}_{}'.format(timestamp, digest)
a5c56234
M
469
470 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 471 note='Downloading API JSON', errnote='Unable to download API page',
109dd3b2 472 context=None, api_key=None, api_hostname=None, default_client='WEB'):
f4f751af 473
109dd3b2 474 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
8bdd16b4 475 data.update(query)
109dd3b2 476 real_headers = self._generate_api_headers(client=default_client)
f4f751af 477 real_headers.update({'content-type': 'application/json'})
478 if headers:
479 real_headers.update(headers)
545cc85d 480 return self._download_json(
109dd3b2 481 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
a5c56234 482 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 483 data=json.dumps(data).encode('utf8'), headers=real_headers,
484 query={'key': api_key or self._extract_api_key()})
485
8bdd16b4 486 def _extract_yt_initial_data(self, video_id, webpage):
487 return self._parse_json(
488 self._search_regex(
29f7c58a 489 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
a0566bbf 490 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
8bdd16b4 491 video_id)
0c148415 492
a1c5d2ca
M
def _extract_identity_token(self, webpage, item_id):
    """
    Return the ID_TOKEN for *webpage*, or None if there is none.

    The token is taken from the page's ytcfg when available, otherwise
    it is searched for in the raw webpage.
    """
    ytcfg = self._extract_ytcfg(item_id, webpage)
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
502
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    @returns the first sync ID found in the given objects, None otherwise
    """
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        session_id = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if session_id:
            return session_id

        datasync_id = try_get(
            source, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                     lambda x: x['DATASYNC_ID']), compat_str) or ''
        id_parts = datasync_id.split("||")
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(id_parts) < 2 or not id_parts[1]:
            continue
        return id_parts[0]
    return None
a1c5d2ca 521
29f7c58a 522 def _extract_ytcfg(self, video_id, webpage):
8c54a305 523 if not webpage:
524 return {}
29f7c58a 525 return self._parse_json(
526 self._search_regex(
527 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
f4f751af 528 default='{}'), video_id, fatal=False) or {}
529
def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None,
                          visitor_data=None, api_hostname=None, client='WEB', session_index=None):
    """
    Build the HTTP headers for an Innertube API request.

    Client name/version come from *ytcfg* with fallback to the defaults of
    *client*. visitor_data and session_index are taken from *ytcfg* when
    not given explicitly. Authorization and X-Origin are only added when
    a SAPISIDHASH value can be generated.
    """
    origin = 'https://' + (api_hostname or self._get_innertube_host(client))
    client_name_id = self._ytcfg_get_safe(
        ytcfg, lambda cfg: cfg['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name_id),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, client),
        'Origin': origin,
    }

    if ytcfg and not visitor_data:
        visitor_data = try_get(
            self._extract_context(ytcfg, client), lambda ctx: ctx['client']['visitorData'], compat_str)

    if identity_token:
        headers['X-Youtube-Identity-Token'] = identity_token
    if account_syncid:
        headers['X-Goog-PageId'] = account_syncid

    if ytcfg and session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # Index 0 is used when a sync ID is given without a session index
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    if visitor_data:
        headers['X-Goog-Visitor-Id'] = visitor_data

    authorization = self._generate_sapisidhash_header(origin)
    if authorization is not None:
        headers['Authorization'] = authorization
        headers['X-Origin'] = origin
    return headers
29f7c58a 557
2d6659b9 558 @staticmethod
559 def _build_api_continuation_query(continuation, ctp=None):
560 query = {
561 'continuation': continuation
562 }
563 # TODO: Inconsistency with clickTrackingParams.
564 # Currently we have a fixed ctp contained within context (from ytcfg)
565 # and a ctp in root query for continuation.
566 if ctp:
567 query['clickTracking'] = {'clickTrackingParams': ctp}
568 return query
569
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the nextContinuationData or
    reloadContinuationData of *renderer*. Returns None if there is none.
    """
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_data.get('clickTrackingParams'))
2d6659b9 582
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.
    Returns None if *continuation_ep* is not a dict or carries no token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if token:
        return cls._build_api_continuation_query(
            token, continuation_ep.get('clickTrackingParams'))
    return None
2d6659b9 592
@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for *renderer*, or None if there is none.

    The renderer's own continuation data takes precedence; otherwise the
    first usable continuationItemRenderer among its 'contents' and 'items'
    is used.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    children = [
        child
        for key in ('contents', 'items')
        for child in (try_get(renderer, lambda x: x[key], list) or [])]

    for child in children:
        if isinstance(child, dict):
            endpoint = try_get(
                child, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
                dict)
            found = cls._extract_continuation_ep_data(endpoint)
            if found:
                return found
    return None
613
@classmethod
def _extract_alerts(cls, data):
    """Yield (type, message) for every alert in data['alerts'] that has both a type and a text."""
    alert_entries = try_get(data, lambda x: x['alerts'], list) or []
    for entry in alert_entries:
        if isinstance(entry, dict):
            for renderer in entry.values():
                kind = renderer.get('type')
                if kind:
                    text = cls._get_text(renderer.get('text'))
                    if text:
                        yield kind, text
626
627 def _report_alerts(self, alerts, expected=True):
628 errors = []
629 warnings = []
630 for alert_type, alert_message in alerts:
631 if alert_type.lower() == 'error':
632 errors.append([alert_type, alert_message])
633 else:
634 warnings.append([alert_type, alert_message])
635
636 for alert_type, alert_message in (warnings + errors[:-1]):
637 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
638 if errors:
639 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
640
641 def _extract_and_report_alerts(self, data, *args, **kwargs):
642 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
643
def _extract_badges(self, renderer: dict):
    """Return the set of lowercased metadataBadgeRenderer labels found in renderer['badges']."""
    badge_list = try_get(renderer, lambda x: x['badges'], list) or []
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in badge_list)
    return {label.lower() for label in labels if label}
651
@staticmethod
def _get_text(data, getter=None, max_runs=None):
    """
    Return the text of the first text object that yields a non-empty string.

    @param data      text object, or a container the getter(s) are applied to
    @param getter    getter or sequence of getters tried in order; None uses
                     *data* itself
    @param max_runs  limit for the number of 'runs' that are joined

    The 'simpleText' of an object is preferred; otherwise the 'text' of its
    'runs' (or of the object itself, if it is a list) are joined.
    Returns None if no text is found.
    """
    for path in variadic(getter):
        node = data if path is None else try_get(data, path)

        simple_text = try_get(node, lambda x: x['simpleText'], compat_str)
        if simple_text:
            return simple_text

        runs = try_get(node, lambda x: x['runs'], list) or []
        if isinstance(node, list) and not runs:
            runs = node

        limit = min(len(runs), max_runs or len(runs))
        joined = ''.join(
            try_get(run, lambda x: x['text'], compat_str) or ''
            for run in runs[:limit])
        if joined:
            return joined
    return None
47193e02 670
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='WEB'):
    """
    Call the Innertube endpoint *ep* through _call_api, retrying on transient failures.

    A request is retried when it fails with HTTP 500, 503 or 404, or when
    check_get_keys is non-empty and dict_get(response, check_get_keys) is
    falsy. The number of retries after the first attempt is taken from the
    'extractor_retries' param (default 3).

    Alerts contained in a response are reported through
    _extract_and_report_alerts with expected=False.

    Returns the response. If fatal is False, None is returned (after a
    warning) when the request fails, an error alert is reported or the
    data is still incomplete after the last retry.
    Raises ExtractorError in those same cases if fatal is True.
    """
    response = None
    last_error = None
    # Starts at -1 so that the first attempt is number 0 and is not counted as a retry
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            # Always fatal here: failures are handled (and possibly retried) below
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                last_error = 'HTTP Error %s' % e.cause.code
                if count < retries:
                    continue
            # Either not a retryable error or out of retries
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            # Youtube may send alerts if there was an issue with the continuation page
            try:
                self._extract_and_report_alerts(response, expected=False)
            except ExtractorError as e:
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            # Done unless the caller expects certain keys and none of them has a usable value
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response
726
9297939e 727 @staticmethod
728 def is_music_url(url):
729 return re.match(r'https?://music\.youtube\.com/', url) is not None
730
def _extract_video(self, renderer):
    """Return a 'url' result for YoutubeIE built from the fields of a video *renderer*."""
    def text_of(key):
        return self._get_text(renderer.get(key))

    video_id = renderer.get('videoId')
    title = text_of('title')
    description = text_of('descriptionSnippet')
    duration = parse_duration(text_of('lengthText'))

    # Only the leading number of the view count text is used, ignoring whitespace
    compact_views = re.sub(r'\s', '', text_of('viewCountText') or '')
    view_count = str_to_int(self._search_regex(
        r'^([\d,]+)', compact_views, 'view count', default=None))

    uploader = self._get_text(
        renderer, (lambda x: x['ownerText'], lambda x: x['shortBylineText']))

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
    }
754
0c148415 755
360e1ca5 756class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 757 IE_DESC = 'YouTube.com'
bc2ca1bb 758 _INVIDIOUS_SITES = (
759 # invidious-redirect websites
760 r'(?:www\.)?redirect\.invidious\.io',
761 r'(?:(?:www|dev)\.)?invidio\.us',
762 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
763 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 764 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 765 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 766 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
bc2ca1bb 767 # youtube-dl invidious instances list
768 r'(?:(?:www|no)\.)?invidiou\.sh',
769 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
770 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 771 r'(?:www\.)?invidious\.mastodon\.host',
772 r'(?:www\.)?invidious\.zapashcanon\.fr',
ed807c18 773 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
201c1459 774 r'(?:www\.)?invidious\.tinfoil-hat\.net',
775 r'(?:www\.)?invidious\.himiko\.cloud',
776 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 777 r'(?:www\.)?invidious\.tube',
778 r'(?:www\.)?invidiou\.site',
779 r'(?:www\.)?invidious\.site',
780 r'(?:www\.)?invidious\.xyz',
781 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 782 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 783 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 784 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 785 r'(?:www\.)?tube\.poal\.co',
786 r'(?:www\.)?tube\.connect\.cafe',
787 r'(?:www\.)?vid\.wxzm\.sx',
788 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 789 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 790 r'(?:www\.)?yewtu\.be',
791 r'(?:www\.)?yt\.elukerio\.org',
792 r'(?:www\.)?yt\.lelux\.fi',
793 r'(?:www\.)?invidious\.ggc-project\.de',
794 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 795 r'(?:www\.)?ytprivate\.com',
796 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 797 r'(?:www\.)?invidious\.toot\.koeln',
798 r'(?:www\.)?invidious\.fdn\.fr',
799 r'(?:www\.)?watch\.nettohikari\.com',
ed807c18 800 r'(?:www\.)?invidious\.namazso\.eu',
801 r'(?:www\.)?invidious\.silkky\.cloud',
802 r'(?:www\.)?invidious\.exonip\.de',
803 r'(?:www\.)?invidious\.riverside\.rocks',
804 r'(?:www\.)?invidious\.blamefran\.net',
805 r'(?:www\.)?invidious\.moomoo\.de',
806 r'(?:www\.)?ytb\.trom\.tf',
807 r'(?:www\.)?yt\.cyberhost\.uk',
bc2ca1bb 808 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
809 r'(?:www\.)?qklhadlycap4cnod\.onion',
810 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
811 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
812 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
813 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
814 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
815 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
ed807c18 816 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
817 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
818 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
819 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
bc2ca1bb 820 )
# Pattern for the URLs (or bare IDs) this extractor accepts.
#
# The pattern is written in verbose mode ((?x)), so whitespace and the
# trailing "# ..." notes inside the string are ignored by the regex engine.
#   * Group 1 is the optional URL prefix (scheme, host and whatever precedes
#     the ID); when it is absent, a naked 11-character ID is accepted.
#   * Group "id" is the 11-character video ID.
#   * "(?(1).+)?" allows arbitrary trailing text only if group 1 matched.
# The alternation of Invidious hostnames is interpolated from
# _INVIDIOUS_SITES via the %(invidious)s placeholders.
_VALID_URL = r"""(?x)^
    (
        (?:https?://|//)                                    # http(s):// or protocol-independent URL
        (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
           (?:www\.)?deturl\.com/www\.youtube\.com|
           (?:www\.)?pwnyoutube\.com|
           (?:www\.)?hooktube\.com|
           (?:www\.)?yourepeat\.com|
           tube\.majestyc\.net|
           %(invidious)s|
           youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
        (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
        (?:                                                  # the various things that can precede the ID:
            (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
            |(?:                                             # or the v= param in all its forms
                (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
                v=
            )
        ))
        |(?:
           youtu\.be|                                        # just youtu.be/xxxx
           vid\.plus|                                        # or vid.plus/xxxx
           zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
           %(invidious)s
        )/
        |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
        )
    )?                                                       # all until now is optional -> you can pass the naked ID
    (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
    (?(1).+)?                                                # if we found the ID, everything can follow
    (?:\#|$)""" % {
    'invidious': '|'.join(_INVIDIOUS_SITES),
}
# Candidate patterns for pulling an identifier out of a player script URL.
# Each pattern exposes the identifier as the named group "id".
# NOTE(review): presumably tried in order against the player JS URL by a
# method outside this excerpt - confirm against the caller.
_PLAYER_INFO_RE = (
    r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
    r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
    r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)
# Static per-format metadata, keyed by the format code (itag) as a string.
# Each value is a dict of known properties for that format: container
# extension, dimensions, codecs, audio bitrate, fps, a human-readable
# 'format_note' and, for some groups, a negative 'preference'.
# Entries deliberately omit fields that vary between videos (see the notes
# on 36, 138 and 272).
_formats = {
    '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
    '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
    '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
    '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
    '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
    '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},


    # 3D videos
    '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
    '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
    '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

    # Apple HTTP Live Streaming
    '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

    # DASH mp4 video
    '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
    '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

    # Dash mp4 audio
    '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
    '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
    '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
    '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
    '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

    # Dash webm
    '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
    '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
    '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

    # Dash webm audio
    '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
    '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

    # Dash webm audio with opus inside
    '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
    '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
    '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

    # RTMP (unnamed)
    '_rtmp': {'protocol': 'rtmp'},

    # av01 video only formats sometimes served with "unknown" codecs
    '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
}
# Subtitle format identifiers known to this extractor.
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

# Messages that mark a video as age-gated.
# NOTE(review): presumably compared against the playability reason text
# returned by the site - confirm against the code that reads this tuple.
_AGE_GATE_REASONS = (
    'Sign in to confirm your age',
    'This video may be inappropriate for some users.',
    'Sorry, this content is age-restricted.')

# Geo-restriction bypass is switched off for this extractor.
_GEO_BYPASS = False

# Extractor name.
IE_NAME = 'youtube'
2eb88d95
PH
976 _TESTS = [
977 {
2d3d2997 978 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
979 'info_dict': {
980 'id': 'BaW_jenozKc',
981 'ext': 'mp4',
3867038a 982 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
983 'uploader': 'Philipp Hagemeister',
984 'uploader_id': 'phihag',
ec85ded8 985 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
986 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
987 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 988 'upload_date': '20121002',
3867038a 989 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 990 'categories': ['Science & Technology'],
3867038a 991 'tags': ['youtube-dl'],
556dbe7f 992 'duration': 10,
dbdaaa23 993 'view_count': int,
3e7c1224
PH
994 'like_count': int,
995 'dislike_count': int,
7c80519c 996 'start_time': 1,
297a564b 997 'end_time': 9,
2eb88d95 998 }
0e853ca4 999 },
fccd3771 1000 {
4bc3a23e
PH
1001 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1002 'note': 'Embed-only video (#1746)',
1003 'info_dict': {
1004 'id': 'yZIXLfi8CZQ',
1005 'ext': 'mp4',
1006 'upload_date': '20120608',
1007 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1008 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1009 'uploader': 'SET India',
94bfcd23 1010 'uploader_id': 'setindia',
ec85ded8 1011 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1012 'age_limit': 18,
545cc85d 1013 },
1014 'skip': 'Private video',
fccd3771 1015 },
11b56058 1016 {
8bdd16b4 1017 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1018 'note': 'Use the first video ID in the URL',
1019 'info_dict': {
1020 'id': 'BaW_jenozKc',
1021 'ext': 'mp4',
3867038a 1022 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1023 'uploader': 'Philipp Hagemeister',
1024 'uploader_id': 'phihag',
ec85ded8 1025 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 1026 'upload_date': '20121002',
3867038a 1027 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 1028 'categories': ['Science & Technology'],
3867038a 1029 'tags': ['youtube-dl'],
556dbe7f 1030 'duration': 10,
dbdaaa23 1031 'view_count': int,
11b56058
PM
1032 'like_count': int,
1033 'dislike_count': int,
34a7de29
S
1034 },
1035 'params': {
1036 'skip_download': True,
1037 },
11b56058 1038 },
dd27fd17 1039 {
2d3d2997 1040 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1041 'note': '256k DASH audio (format 141) via DASH manifest',
1042 'info_dict': {
1043 'id': 'a9LDPn-MO4I',
1044 'ext': 'm4a',
1045 'upload_date': '20121002',
1046 'uploader_id': '8KVIDEO',
ec85ded8 1047 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1048 'description': '',
1049 'uploader': '8KVIDEO',
1050 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1051 },
4bc3a23e
PH
1052 'params': {
1053 'youtube_include_dash_manifest': True,
1054 'format': '141',
4919603f 1055 },
de3c7fe0 1056 'skip': 'format 141 not served anymore',
dd27fd17 1057 },
8bdd16b4 1058 # DASH manifest with encrypted signature
1059 {
1060 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1061 'info_dict': {
1062 'id': 'IB3lcPjvWLA',
1063 'ext': 'm4a',
1064 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1065 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1066 'duration': 244,
1067 'uploader': 'AfrojackVEVO',
1068 'uploader_id': 'AfrojackVEVO',
1069 'upload_date': '20131011',
cc2db878 1070 'abr': 129.495,
8bdd16b4 1071 },
1072 'params': {
1073 'youtube_include_dash_manifest': True,
1074 'format': '141/bestaudio[ext=m4a]',
1075 },
1076 },
aa79ac0c
PH
1077 # Controversy video
1078 {
1079 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
1080 'info_dict': {
1081 'id': 'T4XJQO3qol8',
1082 'ext': 'mp4',
556dbe7f 1083 'duration': 219,
aa79ac0c 1084 'upload_date': '20100909',
4fe54c12 1085 'uploader': 'Amazing Atheist',
aa79ac0c 1086 'uploader_id': 'TheAmazingAtheist',
ec85ded8 1087 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 1088 'title': 'Burning Everyone\'s Koran',
545cc85d 1089 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 1090 }
c522adb1 1091 },
dd2d55f1 1092 # Normal age-gate video (embed allowed)
c522adb1 1093 {
2d3d2997 1094 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1095 'info_dict': {
1096 'id': 'HtVdAasjOgU',
1097 'ext': 'mp4',
1098 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1099 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1100 'duration': 142,
c522adb1
JMF
1101 'uploader': 'The Witcher',
1102 'uploader_id': 'WitcherGame',
ec85ded8 1103 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1104 'upload_date': '20140605',
34952f09 1105 'age_limit': 18,
c522adb1
JMF
1106 },
1107 },
8bdd16b4 1108 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1109 # YouTube Red ad is not captured for creator
1110 {
1111 'url': '__2ABJjxzNo',
1112 'info_dict': {
1113 'id': '__2ABJjxzNo',
1114 'ext': 'mp4',
1115 'duration': 266,
1116 'upload_date': '20100430',
1117 'uploader_id': 'deadmau5',
1118 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1119 'creator': 'deadmau5',
1120 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1121 'uploader': 'deadmau5',
1122 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1123 'alt_title': 'Some Chords',
8bdd16b4 1124 },
1125 'expected_warnings': [
1126 'DASH manifest missing',
1127 ]
1128 },
067aa17e 1129 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1130 {
1131 'url': 'lqQg6PlCWgI',
1132 'info_dict': {
1133 'id': 'lqQg6PlCWgI',
1134 'ext': 'mp4',
556dbe7f 1135 'duration': 6085,
90227264 1136 'upload_date': '20150827',
cbe2bd91 1137 'uploader_id': 'olympic',
ec85ded8 1138 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1139 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 1140 'uploader': 'Olympic',
cbe2bd91
PH
1141 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1142 },
1143 'params': {
1144 'skip_download': 'requires avconv',
e52a40ab 1145 }
cbe2bd91 1146 },
6271f1ca
PH
1147 # Non-square pixels
1148 {
1149 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1150 'info_dict': {
1151 'id': '_b-2C3KPAM0',
1152 'ext': 'mp4',
1153 'stretched_ratio': 16 / 9.,
556dbe7f 1154 'duration': 85,
6271f1ca
PH
1155 'upload_date': '20110310',
1156 'uploader_id': 'AllenMeow',
ec85ded8 1157 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1158 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1159 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1160 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1161 },
06b491eb
S
1162 },
1163 # url_encoded_fmt_stream_map is empty string
1164 {
1165 'url': 'qEJwOuvDf7I',
1166 'info_dict': {
1167 'id': 'qEJwOuvDf7I',
f57b7835 1168 'ext': 'webm',
06b491eb
S
1169 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1170 'description': '',
1171 'upload_date': '20150404',
1172 'uploader_id': 'spbelect',
1173 'uploader': 'Наблюдатели Петербурга',
1174 },
1175 'params': {
1176 'skip_download': 'requires avconv',
e323cf3f
S
1177 },
1178 'skip': 'This live event has ended.',
06b491eb 1179 },
067aa17e 1180 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1181 {
1182 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1183 'info_dict': {
1184 'id': 'FIl7x6_3R5Y',
eb6793ba 1185 'ext': 'webm',
da77d856
S
1186 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1187 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1188 'duration': 220,
da77d856
S
1189 'upload_date': '20150625',
1190 'uploader_id': 'dorappi2000',
ec85ded8 1191 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1192 'uploader': 'dorappi2000',
eb6793ba 1193 'formats': 'mincount:31',
da77d856 1194 },
eb6793ba 1195 'skip': 'not actual anymore',
2ee8f5d8 1196 },
8a1a26ce
YCH
1197 # DASH manifest with segment_list
1198 {
1199 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1200 'md5': '8ce563a1d667b599d21064e982ab9e31',
1201 'info_dict': {
1202 'id': 'CsmdDsKjzN8',
1203 'ext': 'mp4',
17ee98e1 1204 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1205 'uploader': 'Airtek',
1206 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1207 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1208 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1209 },
1210 'params': {
1211 'youtube_include_dash_manifest': True,
1212 'format': '135', # bestvideo
be49068d
S
1213 },
1214 'skip': 'This live event has ended.',
2ee8f5d8 1215 },
cf7e015f
S
1216 {
1217 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1218 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1219 'info_dict': {
545cc85d 1220 'id': 'jvGDaLqkpTg',
1221 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1222 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1223 },
1224 'playlist': [{
1225 'info_dict': {
545cc85d 1226 'id': 'jvGDaLqkpTg',
cf7e015f 1227 'ext': 'mp4',
545cc85d 1228 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1229 'description': 'md5:e03b909557865076822aa169218d6a5d',
1230 'duration': 10643,
1231 'upload_date': '20161111',
1232 'uploader': 'Team PGP',
1233 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1234 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1235 },
1236 }, {
1237 'info_dict': {
545cc85d 1238 'id': '3AKt1R1aDnw',
cf7e015f 1239 'ext': 'mp4',
545cc85d 1240 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1241 'description': 'md5:e03b909557865076822aa169218d6a5d',
1242 'duration': 10991,
1243 'upload_date': '20161111',
1244 'uploader': 'Team PGP',
1245 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1246 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1247 },
1248 }, {
1249 'info_dict': {
545cc85d 1250 'id': 'RtAMM00gpVc',
cf7e015f 1251 'ext': 'mp4',
545cc85d 1252 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1253 'description': 'md5:e03b909557865076822aa169218d6a5d',
1254 'duration': 10995,
1255 'upload_date': '20161111',
1256 'uploader': 'Team PGP',
1257 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1258 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1259 },
1260 }, {
1261 'info_dict': {
545cc85d 1262 'id': '6N2fdlP3C5U',
cf7e015f 1263 'ext': 'mp4',
545cc85d 1264 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1265 'description': 'md5:e03b909557865076822aa169218d6a5d',
1266 'duration': 10990,
1267 'upload_date': '20161111',
1268 'uploader': 'Team PGP',
1269 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1270 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1271 },
1272 }],
1273 'params': {
1274 'skip_download': True,
1275 },
cbaed4bb 1276 },
f9f49d87 1277 {
067aa17e 1278 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1279 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1280 'info_dict': {
1281 'id': 'gVfLd0zydlo',
1282 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1283 },
1284 'playlist_count': 2,
be49068d 1285 'skip': 'Not multifeed anymore',
f9f49d87 1286 },
cbaed4bb 1287 {
2d3d2997 1288 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1289 'only_matching': True,
0e49d9a6 1290 },
6d4fc66b 1291 {
2d3d2997 1292 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1293 'only_matching': True,
1294 },
0e49d9a6 1295 {
067aa17e 1296 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1297 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1298 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1299 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1300 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1301 'info_dict': {
1302 'id': 'lsguqyKfVQg',
1303 'ext': 'mp4',
1304 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 1305 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 1306 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1307 'duration': 133,
0e49d9a6
LL
1308 'upload_date': '20151119',
1309 'uploader_id': 'IronSoulElf',
ec85ded8 1310 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1311 'uploader': 'IronSoulElf',
eb6793ba
S
1312 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1313 'track': 'Dark Walk - Position Music',
1314 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 1315 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1316 },
1317 'params': {
1318 'skip_download': True,
1319 },
1320 },
61f92af1 1321 {
067aa17e 1322 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1323 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1324 'only_matching': True,
1325 },
313dfc45
LL
1326 {
1327 # Video with yt:stretch=17:0
1328 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1329 'info_dict': {
1330 'id': 'Q39EVAstoRM',
1331 'ext': 'mp4',
1332 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1333 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1334 'upload_date': '20151107',
1335 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1336 'uploader': 'CH GAMER DROID',
1337 },
1338 'params': {
1339 'skip_download': True,
1340 },
be49068d 1341 'skip': 'This video does not exist.',
313dfc45 1342 },
201c1459 1343 {
1344 # Video with incomplete 'yt:stretch=16:'
1345 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1346 'only_matching': True,
1347 },
7caf9830
S
1348 {
1349 # Video licensed under Creative Commons
1350 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1351 'info_dict': {
1352 'id': 'M4gD1WSo5mA',
1353 'ext': 'mp4',
1354 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1355 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1356 'duration': 721,
7caf9830
S
1357 'upload_date': '20150127',
1358 'uploader_id': 'BerkmanCenter',
ec85ded8 1359 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1360 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1361 'license': 'Creative Commons Attribution license (reuse allowed)',
1362 },
1363 'params': {
1364 'skip_download': True,
1365 },
1366 },
fd050249
S
1367 {
1368 # Channel-like uploader_url
1369 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1370 'info_dict': {
1371 'id': 'eQcmzGIKrzg',
1372 'ext': 'mp4',
1373 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1374 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1375 'duration': 4060,
fd050249 1376 'upload_date': '20151119',
eb6793ba 1377 'uploader': 'Bernie Sanders',
fd050249 1378 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1379 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1380 'license': 'Creative Commons Attribution license (reuse allowed)',
1381 },
1382 'params': {
1383 'skip_download': True,
1384 },
1385 },
040ac686
S
1386 {
1387 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1388 'only_matching': True,
7f29cf54
S
1389 },
1390 {
067aa17e 1391 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1392 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1393 'only_matching': True,
6496ccb4
S
1394 },
1395 {
1396 # Rental video preview
1397 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1398 'info_dict': {
1399 'id': 'uGpuVWrhIzE',
1400 'ext': 'mp4',
1401 'title': 'Piku - Trailer',
1402 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1403 'upload_date': '20150811',
1404 'uploader': 'FlixMatrix',
1405 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1406 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1407 'license': 'Standard YouTube License',
1408 },
1409 'params': {
1410 'skip_download': True,
1411 },
eb6793ba 1412 'skip': 'This video is not available.',
022a5d66 1413 },
12afdc2a
S
1414 {
1415 # YouTube Red video with episode data
1416 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1417 'info_dict': {
1418 'id': 'iqKdEhx-dD4',
1419 'ext': 'mp4',
1420 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1421 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1422 'duration': 2085,
12afdc2a
S
1423 'upload_date': '20170118',
1424 'uploader': 'Vsauce',
1425 'uploader_id': 'Vsauce',
1426 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1427 'series': 'Mind Field',
1428 'season_number': 1,
1429 'episode_number': 1,
1430 },
1431 'params': {
1432 'skip_download': True,
1433 },
1434 'expected_warnings': [
1435 'Skipping DASH manifest',
1436 ],
1437 },
c7121fa7
S
1438 {
1439 # The following content has been identified by the YouTube community
1440 # as inappropriate or offensive to some audiences.
1441 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1442 'info_dict': {
1443 'id': '6SJNVb0GnPI',
1444 'ext': 'mp4',
1445 'title': 'Race Differences in Intelligence',
1446 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1447 'duration': 965,
1448 'upload_date': '20140124',
1449 'uploader': 'New Century Foundation',
1450 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1451 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1452 },
1453 'params': {
1454 'skip_download': True,
1455 },
545cc85d 1456 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1457 },
022a5d66
S
1458 {
1459 # itag 212
1460 'url': '1t24XAntNCY',
1461 'only_matching': True,
fd5c4aab
S
1462 },
1463 {
1464 # geo restricted to JP
1465 'url': 'sJL6WA-aGkQ',
1466 'only_matching': True,
1467 },
cd5a74a2
S
1468 {
1469 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1470 'only_matching': True,
1471 },
bc2ca1bb 1472 {
1473 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1474 'only_matching': True,
1475 },
1476 {
1477 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1478 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1479 'only_matching': True,
1480 },
825cd268
RA
1481 {
1482 # DRM protected
1483 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1484 'only_matching': True,
4fe54c12
S
1485 },
1486 {
1487 # Video with unsupported adaptive stream type formats
1488 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1489 'info_dict': {
1490 'id': 'Z4Vy8R84T1U',
1491 'ext': 'mp4',
1492 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1493 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1494 'duration': 433,
1495 'upload_date': '20130923',
1496 'uploader': 'Amelia Putri Harwita',
1497 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1498 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1499 'formats': 'maxcount:10',
1500 },
1501 'params': {
1502 'skip_download': True,
1503 'youtube_include_dash_manifest': False,
1504 },
5429d6a9 1505 'skip': 'not actual anymore',
5caabd3c 1506 },
1507 {
822b9d9c 1508 # Youtube Music Auto-generated description
5caabd3c 1509 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1510 'info_dict': {
1511 'id': 'MgNrAu2pzNs',
1512 'ext': 'mp4',
1513 'title': 'Voyeur Girl',
1514 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1515 'upload_date': '20190312',
5429d6a9
S
1516 'uploader': 'Stephen - Topic',
1517 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1518 'artist': 'Stephen',
1519 'track': 'Voyeur Girl',
1520 'album': 'it\'s too much love to know my dear',
1521 'release_date': '20190313',
1522 'release_year': 2019,
1523 },
1524 'params': {
1525 'skip_download': True,
1526 },
1527 },
66b48727
RA
1528 {
1529 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1530 'only_matching': True,
1531 },
011e75e6
S
1532 {
1533 # invalid -> valid video id redirection
1534 'url': 'DJztXj2GPfl',
1535 'info_dict': {
1536 'id': 'DJztXj2GPfk',
1537 'ext': 'mp4',
1538 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1539 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1540 'upload_date': '20090125',
1541 'uploader': 'Prochorowka',
1542 'uploader_id': 'Prochorowka',
1543 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1544 'artist': 'Panjabi MC',
1545 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1546 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1547 },
1548 'params': {
1549 'skip_download': True,
1550 },
545cc85d 1551 'skip': 'Video unavailable',
ea74e00b
DP
1552 },
1553 {
1554 # empty description results in an empty string
1555 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1556 'info_dict': {
1557 'id': 'x41yOUIvK2k',
1558 'ext': 'mp4',
1559 'title': 'IMG 3456',
1560 'description': '',
1561 'upload_date': '20170613',
1562 'uploader_id': 'ElevageOrVert',
1563 'uploader': 'ElevageOrVert',
1564 },
1565 'params': {
1566 'skip_download': True,
1567 },
1568 },
a0566bbf 1569 {
29f7c58a 1570 # with '};' inside yt initial data (see [1])
1571 # see [2] for an example with '};' inside ytInitialPlayerResponse
1572 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1573 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1574 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1575 'info_dict': {
1576 'id': 'CHqg6qOn4no',
1577 'ext': 'mp4',
1578 'title': 'Part 77 Sort a list of simple types in c#',
1579 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1580 'upload_date': '20130831',
1581 'uploader_id': 'kudvenkat',
1582 'uploader': 'kudvenkat',
1583 },
1584 'params': {
1585 'skip_download': True,
1586 },
1587 },
29f7c58a 1588 {
1589 # another example of '};' in ytInitialData
1590 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1591 'only_matching': True,
1592 },
1593 {
1594 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1595 'only_matching': True,
1596 },
545cc85d 1597 {
cc2db878 1598 # https://github.com/ytdl-org/youtube-dl/pull/28094
1599 'url': 'OtqTfy26tG0',
1600 'info_dict': {
1601 'id': 'OtqTfy26tG0',
1602 'ext': 'mp4',
1603 'title': 'Burn Out',
1604 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1605 'upload_date': '20141120',
1606 'uploader': 'The Cinematic Orchestra - Topic',
1607 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1608 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1609 'artist': 'The Cinematic Orchestra',
1610 'track': 'Burn Out',
1611 'album': 'Every Day',
1612 'release_data': None,
1613 'release_year': None,
1614 },
1615 'params': {
1616 'skip_download': True,
1617 },
545cc85d 1618 },
bc2ca1bb 1619 {
1620 # controversial video, only works with bpctr when authenticated with cookies
1621 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1622 'only_matching': True,
1623 },
f7ad7160 1624 {
1625 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1626 'url': 'cBvYw8_A0vQ',
1627 'info_dict': {
1628 'id': 'cBvYw8_A0vQ',
1629 'ext': 'mp4',
1630 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1631 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1632 'upload_date': '20201120',
1633 'uploader': 'Walk around Japan',
1634 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1635 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1636 },
1637 'params': {
1638 'skip_download': True,
1639 },
0fb983f6 1640 }, {
1641 # Has multiple audio streams
1642 'url': 'WaOKSUlf4TM',
1643 'only_matching': True
9297939e 1644 }, {
1645 # Requires Premium: has format 141 when requested using YTM url
1646 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1647 'only_matching': True
1648 }, {
120916da 1649 # multiple subtitles with same lang_code
1650 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1651 'only_matching': True,
109dd3b2 1652 }, {
1653 # Force use android client fallback
1654 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1655 'info_dict': {
1656 'id': 'YOelRv7fMxY',
1657 'title': 'Digging a Secret Tunnel from my Workshop',
1658 'ext': '3gp',
1659 'upload_date': '20210624',
1660 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1661 'uploader': 'colinfurze',
1662 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1663 'description': 'md5:ecb672623246d98c6c562eed6ae798c3'
1664 },
1665 'params': {
1666 'format': '17', # 3gp format available on android
1667 'extractor_args': {'youtube': {'player_client': ['android']}},
1668 },
120916da 1669 },
109dd3b2 1670 {
1671 # Skip download of additional client configs (remix client config in this case)
1672 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1673 'only_matching': True,
1674 'params': {
1675 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1676 },
1677 }
2eb88d95
PH
1678 ]
1679
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    URLs whose query string carries a non-empty ``list`` parameter are
    rejected; every other URL is decided by the parent implementation.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_values = parse_qs(url).get('list', [None])
    return False if list_values[0] else super(YoutubeIE, cls).suitable(url)
1689
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and start with two empty caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Signature functions, keyed by (player URL, signature cache id)
    self._player_cache = {}
    # Downloaded player JS source, keyed by player id
    self._code_cache = {}
e0df6211 1694
def _extract_player_url(self, ytcfg=None, webpage=None):
    """Return the absolute URL of the JS player, or None if it cannot be found.

    The URL is read from ``ytcfg['PLAYER_JS_URL']`` when available and is
    otherwise searched for in ``webpage``. Protocol-relative and
    site-relative URLs are made absolute.
    """
    player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
    # Only search the page when there is one; the caller downloads it
    # non-fatally, so it may be missing
    if not player_url and webpage:
        player_url = self._search_regex(
            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
            webpage, 'player URL', fatal=False)
    # The non-fatal search may find nothing; report that as None instead of
    # failing on the string operations below
    if not player_url:
        return None
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)
    return player_url
1707
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Describe a signature by the dot-joined lengths of its dot-separated parts."""
    part_lengths = [len(segment) for segment in example_sig.split('.')]
    return '.'.join(map(compat_str, part_lengths))
60064c53 1711
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the player id captured by the first matching ``_PLAYER_INFO_RE`` pattern.

    Raises ExtractorError when none of the patterns matches ``player_url``.
    """
    for pattern in cls._PLAYER_INFO_RE:
        match = re.search(pattern, player_url)
        if match:
            return match.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)
e40c758c 1721
109dd3b2 1722 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1723 player_id = self._extract_player_info(player_url)
1724 if player_id not in self._code_cache:
1725 self._code_cache[player_id] = self._download_webpage(
1726 player_url, video_id, fatal=fatal,
1727 note='Downloading player ' + player_id,
1728 errnote='Download of %s failed' % player_url)
1729 return player_id in self._code_cache
1730
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that descrambles signatures shaped like ``example_sig``.

    A character permutation stored in the 'youtube-sigfuncs' cache is used
    when present. Otherwise the function is parsed out of the player JS and
    its permutation is stored for next time.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    cached_order = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cached_order is not None:
        return lambda s: ''.join(s[i] for i in cached_order)

    if not self._load_player(video_id, player_url):
        return None

    decrypt = self._parse_sig_js(self._code_cache[player_id])

    # Run a string of distinct code points through the function so that the
    # output reveals which input position ends up at each output position
    probe = ''.join(compat_chr(code_point) for code_point in range(len(example_sig)))
    permutation = [ord(char) for char in decrypt(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, permutation)
    return decrypt
83799698 1753
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the signature function ``func``.

    ``func`` is applied to a probe string of distinct code points, the
    resulting index sequence is compressed into indexing/slicing
    expressions on ``s``, and the generated snippet is written to screen.
    """
    def gen_sig_code(idxs):
        # Yield 's[...]' expressions that together reproduce the index list
        def _genslice(start, end, step):
            # Render one run of consecutive indices as a slice expression
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                # Inside a run: keep going while the stride is unchanged
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new ascending or descending run starts at prev
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Flush whatever is pending after the last pair
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1792
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Find the signature function in the player JS and wrap it as a callable.

    The function name is located with the patterns below, the function is
    extracted with JSInterpreter, and the returned callable passes its
    single argument to it as a one-element argument list.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)
    return lambda s: initial_function([s])
1816
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature.

    The signature function is looked up in (or added to) ``_player_cache``
    under the key (player_url, signature cache id). Any failure while
    obtaining or applying it is re-raised as ExtractorError with the
    traceback text appended to the message.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        decrypt = self._player_cache[cache_key]
        if self.get_param('youtube_print_sig_code'):
            self._print_sig_code(decrypt, s)
        return decrypt(s)
    except Exception as e:
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=e)
e0df6211 1838
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    ``ytcfg['STS']`` is preferred; when it is missing or falsy the value is
    searched for in the player JS instead.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None
    if not self._load_player(video_id, player_url, fatal=fatal):
        # Nothing better available: hand back whatever ytcfg gave us
        return sts
    code = self._code_cache[self._extract_player_info(player_url)]
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
1863
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL from ``player_response``, if present.

    The request is non-fatal, and nothing is done when the response carries
    no usable ``videostatsPlaybackUrl``.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return
    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

    query['ver'] = ['2']
    query['cpn'] = [cpn]
    encoded_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(url_parts._replace(query=encoded_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1888
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embeds referenced in ``webpage``.

    The returned list holds, in this order, embedded player URLs, lazyYT
    ``data-youtube-id`` values and "YouTube Video Importer"
    ``data-video_id`` values.
    """
    # Embedded YouTube player
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    entries = [
        unescapeHTML(found.group('url'))
        for found in re.finditer(embed_re, webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id)
        for youtube_id in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    # findall returns one tuple per match; the id is the last group
    entries.extend(groups[-1] for groups in re.findall(importer_re, webpage))

    return entries
1920
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by ``_extract_urls``, or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1925
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return group 2 of ``_VALID_URL`` matched against ``url``.

    Raises ExtractorError when ``url`` does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is None:
        raise ExtractorError('Invalid URL: %s' % url)
    return match.group(2)
1933
def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar found in ``data``."""
    def start_seconds(chapter):
        # timeRangeStartMillis is divided by 1000 to get seconds
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_text(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_seconds,
        chapter_title=title_text,
        duration=duration)
1948
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build chapters from the macro-marker lists in the engagement panels.

    Returns the chapters of the first list that yields any, else ``[]``.
    """
    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter.get('timeDescription')))

    def chapter_title(chapter):
        return self._get_text(chapter.get('title'))

    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
1964
1965 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
84213ea8 1966 chapters = []
7c365c21 1967 last_chapter = {'start_time': 0}
1968 for idx, chapter in enumerate(chapter_list or []):
1969 title = chapter_title(chapter)
84213ea8
S
1970 start_time = chapter_time(chapter)
1971 if start_time is None:
1972 continue
7c365c21 1973 last_chapter['end_time'] = start_time
1974 if start_time < last_chapter['start_time']:
1975 if idx == 1:
1976 chapters.pop()
1977 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
1978 else:
1979 self.report_warning(f'Invalid start time for chapter "{title}"')
1980 continue
1981 last_chapter = {'start_time': start_time, 'title': title}
1982 chapters.append(last_chapter)
1983 last_chapter['end_time'] = duration
84213ea8
S
1984 return chapters
1985
545cc85d 1986 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1987 return self._parse_json(self._search_regex(
1988 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1989 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 1990
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns the result of datetime_from_str for 'now-<X><units>', or None
    when the text has fewer than three space-separated words or the
    conversion raises ValueError.
    """
    words = time_text.split(' ')
    if len(words) < 3:
        return None
    amount, unit = words[0], words[1]
    try:
        return datetime_from_str('now-%s%s' % (amount, unit), precision='auto')
    except ValueError:
        return None
d92f5d5a 2003
a1c5d2ca
M
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a flat comment dict.

    Returns None when the renderer has no ``commentId``. ``parent`` is the
    id of the parent comment; replies without one are filed under 'root'.
    ``timestamp`` is None when the published-time text cannot be parsed.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer.get('contentText'))

    # note: timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer.get('publishedTimeText')) or ''
    time_text_dt = self.parse_time_text(time_text)
    # Start from None so that an unparsable time_text does not leave
    # `timestamp` unbound when the result dict is built below
    timestamp = None
    if isinstance(time_text_dt, datetime.datetime):
        timestamp = calendar.timegm(time_text_dt.timetuple())
    author = self._get_text(comment_renderer.get('authorText'))
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                   lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_favorited = 'creatorHeart' in (try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
2041
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, video_id, parent=None, comment_counts=None):
    """Generate the comments reachable from ``root_continuation_data``.

    Yields comment dicts (as built by ``_extract_comment``) and, when the
    comment section header is processed, an int with the estimated total.
    Replies are produced by recursing with ``parent`` set to the id of the
    comment being replied to. ``comment_counts`` is a shared three-item
    list: comments so far, estimated total, current reply thread number.
    """

    def extract_header(contents):
        # Read the expected comment count and the continuation of the
        # requested sort order from the comments header
        _total_comments = 0
        _continuation = None
        for content in contents:
            comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
            expected_comment_count = parse_count(self._get_text(
                comments_header_renderer, (lambda x: x['countText'], lambda x: x['commentsCount']), max_runs=1))

            if expected_comment_count:
                comment_counts[1] = expected_comment_count
                self.to_screen('Downloading ~%d comments' % expected_comment_count)
                _total_comments = comment_counts[1]
            sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
            comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = sort_menu_item.get('title')
            if isinstance(sort_text, compat_str):
                sort_text = sort_text.lower()
            else:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text)
            break
        return _total_comments, _continuation

    def extract_thread(contents):
        # Yield every comment in contents, each followed by its replies
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # `dict` is the expected type, not a second getter: passing it
            # inside the getter tuple would call dict() on the source and
            # skip the type check
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    video_id, parent=comment.get('id'), comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    # YouTube comments have a max depth of 2
    max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
    if max_depth == 1 and parent:
        return
    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    continuation = self._extract_continuation(root_continuation_data)
    if continuation and len(continuation['continuation']) < 27:
        self.write_debug('Detected old API continuation token. Generating new API compatible token.')
        continuation_token = self._generate_comment_continuation(video_id)
        continuation = self._build_api_continuation_query(continuation_token, None)

    visitor_data = None
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    comment_counts[2], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
        if not response:
            break
        # Keep the previous visitor data when the response carries none
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

        continuation = None
        if isinstance(continuation_contents, list):
            for continuation_section in continuation_contents:
                if not isinstance(continuation_section, dict):
                    continue
                continuation_items = try_get(
                    continuation_section,
                    (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                     lambda x: x['appendContinuationItemsAction']['continuationItems']),
                    list) or []
                if is_first_continuation:
                    total_comments, continuation = extract_header(continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break
                    continue
                # NOTE(review): count is the index of the last entry, so a
                # page with exactly one entry also leaves it at 0 - confirm
                # that this is intended
                count = 0
                for count, entry in enumerate(extract_thread(continuation_items)):
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break

        # Deprecated response structure
        elif isinstance(continuation_contents, dict):
            known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
            for key, continuation_renderer in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                if not isinstance(continuation_renderer, dict):
                    continue
                if is_first_continuation:
                    header_continuation_items = [continuation_renderer.get('header') or {}]
                    total_comments, continuation = extract_header(header_continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break

                # Sometimes YouTube provides a continuation without any comments
                # In most cases we end up just downloading these with very little comments to come.
                count = 0
                for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                if count == 0:
                    if not parent:
                        self.report_warning('No comments received - assuming end of comments')
                    continuation = None
                break
a1c5d2ca 2212
@staticmethod
def _generate_comment_continuation(video_id):
    """
    Generates initial comment section continuation token from given video id

    The token is the base64 encoding of three fixed byte sequences with the
    UTF-8 video id placed between them (twice).
    """
    encoded_id = base64.b64encode(bytes(video_id.encode('utf-8')))
    token_ints = []
    for chunk in ('Eg0SCw==', encoded_id, 'GAYyJyIRIgs=', encoded_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u'):
        token_ints.extend(bytes_to_intlist(base64.b64decode(chunk)))
    return base64.b64encode(intlist_to_bytes(token_ints)).decode('utf-8')
2223
def _extract_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction.

    Collects comments until the 'max_comments' setting is reached or the
    user interrupts, and returns a dict with 'comments' and 'comment_count'.
    """
    known_entry_comment_renderers = ('itemSectionRenderer',)

    def _real_comment_extract(contents):
        if not isinstance(contents, list):
            return
        for entry in contents:
            for key, renderer in entry.items():
                if key in known_entry_comment_renderers:
                    yield from self._comment_entries(
                        renderer, video_id=video_id, ytcfg=ytcfg,
                        identity_token=self._extract_identity_token(webpage, item_id=video_id),
                        account_syncid=self._extract_account_syncid(ytcfg))
                    # Only the first known renderer of an entry is used
                    break

    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
    comments = []
    estimated_total = 0

    try:
        for item in _real_comment_extract(contents):
            if len(comments) >= max_comments:
                break
            if isinstance(item, int):
                # An int is the estimated total, not a comment
                estimated_total = item
            else:
                comments.append(item)
    except KeyboardInterrupt:
        self.to_screen('Interrupted by user')
    self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
    return {
        'comments': comments,
        'comment_count': len(comments),
    }
2257
109dd3b2 2258 @staticmethod
2259 def _generate_player_context(sts=None):
2260 context = {
2261 'html5Preference': 'HTML5_PREF_WANTS',
2262 }
2263 if sts is not None:
2264 context['signatureTimestamp'] = sts
2265 return {
2266 'playbackContext': {
2267 'contentPlaybackContext': context
2268 }
2269 }
2270
4e6767b5 2271 @staticmethod
c888ffb9 2272 def _get_video_info_params(video_id, client='TVHTML5'):
2273 GVI_CLIENTS = {
2274 'ANDROID': {
2275 'c': 'ANDROID',
2276 'cver': '16.20',
2277 },
2278 'TVHTML5': {
2279 'c': 'TVHTML5',
2280 'cver': '6.20180913',
2281 }
2282 }
2283 query = {
4e6767b5 2284 'video_id': video_id,
2285 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c888ffb9 2286 'html5': '1'
4e6767b5 2287 }
c888ffb9 2288 query.update(GVI_CLIENTS.get(client))
2289 return query
4e6767b5 2290
def _real_extract(self, url):
    """Extract the info dict for a single YouTube video URL.

    The player response is taken from the watch page and/or the player API
    (optionally through the android clients), age-gate workarounds are tried
    when needed, and formats, thumbnails, subtitles and metadata are then
    collected from the player response, the page's meta tags and the
    initial data.

    Returns the info dict of the video. Two early exits return something
    else: a ``url_result`` for a trailer video and a ``playlist_result``
    for multifeed videos (unless 'noplaylist' or 'force_singlefeed' is set).
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id
    webpage = self._download_webpage(
        webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

    ytcfg = self._extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
    identity_token = self._extract_identity_token(webpage, video_id)
    session_index = self._extract_session_index(ytcfg)
    player_url = self._extract_player_url(ytcfg, webpage)

    player_client = self._configuration_arg('player_client', [''])[0]
    if player_client not in ('web', 'android', ''):
        self.report_warning(f'Invalid player_client {player_client} given. Falling back to android client.')
    # Anything other than an explicit 'web' (including the default '') uses the android client
    force_mobile_client = player_client != 'web'
    player_skip = self._configuration_arg('player_skip')
    player_response = None
    if webpage:
        player_response = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    syncid = self._extract_account_syncid(ytcfg, player_response)
    headers = self._generate_api_headers(ytcfg, identity_token, syncid, session_index=session_index)

    ytm_streaming_data = {}
    if is_music_url:
        ytm_webpage = None
        sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
        if sts and not force_mobile_client and 'configs' not in player_skip:
            ytm_webpage = self._download_webpage(
                'https://music.youtube.com',
                video_id, fatal=False, note='Downloading remix client config')

        ytm_cfg = self._extract_ytcfg(video_id, ytm_webpage) or {}
        ytm_client = 'WEB_REMIX'
        if not sts or force_mobile_client:
            # Android client already has signature descrambled
            # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
            if not sts:
                self.report_warning('Falling back to android remix client for player API.')
            ytm_client = 'ANDROID_MUSIC'
            ytm_cfg = {}

        ytm_headers = self._generate_api_headers(
            ytm_cfg, identity_token, syncid,
            client=ytm_client, session_index=session_index)
        ytm_query = {'videoId': video_id}
        ytm_query.update(self._generate_player_context(sts))

        ytm_player_response = self._extract_response(
            item_id=video_id, ep='player', query=ytm_query,
            ytcfg=ytm_cfg, headers=ytm_headers, fatal=False,
            default_client=ytm_client,
            note='Downloading %sremix player API JSON' % ('android ' if force_mobile_client else ''))
        ytm_streaming_data = try_get(ytm_player_response, lambda x: x['streamingData'], dict) or {}

    if not player_response or force_mobile_client:
        sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
        yt_client = 'WEB'
        ytpcfg = ytcfg
        ytp_headers = headers
        if not sts or force_mobile_client:
            # Android client already has signature descrambled
            # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
            if not sts:
                self.report_warning('Falling back to android client for player API.')
            yt_client = 'ANDROID'
            ytpcfg = {}
            ytp_headers = self._generate_api_headers(ytpcfg, identity_token, syncid,
                                                     client=yt_client, session_index=session_index)

        yt_query = {'videoId': video_id}
        yt_query.update(self._generate_player_context(sts))
        player_response = self._extract_response(
            item_id=video_id, ep='player', query=yt_query,
            ytcfg=ytpcfg, headers=ytp_headers, fatal=False,
            default_client=yt_client,
            note='Downloading %splayer API JSON' % ('android ' if force_mobile_client else '')
        ) or player_response

    # Both the webpage and the (non-fatal) player API request may have failed.
    # Continue with an empty response so the code below does not crash on
    # None.get(...) before it can report the missing formats
    player_response = player_response or {}

    # Age-gate workarounds
    playability_status = player_response.get('playabilityStatus') or {}
    if playability_status.get('reason') in self._AGE_GATE_REASONS:
        gvi_clients = ('ANDROID', 'TVHTML5') if force_mobile_client else ('TVHTML5', 'ANDROID')
        for gvi_client in gvi_clients:
            pr = self._parse_json(try_get(compat_parse_qs(
                self._download_webpage(
                    base_url + 'get_video_info', video_id,
                    'Refetching age-gated %s info webpage' % gvi_client.lower(),
                    'unable to download video info webpage', fatal=False,
                    query=self._get_video_info_params(video_id, client=gvi_client))),
                lambda x: x['player_response'][0],
                compat_str) or '{}', video_id)
            if pr:
                break
        if not pr:
            self.report_warning('Falling back to embedded-only age-gate workaround.')
            embed_webpage = None
            sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
            if sts and not force_mobile_client and 'configs' not in player_skip:
                embed_webpage = self._download_webpage(
                    'https://www.youtube.com/embed/%s?html5=1' % video_id,
                    video_id=video_id, note='Downloading age-gated embed config')

            ytcfg_age = self._extract_ytcfg(video_id, embed_webpage) or {}
            # If we extracted the embed webpage, it'll tell us if we can view the video
            embedded_pr = self._parse_json(
                try_get(ytcfg_age, lambda x: x['PLAYER_VARS']['embedded_player_response'], str) or '{}',
                video_id=video_id)
            embedded_ps_reason = try_get(embedded_pr, lambda x: x['playabilityStatus']['reason'], str) or ''
            if embedded_ps_reason not in self._AGE_GATE_REASONS:
                yt_client = 'WEB_EMBEDDED_PLAYER'
                if not sts or force_mobile_client:
                    # Android client already has signature descrambled
                    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
                    if not sts:
                        self.report_warning(
                            'Falling back to android embedded client for player API (note: some formats may be missing).')
                    yt_client = 'ANDROID_EMBEDDED_PLAYER'
                    ytcfg_age = {}

                ytage_headers = self._generate_api_headers(
                    ytcfg_age, identity_token, syncid,
                    client=yt_client, session_index=session_index)
                yt_age_query = {'videoId': video_id}
                yt_age_query.update(self._generate_player_context(sts))
                pr = self._extract_response(
                    item_id=video_id, ep='player', query=yt_age_query,
                    ytcfg=ytcfg_age, headers=ytage_headers, fatal=False,
                    default_client=yt_client,
                    note='Downloading %sage-gated player API JSON' % ('android ' if force_mobile_client else '')
                ) or {}

        if pr:
            player_response = pr

    trailer_video_id = try_get(
        playability_status,
        lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
        compat_str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Without a webpage there are no meta tags to search
    search_meta = (
        lambda x: self._html_search_meta(x, webpage, default=None)) \
        if webpage else lambda x: None

    video_details = player_response.get('videoDetails') or {}
    microformat = try_get(
        player_response,
        lambda x: x['microformat']['playerMicroformatRenderer'],
        dict) or {}
    video_title = video_details.get('title') \
        or self._get_text(microformat.get('title')) \
        or search_meta(['og:title', 'twitter:title', 'title'])
    video_description = video_details.get('shortDescription')

    if not smuggled_data.get('force_singlefeed', False):
        if not self.get_param('noplaylist'):
            multifeed_metadata_list = try_get(
                player_response,
                lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
                compat_str)
            if multifeed_metadata_list:
                entries = []
                feed_ids = []
                for feed in multifeed_metadata_list.split(','):
                    # Unquote should take place before split on comma (,) since textual
                    # fields may contain comma as well (see
                    # https://github.com/ytdl-org/youtube-dl/issues/8536)
                    feed_data = compat_parse_qs(
                        compat_urllib_parse_unquote_plus(feed))

                    def feed_entry(name):
                        return try_get(
                            feed_data, lambda x: x[name][0], compat_str)

                    feed_id = feed_entry('id')
                    if not feed_id:
                        continue
                    feed_title = feed_entry('title')
                    title = video_title
                    if feed_title:
                        title += ' (%s)' % feed_title
                    entries.append({
                        '_type': 'url_transparent',
                        'ie_key': 'Youtube',
                        'url': smuggle_url(
                            base_url + 'watch?v=' + feed_data['id'][0],
                            {'force_singlefeed': True}),
                        'title': title,
                    })
                    feed_ids.append(feed_id)
                self.to_screen(
                    'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                    % (', '.join(feed_ids), video_id))
                return self.playlist_result(
                    entries, video_id, video_title, video_description)
        else:
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

    formats, itags, stream_ids = [], [], []
    itag_qualities = {}
    q = qualities([
        # "tiny" is the smallest video-only format. But some audio-only formats
        # was also labeled "tiny". It is not clear if such formats still exist
        'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])

    streaming_data = player_response.get('streamingData') or {}
    streaming_formats = streaming_data.get('formats') or []
    streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
    streaming_formats.extend(ytm_streaming_data.get('formats') or [])
    streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        # The same itag may be listed once per audio track, so key on both
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        if itag and quality:
            itag_qualities[itag] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        if itag:
            itags.append(itag)
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
            'fps': int_or_none(fmt.get('fps')),
            'height': int_or_none(fmt.get('height')),
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': fmt.get('width'),
            'language': audio_track.get('id', '').split('.')[0],
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        # The 3gp format in android client has a quality of "small",
        # but is actually worse than all other formats
        # ('ext' is only set when the mimeType could be parsed, hence .get())
        if dct.get('ext') == '3gp':
            dct['quality'] = q('tiny')
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        formats.append(dct)

    skip_manifests = self._configuration_arg('skip')
    get_dash = 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
    get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

    for sd in (streaming_data, ytm_streaming_data):
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(
                    hls_manifest_url, video_id, 'mp4', fatal=False):
                itag = self._search_regex(
                    r'/itag/(\d+)', f['url'], 'itag', default=None)
                if itag:
                    f['format_id'] = itag
                formats.append(f)

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(
                    dash_manifest_url, video_id, fatal=False):
                itag = f['format_id']
                # Already collected from the streaming data above
                if itag in itags:
                    continue
                if itag in itag_qualities:
                    f['quality'] = q(itag_qualities[itag])
                filesize = int_or_none(self._search_regex(
                    r'/clen/(\d+)', f.get('fragment_base_url')
                    or f['url'], 'file size', default=None))
                if filesize:
                    f['filesize'] = filesize
                formats.append(f)

    if not formats:
        if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
            self.raise_no_formats(
                'This video is DRM protected.', expected=True)
        pemr = try_get(
            playability_status,
            lambda x: x['errorScreen']['playerErrorMessageRenderer'],
            dict) or {}
        reason = self._get_text(pemr.get('reason')) or playability_status.get('reason')
        subreason = pemr.get('subreason')
        if subreason:
            subreason = clean_html(self._get_text(subreason))
            if subreason == 'The uploader has not made this video available in your country.':
                countries = microformat.get('availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # Either part may be missing (no reason text, or a subreason that
            # cleans to nothing); only join the parts that are actually present
            if subreason:
                reason = (reason + '\n' + subreason) if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    self._sort_formats(formats)

    keywords = video_details.get('keywords') or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break

    category = microformat.get('category') or search_meta('genre')
    channel_id = video_details.get('channelId') \
        or microformat.get('externalChannelId') \
        or search_meta('channelId')
    duration = int_or_none(
        video_details.get('lengthSeconds')
        or microformat.get('lengthSeconds')) \
        or parse_duration(search_meta('duration'))
    is_live = video_details.get('isLive')
    is_upcoming = video_details.get('isUpcoming')
    owner_profile_url = microformat.get('ownerProfileUrl')

    thumbnails = []
    for container in (video_details, microformat):
        for thumbnail in (try_get(
                container,
                lambda x: x['thumbnail']['thumbnails'], list) or []):
            thumbnail_url = thumbnail.get('url')
            if not thumbnail_url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in thumbnail_url:
                thumbnail_url = thumbnail_url.split('?')[0]
            thumbnails.append({
                'url': thumbnail_url,
                'height': int_or_none(thumbnail.get('height')),
                'width': int_or_none(thumbnail.get('width')),
            })
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
    guaranteed_thumbnail_names = [
        'hqdefault', 'hq1', 'hq2', 'hq3', '0',
        'mqdefault', 'mq1', 'mq2', 'mq3',
        'default', '1', '2', '3'
    ]
    thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
    n_thumbnail_names = len(thumbnail_names)

    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
        '_test_url': name in hq_thumbnail_names,
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Names earlier in thumbnail_names get a higher preference; webp beats jpg of the same name
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)

    info = {
        'id': video_id,
        'title': self._live_title(video_title) if is_live else video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        'description': video_description,
        'upload_date': unified_strdate(
            microformat.get('uploadDate')
            or search_meta('uploadDate')),
        # videoDetails may be empty or lack 'author'; treat it as optional
        # like every other field instead of raising KeyError
        'uploader': video_details.get('author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
        'duration': duration,
        'view_count': int_or_none(
            video_details.get('viewCount')
            or microformat.get('viewCount')
            or search_meta('interactionCount')),
        'average_rating': float_or_none(video_details.get('averageRating')),
        'age_limit': 18 if (
            microformat.get('isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'is_live': is_live,
        'playable_in_embed': playability_status.get('playableInEmbed'),
        'was_live': video_details.get('isLiveContent'),
    }

    pctr = try_get(
        player_response,
        lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
    subtitles = {}
    if pctr:
        def process_language(container, base_url, lang_code, sub_name, query):
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': update_url_query(base_url, query),
                    'name': sub_name,
                })

        for caption_track in (pctr.get('captionTracks') or []):
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            if caption_track.get('kind') != 'asr':
                lang_code = (
                    remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
                    or caption_track.get('languageCode'))
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code,
                    try_get(caption_track, lambda x: x['name']['simpleText']),
                    {})
                continue
            # 'asr' track: offer it in every available translation language
            automatic_captions = {}
            for translation_language in (pctr.get('translationLanguages') or []):
                translation_language_code = translation_language.get('languageCode')
                if not translation_language_code:
                    continue
                process_language(
                    automatic_captions, base_url, translation_language_code,
                    self._get_text(translation_language.get('languageName'), max_runs=1),
                    {'tlang': translation_language_code})
            info['automatic_captions'] = automatic_captions
    info['subtitles'] = subtitles

    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(query[k][0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # Strip the matched album text (not the group name literal)
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_DATA_RE, video_id,
            'yt initial data')
    if not initial_data:
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=ytcfg, headers=headers, query={'videoId': video_id},
            note='Downloading initial data API JSON')

    try:
        # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
        info['subtitles']['live_chat'] = [{
            'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]
    except (KeyError, IndexError, TypeError):
        pass

    # The comments closure at the end of this method reads `contents`; bind it
    # here so it exists even when no initial data could be obtained
    contents = []
    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or None)

        contents = try_get(
            initial_data,
            lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
            list) or []
        for content in contents:
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = self._get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    like_count, dislike_count = sbr_tooltip.split(' / ')
                    info.update({
                        'like_count': str_to_int(like_count),
                        'dislike_count': str_to_int(dislike_count),
                    })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                info['channel'] = self._get_text(try_get(
                    vsir,
                    lambda x: x['owner']['videoOwnerRenderer']['title'],
                    dict))
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                # A divider line is taken to mean several songs are listed; the
                # per-song fields below are then skipped
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = self._get_text(mrr['title'])
                    mrr_contents_text = self._get_text(mrr['contents'][0])
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }
    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = bool_or_none(video_details.get('isPrivate'))
    is_unlisted = bool_or_none(microformat.get('isUnlisted'))
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    # get xsrf for annotations or comments
    get_annotations = self.get_param('writeannotations', False)
    get_comments = self.get_param('getcomments', False)
    if get_annotations or get_comments:
        xsrf_token = None
        ytcfg = self._extract_ytcfg(video_id, webpage)
        if ytcfg:
            xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
        if not xsrf_token:
            xsrf_token = self._search_regex(
                r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
                webpage, 'xsrf token', group='xsrf_token', fatal=False)

    # annotations
    if get_annotations:
        invideo_url = try_get(
            player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
        if xsrf_token and invideo_url:
            xsrf_field_name = None
            if ytcfg:
                xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
            if not xsrf_field_name:
                xsrf_field_name = self._search_regex(
                    r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
                    webpage, 'xsrf field name',
                    group='xsrf_field_name', default='session_token')
            info['annotations'] = self._download_webpage(
                self._proto_relative_url(invideo_url),
                video_id, note='Downloading annotations',
                errnote='Unable to download video annotations', fatal=False,
                data=urlencode_postdata({xsrf_field_name: xsrf_token}))

    if get_comments:
        # Stored as a callable under '__post_extractor' rather than extracted here
        info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_response)

    return info
c5e8d7af 3013
5f6a1245 3014
8bdd16b4 3015class YoutubeTabIE(YoutubeBaseInfoExtractor):
3016 IE_DESC = 'YouTube.com tab'
70d5c17b 3017 _VALID_URL = r'''(?x)
3018 https?://
3019 (?:\w+\.)?
3020 (?:
3021 youtube(?:kids)?\.com|
3022 invidio\.us
3023 )/
3024 (?:
fe03a6cd 3025 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 3026 (?P<not_channel>
9ba5705a 3027 feed/|hashtag/|
70d5c17b 3028 (?:playlist|watch)\?.*?\blist=
3029 )|
29f7c58a 3030 (?!(?:%s)\b) # Direct URLs
70d5c17b 3031 )
3032 (?P<id>[^/?\#&]+)
3033 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 3034 IE_NAME = 'youtube:tab'
3035
81127aa5 3036 _TESTS = [{
da692b79 3037 'note': 'playlists, multipage',
8bdd16b4 3038 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3039 'playlist_mincount': 94,
3040 'info_dict': {
3041 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3042 'title': 'Игорь Клейнер - Playlists',
3043 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3044 'uploader': 'Игорь Клейнер',
3045 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 3046 },
3047 }, {
da692b79 3048 'note': 'playlists, multipage, different order',
8bdd16b4 3049 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3050 'playlist_mincount': 94,
3051 'info_dict': {
3052 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3053 'title': 'Игорь Клейнер - Playlists',
3054 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3055 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3056 'uploader': 'Игорь Клейнер',
8bdd16b4 3057 },
201c1459 3058 }, {
da692b79 3059 'note': 'playlists, series',
201c1459 3060 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3061 'playlist_mincount': 5,
3062 'info_dict': {
3063 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3064 'title': '3Blue1Brown - Playlists',
3065 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 3066 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3067 'uploader': '3Blue1Brown',
201c1459 3068 },
8bdd16b4 3069 }, {
da692b79 3070 'note': 'playlists, singlepage',
8bdd16b4 3071 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3072 'playlist_mincount': 4,
3073 'info_dict': {
3074 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3075 'title': 'ThirstForScience - Playlists',
3076 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 3077 'uploader': 'ThirstForScience',
3078 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 3079 }
3080 }, {
3081 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3082 'only_matching': True,
3083 }, {
da692b79 3084 'note': 'basic, single video playlist',
0e30a7b9 3085 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3086 'info_dict': {
0e30a7b9 3087 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3088 'uploader': 'Sergey M.',
3089 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3090 'title': 'youtube-dl public playlist',
81127aa5 3091 },
0e30a7b9 3092 'playlist_count': 1,
9291475f 3093 }, {
da692b79 3094 'note': 'empty playlist',
0e30a7b9 3095 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3096 'info_dict': {
0e30a7b9 3097 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3098 'uploader': 'Sergey M.',
3099 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3100 'title': 'youtube-dl empty playlist',
9291475f
PH
3101 },
3102 'playlist_count': 0,
3103 }, {
da692b79 3104 'note': 'Home tab',
8bdd16b4 3105 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3106 'info_dict': {
8bdd16b4 3107 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3108 'title': 'lex will - Home',
3109 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3110 'uploader': 'lex will',
3111 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3112 },
8bdd16b4 3113 'playlist_mincount': 2,
9291475f 3114 }, {
da692b79 3115 'note': 'Videos tab',
8bdd16b4 3116 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3117 'info_dict': {
8bdd16b4 3118 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3119 'title': 'lex will - Videos',
3120 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3121 'uploader': 'lex will',
3122 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3123 },
8bdd16b4 3124 'playlist_mincount': 975,
9291475f 3125 }, {
da692b79 3126 'note': 'Videos tab, sorted by popular',
8bdd16b4 3127 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3128 'info_dict': {
8bdd16b4 3129 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3130 'title': 'lex will - Videos',
3131 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3132 'uploader': 'lex will',
3133 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3134 },
8bdd16b4 3135 'playlist_mincount': 199,
9291475f 3136 }, {
da692b79 3137 'note': 'Playlists tab',
8bdd16b4 3138 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3139 'info_dict': {
8bdd16b4 3140 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3141 'title': 'lex will - Playlists',
3142 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3143 'uploader': 'lex will',
3144 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3145 },
8bdd16b4 3146 'playlist_mincount': 17,
ac7553d0 3147 }, {
da692b79 3148 'note': 'Community tab',
8bdd16b4 3149 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3150 'info_dict': {
8bdd16b4 3151 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3152 'title': 'lex will - Community',
3153 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3154 'uploader': 'lex will',
3155 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3156 },
3157 'playlist_mincount': 18,
87dadd45 3158 }, {
da692b79 3159 'note': 'Channels tab',
8bdd16b4 3160 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3161 'info_dict': {
8bdd16b4 3162 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3163 'title': 'lex will - Channels',
3164 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3165 'uploader': 'lex will',
3166 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3167 },
deaec5af 3168 'playlist_mincount': 12,
cd684175 3169 }, {
3170 'note': 'Search tab',
3171 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3172 'playlist_mincount': 40,
3173 'info_dict': {
3174 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3175 'title': '3Blue1Brown - Search - linear algebra',
3176 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3177 'uploader': '3Blue1Brown',
3178 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3179 },
6b08cdf6 3180 }, {
a0566bbf 3181 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3182 'only_matching': True,
3183 }, {
a0566bbf 3184 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3185 'only_matching': True,
3186 }, {
a0566bbf 3187 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3188 'only_matching': True,
3189 }, {
3190 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3191 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3192 'info_dict': {
3193 'title': '29C3: Not my department',
3194 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3195 'uploader': 'Christiaan008',
3196 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3197 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3198 },
3199 'playlist_count': 96,
3200 }, {
3201 'note': 'Large playlist',
3202 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3203 'info_dict': {
8bdd16b4 3204 'title': 'Uploads from Cauchemar',
3205 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3206 'uploader': 'Cauchemar',
3207 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3208 },
8bdd16b4 3209 'playlist_mincount': 1123,
3210 }, {
da692b79 3211 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3212 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3213 'only_matching': True,
4b7df0d3
JMF
3214 }, {
3215 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3216 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3217 'info_dict': {
acf757f4
PH
3218 'title': 'Uploads from Interstellar Movie',
3219 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3220 'uploader': 'Interstellar Movie',
8bdd16b4 3221 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3222 },
481cc733 3223 'playlist_mincount': 21,
358de58c 3224 }, {
3225 'note': 'Playlist with "show unavailable videos" button',
3226 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3227 'info_dict': {
3228 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3229 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3230 'uploader': 'Phim Siêu Nhân Nhật Bản',
3231 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3232 },
da692b79 3233 'playlist_mincount': 200,
5d342002 3234 }, {
da692b79 3235 'note': 'Playlist with unavailable videos in page 7',
5d342002 3236 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3237 'info_dict': {
3238 'title': 'Uploads from BlankTV',
3239 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3240 'uploader': 'BlankTV',
3241 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3242 },
da692b79 3243 'playlist_mincount': 1000,
8bdd16b4 3244 }, {
da692b79 3245 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3246 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3247 'info_dict': {
3248 'title': 'Data Analysis with Dr Mike Pound',
3249 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3250 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3251 'uploader': 'Computerphile',
deaec5af 3252 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3253 },
3254 'playlist_mincount': 11,
3255 }, {
a0566bbf 3256 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3257 'only_matching': True,
dacb3a86 3258 }, {
da692b79 3259 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3260 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3261 'info_dict': {
3262 'id': 'FqZTN594JQw',
3263 'ext': 'webm',
3264 'title': "Smiley's People 01 detective, Adventure Series, Action",
3265 'uploader': 'STREEM',
3266 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3267 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3268 'upload_date': '20150526',
3269 'license': 'Standard YouTube License',
3270 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3271 'categories': ['People & Blogs'],
3272 'tags': list,
dbdaaa23 3273 'view_count': int,
dacb3a86
S
3274 'like_count': int,
3275 'dislike_count': int,
3276 },
3277 'params': {
3278 'skip_download': True,
3279 },
13a75688 3280 'skip': 'This video is not available.',
dacb3a86 3281 'add_ie': [YoutubeIE.ie_key()],
481cc733 3282 }, {
8bdd16b4 3283 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3284 'only_matching': True,
66b48727 3285 }, {
8bdd16b4 3286 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3287 'only_matching': True,
a0566bbf 3288 }, {
3289 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3290 'info_dict': {
da692b79 3291 'id': 'X1whbWASnNQ', # This will keep changing
a0566bbf 3292 'ext': 'mp4',
deaec5af 3293 'title': compat_str,
a0566bbf 3294 'uploader': 'Sky News',
3295 'uploader_id': 'skynews',
3296 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3297 'upload_date': r're:\d{8}',
3298 'description': compat_str,
a0566bbf 3299 'categories': ['News & Politics'],
3300 'tags': list,
3301 'like_count': int,
3302 'dislike_count': int,
3303 },
3304 'params': {
3305 'skip_download': True,
3306 },
da692b79 3307 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3308 }, {
3309 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3310 'info_dict': {
3311 'id': 'a48o2S1cPoo',
3312 'ext': 'mp4',
3313 'title': 'The Young Turks - Live Main Show',
3314 'uploader': 'The Young Turks',
3315 'uploader_id': 'TheYoungTurks',
3316 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3317 'upload_date': '20150715',
3318 'license': 'Standard YouTube License',
3319 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3320 'categories': ['News & Politics'],
3321 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3322 'like_count': int,
3323 'dislike_count': int,
3324 },
3325 'params': {
3326 'skip_download': True,
3327 },
3328 'only_matching': True,
3329 }, {
3330 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3331 'only_matching': True,
3332 }, {
3333 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3334 'only_matching': True,
09f1580e 3335 }, {
3336 'note': 'A channel that is not live. Should raise error',
3337 'url': 'https://www.youtube.com/user/numberphile/live',
3338 'only_matching': True,
3d3dddc9 3339 }, {
3340 'url': 'https://www.youtube.com/feed/trending',
3341 'only_matching': True,
3342 }, {
3d3dddc9 3343 'url': 'https://www.youtube.com/feed/library',
3344 'only_matching': True,
3345 }, {
3d3dddc9 3346 'url': 'https://www.youtube.com/feed/history',
3347 'only_matching': True,
3348 }, {
3d3dddc9 3349 'url': 'https://www.youtube.com/feed/subscriptions',
3350 'only_matching': True,
3351 }, {
3d3dddc9 3352 'url': 'https://www.youtube.com/feed/watch_later',
3353 'only_matching': True,
3354 }, {
da692b79 3355 'note': 'Recommended - redirects to home page',
3d3dddc9 3356 'url': 'https://www.youtube.com/feed/recommended',
3357 'only_matching': True,
29f7c58a 3358 }, {
da692b79 3359 'note': 'inline playlist with not always working continuations',
29f7c58a 3360 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3361 'only_matching': True,
3362 }, {
3363 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3364 'only_matching': True,
3365 }, {
3366 'url': 'https://www.youtube.com/course',
3367 'only_matching': True,
3368 }, {
3369 'url': 'https://www.youtube.com/zsecurity',
3370 'only_matching': True,
3371 }, {
3372 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3373 'only_matching': True,
3374 }, {
3375 'url': 'https://www.youtube.com/TheYoungTurks/live',
3376 'only_matching': True,
39ed931e 3377 }, {
3378 'url': 'https://www.youtube.com/hashtag/cctv9',
3379 'info_dict': {
3380 'id': 'cctv9',
3381 'title': '#cctv9',
3382 },
3383 'playlist_mincount': 350,
201c1459 3384 }, {
3385 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3386 'only_matching': True,
9297939e 3387 }, {
da692b79 3388 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3389 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3390 'only_matching': True
fe03a6cd 3391 }, {
3392 'note': '/browse/ should redirect to /channel/',
3393 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3394 'only_matching': True
3395 }, {
3396 'note': 'VLPL, should redirect to playlist?list=PL...',
3397 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3398 'info_dict': {
3399 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3400 'uploader': 'NoCopyrightSounds',
3401 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3402 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3403 'title': 'NCS Releases',
3404 },
3405 'playlist_mincount': 166,
18db7548 3406 }, {
3407 'note': 'Topic, should redirect to playlist?list=UU...',
3408 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3409 'info_dict': {
3410 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3411 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3412 'title': 'Uploads from Royalty Free Music - Topic',
3413 'uploader': 'Royalty Free Music - Topic',
3414 },
3415 'expected_warnings': [
3416 'A channel/user page was given',
3417 'The URL does not have a videos tab',
3418 ],
3419 'playlist_mincount': 101,
3420 }, {
3421 'note': 'Topic without a UU playlist',
3422 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3423 'info_dict': {
3424 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3425 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3426 },
3427 'expected_warnings': [
3428 'A channel/user page was given',
3429 'The URL does not have a videos tab',
3430 'Falling back to channel URL',
3431 ],
3432 'playlist_mincount': 9,
abcdd12b 3433 }, {
3434 'note': 'Youtube music Album',
3435 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3436 'info_dict': {
3437 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3438 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3439 },
3440 'playlist_count': 50,
47193e02 3441 }, {
3442 'note': 'unlisted single video playlist',
3443 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3444 'info_dict': {
3445 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3446 'uploader': 'colethedj',
3447 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3448 'title': 'yt-dlp unlisted playlist test',
3449 'availability': 'unlisted'
3450 },
3451 'playlist_count': 1,
29f7c58a 3452 }]
3453
3454 @classmethod
3455 def suitable(cls, url):
3456 return False if YoutubeIE.suitable(url) else super(
3457 YoutubeTabIE, cls).suitable(url)
8bdd16b4 3458
3459 def _extract_channel_id(self, webpage):
3460 channel_id = self._html_search_meta(
3461 'channelId', webpage, 'channel id', default=None)
3462 if channel_id:
3463 return channel_id
3464 channel_url = self._html_search_meta(
3465 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3466 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3467 'twitter:app:url:googleplay'), webpage, 'channel url')
3468 return self._search_regex(
3469 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3470 channel_url, 'channel id')
15f6397c 3471
8bdd16b4 3472 @staticmethod
cd7c66cf 3473 def _extract_basic_item_renderer(item):
3474 # Modified from _extract_grid_item_renderer
201c1459 3475 known_basic_renderers = (
3476 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3477 )
3478 for key, renderer in item.items():
201c1459 3479 if not isinstance(renderer, dict):
cd7c66cf 3480 continue
201c1459 3481 elif key in known_basic_renderers:
3482 return renderer
3483 elif key.startswith('grid') and key.endswith('Renderer'):
3484 return renderer
8bdd16b4 3485
8bdd16b4 3486 def _grid_entries(self, grid_renderer):
3487 for item in grid_renderer['items']:
3488 if not isinstance(item, dict):
39b62db1 3489 continue
cd7c66cf 3490 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 3491 if not isinstance(renderer, dict):
3492 continue
fe93e2c4 3493 title = self._get_text(renderer.get('title'))
3494
8bdd16b4 3495 # playlist
3496 playlist_id = renderer.get('playlistId')
3497 if playlist_id:
3498 yield self.url_result(
3499 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3500 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3501 video_title=title)
201c1459 3502 continue
8bdd16b4 3503 # video
3504 video_id = renderer.get('videoId')
3505 if video_id:
3506 yield self._extract_video(renderer)
201c1459 3507 continue
8bdd16b4 3508 # channel
3509 channel_id = renderer.get('channelId')
3510 if channel_id:
8bdd16b4 3511 yield self.url_result(
3512 'https://www.youtube.com/channel/%s' % channel_id,
3513 ie=YoutubeTabIE.ie_key(), video_title=title)
201c1459 3514 continue
3515 # generic endpoint URL support
3516 ep_url = urljoin('https://www.youtube.com/', try_get(
3517 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3518 compat_str))
3519 if ep_url:
3520 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3521 if ie.suitable(ep_url):
3522 yield self.url_result(
3523 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3524 break
8bdd16b4 3525
3d3dddc9 3526 def _shelf_entries_from_content(self, shelf_renderer):
3527 content = shelf_renderer.get('content')
3528 if not isinstance(content, dict):
8bdd16b4 3529 return
cd7c66cf 3530 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3531 if renderer:
3532 # TODO: add support for nested playlists so each shelf is processed
3533 # as separate playlist
3534 # TODO: this includes only first N items
3535 for entry in self._grid_entries(renderer):
3536 yield entry
3537 renderer = content.get('horizontalListRenderer')
3538 if renderer:
3539 # TODO
3540 pass
8bdd16b4 3541
29f7c58a 3542 def _shelf_entries(self, shelf_renderer, skip_channels=False):
8bdd16b4 3543 ep = try_get(
3544 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3545 compat_str)
3546 shelf_url = urljoin('https://www.youtube.com', ep)
3d3dddc9 3547 if shelf_url:
29f7c58a 3548 # Skipping links to another channels, note that checking for
3549 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3550 # will not work
3551 if skip_channels and '/channels?' in shelf_url:
3552 return
fe93e2c4 3553 title = self._get_text(shelf_renderer, lambda x: x['title'])
3d3dddc9 3554 yield self.url_result(shelf_url, video_title=title)
3555 # Shelf may not contain shelf URL, fallback to extraction from content
3556 for entry in self._shelf_entries_from_content(shelf_renderer):
3557 yield entry
c5e8d7af 3558
8bdd16b4 3559 def _playlist_entries(self, video_list_renderer):
3560 for content in video_list_renderer['contents']:
3561 if not isinstance(content, dict):
3562 continue
3563 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3564 if not isinstance(renderer, dict):
3565 continue
3566 video_id = renderer.get('videoId')
3567 if not video_id:
3568 continue
3569 yield self._extract_video(renderer)
07aeced6 3570
3462ffa8 3571 def _rich_entries(self, rich_grid_renderer):
3572 renderer = try_get(
70d5c17b 3573 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3462ffa8 3574 video_id = renderer.get('videoId')
3575 if not video_id:
3576 return
3577 yield self._extract_video(renderer)
3578
8bdd16b4 3579 def _video_entry(self, video_renderer):
3580 video_id = video_renderer.get('videoId')
3581 if video_id:
3582 return self._extract_video(video_renderer)
dacb3a86 3583
8bdd16b4 3584 def _post_thread_entries(self, post_thread_renderer):
3585 post_renderer = try_get(
3586 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3587 if not post_renderer:
3588 return
3589 # video attachment
3590 video_renderer = try_get(
895b0931 3591 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3592 video_id = video_renderer.get('videoId')
3593 if video_id:
3594 entry = self._extract_video(video_renderer)
8bdd16b4 3595 if entry:
3596 yield entry
895b0931 3597 # playlist attachment
3598 playlist_id = try_get(
3599 post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3600 if playlist_id:
3601 yield self.url_result(
e28f1c0a 3602 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3603 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3604 # inline video links
3605 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3606 for run in runs:
3607 if not isinstance(run, dict):
3608 continue
3609 ep_url = try_get(
3610 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3611 if not ep_url:
3612 continue
3613 if not YoutubeIE.suitable(ep_url):
3614 continue
3615 ep_video_id = YoutubeIE._match_id(ep_url)
3616 if video_id == ep_video_id:
3617 continue
895b0931 3618 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 3619
8bdd16b4 3620 def _post_thread_continuation_entries(self, post_thread_continuation):
3621 contents = post_thread_continuation.get('contents')
3622 if not isinstance(contents, list):
3623 return
3624 for content in contents:
3625 renderer = content.get('backstagePostThreadRenderer')
3626 if not isinstance(renderer, dict):
3627 continue
3628 for entry in self._post_thread_entries(renderer):
3629 yield entry
07aeced6 3630
39ed931e 3631 r''' # unused
3632 def _rich_grid_entries(self, contents):
3633 for content in contents:
3634 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3635 if video_renderer:
3636 entry = self._video_entry(video_renderer)
3637 if entry:
3638 yield entry
3639 '''
    def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
        """Generator yielding the entries of *tab*, then of its continuation pages.

        The entries under ``tab['content']`` are yielded first.  While a
        continuation is available, further pages are requested through
        _extract_response and their entries yielded as well.  *item_id* is
        only used to label the page requests; *identity_token*,
        *account_syncid* and *ytcfg* are forwarded to the header/request
        helpers.
        """

        def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
            # Walks parent_renderer['contents'], yielding entries and recording
            # the continuation it finds in continuation_list[0].
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # Not an item section: only richItemRenderer is handled here
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue

                    # Renderer key -> callable producing entries for that renderer
                    known_renderers = {
                        'playlistVideoListRenderer': self._playlist_entries,
                        'gridRenderer': self._grid_entries,
                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                        'backstagePostThreadRenderer': self._post_thread_entries,
                        'videoRenderer': lambda x: [self._video_entry(x)],
                    }
                    for key, renderer in isr_content.items():
                        if key not in known_renderers:
                            continue
                        for entry in known_renderers[key](renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        # Only the first known renderer of this item is processed
                        break

                # Fall back to the item section's own continuation
                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            # Last resort: the continuation of the parent renderer itself
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        # One-element list used as a cell that extract_entries can write to
        continuation_list = [None]  # Python 2 does not support nonlocal
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]
        visitor_data = None

        for page_num in itertools.count(1):
            if not continuation:
                break
            headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=continuation, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # Keep the previous visitor data when the response carries none
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            # First response shape: entries under 'continuationContents'
            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                # Fresh cell so a continuation set by extract_entries belongs to this page
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # Second response shape: items appended through onResponseReceived*.
            # Each value is (entry extractor, key under which the extractor
            # expects to find the list of items).
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            # The first item's renderer key decides how the whole list is handled
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                # Wrap the item list in a dict shaped like the renderer the extractor expects
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            # Neither response shape matched: stop paging
            break
9558dcec 3754
8bdd16b4 3755 @staticmethod
3756 def _extract_selected_tab(tabs):
3757 for tab in tabs:
cd684175 3758 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3759 if renderer.get('selected') is True:
3760 return renderer
2b3c2546 3761 else:
8bdd16b4 3762 raise ExtractorError('Unable to find selected tab')
b82f815f 3763
47193e02 3764 @classmethod
3765 def _extract_uploader(cls, data):
8bdd16b4 3766 uploader = {}
47193e02 3767 renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3768 owner = try_get(
3769 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3770 if owner:
3771 uploader['uploader'] = owner.get('text')
3772 uploader['uploader_id'] = try_get(
3773 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3774 uploader['uploader_url'] = urljoin(
3775 'https://www.youtube.com/',
3776 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
9c3fe2ef 3777 return {k: v for k, v in uploader.items() if v is not None}
8bdd16b4 3778
d069eca7 3779 def _extract_from_tabs(self, item_id, webpage, data, tabs):
b60419c5 3780 playlist_id = title = description = channel_url = channel_name = channel_id = None
3781 thumbnails_list = tags = []
3782
8bdd16b4 3783 selected_tab = self._extract_selected_tab(tabs)
3784 renderer = try_get(
3785 data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
3786 if renderer:
b60419c5 3787 channel_name = renderer.get('title')
3788 channel_url = renderer.get('channelUrl')
3789 channel_id = renderer.get('externalId')
39ed931e 3790 else:
64c0d954 3791 renderer = try_get(
3792 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
39ed931e 3793
8bdd16b4 3794 if renderer:
3795 title = renderer.get('title')
ecc97af3 3796 description = renderer.get('description', '')
b60419c5 3797 playlist_id = channel_id
3798 tags = renderer.get('keywords', '').split()
3799 thumbnails_list = (
3800 try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
ff84930c 3801 or try_get(
47193e02 3802 self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
3803 lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
ff84930c 3804 list)
b60419c5 3805 or [])
3806
3807 thumbnails = []
3808 for t in thumbnails_list:
3809 if not isinstance(t, dict):
3810 continue
3811 thumbnail_url = url_or_none(t.get('url'))
3812 if not thumbnail_url:
3813 continue
3814 thumbnails.append({
3815 'url': thumbnail_url,
3816 'width': int_or_none(t.get('width')),
3817 'height': int_or_none(t.get('height')),
3818 })
3462ffa8 3819 if playlist_id is None:
70d5c17b 3820 playlist_id = item_id
3821 if title is None:
39ed931e 3822 title = (
3823 try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
3824 or playlist_id)
b60419c5 3825 title += format_field(selected_tab, 'title', ' - %s')
cd684175 3826 title += format_field(selected_tab, 'expandedText', ' - %s')
b60419c5 3827 metadata = {
3828 'playlist_id': playlist_id,
3829 'playlist_title': title,
3830 'playlist_description': description,
3831 'uploader': channel_name,
3832 'uploader_id': channel_id,
3833 'uploader_url': channel_url,
3834 'thumbnails': thumbnails,
3835 'tags': tags,
3836 }
47193e02 3837 availability = self._extract_availability(data)
3838 if availability:
3839 metadata['availability'] = availability
b60419c5 3840 if not channel_id:
3841 metadata.update(self._extract_uploader(data))
3842 metadata.update({
3843 'channel': metadata['uploader'],
3844 'channel_id': metadata['uploader_id'],
3845 'channel_url': metadata['uploader_url']})
fe93e2c4 3846 ytcfg = self._extract_ytcfg(item_id, webpage)
b60419c5 3847 return self.playlist_result(
d069eca7
M
3848 self._entries(
3849 selected_tab, playlist_id,
3850 self._extract_identity_token(webpage, item_id),
fe93e2c4 3851 self._extract_account_syncid(ytcfg, data), ytcfg),
b60419c5 3852 **metadata)
73c4ac2c 3853
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """
    Generate the entries of a Mix playlist, page by page.

    The entries of the current page come from self._playlist_entries().
    Entries up to and including the last one already yielded are skipped.
    Iteration ends when a page has no entries, when a page has no new
    entries, or when the very first video shows up again.
    Further pages are requested from the 'next' endpoint.

    @param playlist: playlist renderer of the first page
    @param playlist_id: id of the Mix playlist
    @param data: initial data, used to extract the account sync id
    @param webpage: webpage, used to extract ytcfg and the identity token
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # try_get returns None when the path is missing; fall back to an empty
        # dict so that the per-key defaults below are used instead of crashing
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
cd7c66cf 3889
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """
    Build the result for a playlist shown next to a video (watch page).

    Anything that has its own playlist URL different from the current URL is
    handed over to YoutubeTabIE; everything else is extracted as a Mix.
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, relative_url)
    if playlist_url and playlist_url != url:
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=title)

    mix_entries = self._extract_mix_playlist(playlist, playlist_id, data, webpage)
    return self.playlist_result(
        mix_entries, playlist_id=playlist_id, playlist_title=title)
c5e8d7af 3907
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    The flags stay None (unknown) unless a badge or the selected entry of the
    privacy dropdown says otherwise; nothing is assumed to be public.
    @param data: response
    """
    is_private = is_unlisted = None
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for entry in dropdown_entries:
        if not try_get(entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        # Only the first selected entry is considered
        selected_label = self._get_text(
            try_get(entry, lambda x: x['privacyDropdownItemRenderer']['label'], dict) or [])
        if selected_label:
            badge_labels.add(selected_label.lower())
        break

    for badge in badge_labels:
        if badge == 'unlisted':
            is_unlisted = True
        elif badge == 'private':
            is_private = True
        elif badge == 'public':
            is_unlisted = is_private = False
    return self._availability(is_private, False, False, False, is_unlisted)
3940
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` value of type `expected_type`
    among the playlist sidebar items, or None if there is none.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        match = try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        if match:
            return match
    return None
3949
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Request the playlist again, this time including unavailable videos.

    The browse id and params are taken from the 'show unavailable videos'
    menu button when it exists; otherwise default values are used.
    Nothing is requested (None is returned) when the primary sidebar
    renderer is missing. The request is non-fatal.
    """
    primary_info = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not primary_info:
        return

    browse_id = params = None
    menu_items = try_get(
        primary_info, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        button_text = try_get(
            nav_item, lambda x: x['text']['simpleText'], compat_str)
        if not button_text or button_text.lower() != 'show unavailable videos':
            continue
        endpoint = try_get(
            nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
        browse_id, params = endpoint.get('browseId'), endpoint.get('params')
        break

    ytcfg = self._extract_ytcfg(item_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=try_get(
            self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    # Fall back to default values when the button was not found
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 3988
def _extract_webpage(self, url, item_id):
    """
    Download the webpage and its initial data, retrying on incomplete data.

    The number of retries is given by the 'extractor_retries' param
    (default 3). Returns the tuple (webpage, data).
    Raises ExtractorError when the data is still incomplete after the last retry.
    """
    max_retries = self.get_param('extractor_retries', 3)
    last_error = 'Incomplete yt initial data recieved'
    count = -1
    while count < max_retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if count:
            self.report_warning('%s. Retrying ...' % last_error)
        retry_suffix = ' (retry #%d)' % count if count else ''
        webpage = self._download_webpage(
            url, item_id, 'Downloading webpage%s' % retry_suffix)
        data = self._extract_yt_initial_data(item_id, webpage)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        if count >= max_retries:
            raise ExtractorError(last_error)
    return webpage, data
4010
@staticmethod
def _smuggle_data(entries, data):
    """Lazily yield every entry, with `data` smuggled into its url when `data` is non-empty."""
    for item in entries:
        if data:
            item['url'] = smuggle_url(item['url'], data)
        yield item
4017
def _real_extract(self, url):
    """
    Entry point: unsmuggle the url, run the actual extraction and pass the
    smuggled data (plus the music url flag) on to every resulting entry.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True
    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
4026
# Splits a URL into <pre> (the part matched by _VALID_URL), an optional <tab>
# path component and <post> (everything that is left).
# The (?(channel_type)...) conditional only attempts to match a tab when the
# "channel_type" group took part in the match.
# NOTE(review): "channel_type" is presumably a named group of _VALID_URL - confirm
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4028
def __real_extract(self, url, smuggled_data):
    """
    Extract a tab, playlist or video page.

    Steps, in order:
    1. Normalize the URL (host forced to www.youtube.com) and split it with _url_re.
    2. Rewrite some URLs: Youtube music channels, channel home pages and
       watch URLs without a video id.
    3. Download the webpage and, for a missing /videos tab, try the
       equivalent UU playlist.
    4. Optionally reload the data with unavailable videos, then dispatch to
       tab extraction, playlist extraction or a single video result.

    @param url: URL to extract
    @param smuggled_data: dict of smuggled data; only 'is_music_url' is read here
    Raises ExtractorError if the page cannot be recognized.
    """
    item_id = self._match_id(url)
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Groups that did not take part in the match become '' instead of None
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                item_id = self._search_regex(
                    r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                    self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                    'playlist id')
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from the (possibly rewritten) parts and split it again
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            if alert_type == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        # Switch over to the playlist only after it loaded without error alerts
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        # Keep the data already downloaded if the reload returns nothing
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)
    # data may have been replaced above, so the tabs are looked up again
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    # Neither tabs nor playlist: fall back to a single video if one is known
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 4143
c5e8d7af 4144
# Accepts bare playlist ids and URLs carrying a "list" parameter, and forwards
# them to YoutubeTabIE as a regular https://www.youtube.com/playlist URL.
class YoutubePlaylistIE(InfoExtractor):
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE handles or that carry a video id ("v" parameter)."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Redirect to the equivalent playlist URL, keeping the original query if there is one."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        query = parse_qs(url) or {'list': playlist_id}
        url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4227
4228
# Handles youtu.be short links that carry both a video id and a playlist id,
# by forwarding the equivalent watch URL to YoutubeTabIE.
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Translate the short link into a watch URL with "v", "list" and "feature" parameters."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4267
4268
# "ytuser:<name>" keyword: forwards to the /user/<name> page via YoutubeTabIE.
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url result for the user page of the given user id."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4282
b05654f0 4283
# ":ytfav" keyword (and its spelling variants): forwards to the "LL" playlist.
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url result for the "LL" playlist, handled by YoutubeTabIE."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
4301
4302
# "ytsearch" keyword: pages through the 'search' endpoint and yields the
# video results found in each page.
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """
        Generate up to `n` video results for `query`.

        Stops early when a page cannot be fetched, has no recognizable
        contents, or provides no continuation for the next page.
        """
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        continuation = {}
        for page_num in itertools.count(1):
            data.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # First page and continuation pages keep the results in different places
            sections = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for section in sections:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [section]})

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list) or []
                for item in items:
                    video = item.get('videoRenderer') if isinstance(item, dict) else None
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 4370
c9ae7b95 4371
# Same as YoutubeSearchIE, but with the search params for "newest videos first".
class YoutubeSearchDateIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4377
c9ae7b95 4378
# Handles youtube.com/results URLs by running the search they describe.
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Use the URL pattern of this class as is."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the query and the "sp" search params from the URL and fetch the results."""
        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        # The search params are stored on the instance, where _entries reads them
        self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4404
4405
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors.
    The _FEED_NAME property has to be defined by each subclass.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        # The extractor name is derived from the feed name
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        # Every feed lives at /feed/<name> and is handled by YoutubeTabIE
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4422
4423
# ":ytwatchlater" keyword: forwards to the "WL" playlist.
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url result for the "WL" playlist, handled by YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4436
4437
# ":ytrec" keyword and the bare youtube.com home page: the "recommended" feed.
# Unlike the other feeds, this one does not require login.
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4453
1ed5b5c9 4454
# ":ytsubs" keyword (and its variants): the "subscriptions" feed.
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4466
1ed5b5c9 4467
# ":ythis" / ":ythistory" keyword: the "history" feed.
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4476
4477
# Matches watch/attribution_link URLs that end right after a single query
# parameter other than the video id. Such URLs always produce an error that
# explains how to quote the URL.
class YoutubeTruncatedURLIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an (expected) ExtractorError that explains the likely cause."""
        # The example commands name this program (yt-dlp), not youtube-dl
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc.',
            expected=True)
772fd5cc
PH
4525
4526
# Matches watch URLs whose video id has only 1 to 10 characters and reports
# them as truncated.
class YoutubeTruncatedIDIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an (expected) ExtractorError naming the incomplete id."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)