]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[youtube] simplify and de-duplicate client definitions (#577)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
2d6659b9 5import base64
d92f5d5a 6import calendar
109dd3b2 7import copy
fe93e2c4 8import datetime
a5c56234 9import hashlib
0ca96d48 10import itertools
c5e8d7af 11import json
c4417ddb 12import os.path
d77ab8e2 13import random
c5e8d7af 14import re
8a784c74 15import time
e0df6211 16import traceback
c5e8d7af 17
b05654f0 18from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 19from ..compat import (
edf3e38e 20 compat_chr,
29f7c58a 21 compat_HTTPError,
c5e8d7af 22 compat_parse_qs,
545cc85d 23 compat_str,
7fd002c0 24 compat_urllib_parse_unquote_plus,
15707c7e 25 compat_urllib_parse_urlencode,
7c80519c 26 compat_urllib_parse_urlparse,
7c61bd36 27 compat_urlparse,
4bb4a188 28)
545cc85d 29from ..jsinterp import JSInterpreter
4bb4a188 30from ..utils import (
2d6659b9 31 bytes_to_intlist,
c5e8d7af 32 clean_html,
d92f5d5a 33 datetime_from_str,
11f9be09 34 dict_get,
358de58c 35 error_to_compat_str,
c5e8d7af 36 ExtractorError,
2d30521a 37 float_or_none,
11f9be09 38 format_field,
dd27fd17 39 int_or_none,
2d6659b9 40 intlist_to_bytes,
94278f72 41 mimetype2ext,
9c0d7f49 42 network_exceptions,
11f9be09 43 orderedSet,
6310acf5 44 parse_codecs,
49bd8c66 45 parse_count,
7c80519c 46 parse_duration,
7ea65411 47 parse_iso8601,
dca3ff4a 48 qualities,
3995d37d 49 remove_start,
cf7e015f 50 smuggle_url,
dbdaaa23 51 str_or_none,
c93d53f5 52 str_to_int,
7c365c21 53 traverse_obj,
556dbe7f 54 try_get,
c5e8d7af
PH
55 unescapeHTML,
56 unified_strdate,
cf7e015f 57 unsmuggle_url,
8bdd16b4 58 update_url_query,
21c340b8 59 url_or_none,
6e6bc8da 60 urlencode_postdata,
fe93e2c4 61 urljoin,
7c365c21 62 variadic,
c5e8d7af
PH
63)
64
5f6a1245 65
def parse_qs(url):
    """Parse the query component of *url* with compat_urlparse.parse_qs and return the result."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
68
69
# any clients starting with _ cannot be explicitly requested by the user
#
# Maps a client id to its default ytcfg. Each entry carries the API key, the
# Innertube context (client name and version) and the numeric client name;
# the music clients additionally pin 'INNERTUBE_HOST'.
# build_innertube_clients() below mutates this table in place: it fills in
# missing defaults, adds a 'priority' to every entry and registers the
# '<client>_agegate' variants.
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
    },
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5
    },
    'ios_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26
    },
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20210721.07.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
}
176
177
def build_innertube_clients():
    """Normalise INNERTUBE_CLIENTS in place and register the age-gate variants.

    For every client present when the function starts, this fills in default
    values for 'INNERTUBE_API_KEY', 'INNERTUBE_HOST' and the context's 'hl',
    and stores a 'priority' derived from the client's base name (the part
    before the first underscore).

    Each base client additionally gets a deep-copied '<client>_agegate' entry
    whose context has clientScreen 'EMBED' and whose priority is one below the
    base client's. '*_embedded' clients are lowered by 2, all remaining
    clients by 3.
    """
    base_clients = ('android', 'web', 'ios', 'mweb')
    # NOTE(review): presumably qualities() ranks an id by its position in the
    # given sequence, so reversing makes the first base client rank highest.
    priority = qualities(base_clients[::-1])

    # Iterate over a snapshot: the loop adds '<client>_agegate' keys to the dict.
    for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
        # Fallback key: the one the 'mweb' and 'ios_embedded' entries use,
        # written with its two hyphens (a 39-character key like all others).
        ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
        ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
        ytcfg['priority'] = 10 * priority(client.split('_', 1)[0])

        if client in base_clients:
            # Copy after the defaults and base priority have been applied so
            # the variant inherits them.
            INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
            agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            agegate_ytcfg['priority'] -= 1
        elif client.endswith('_embedded'):
            ytcfg['priority'] -= 2
        else:
            ytcfg['priority'] -= 3
196
197
198build_innertube_clients()
199
200
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _login(self):
        """
        Validate the configured login options.

        Logging in with username and password is broken, so no login is
        performed here; only cookies are supported.

        If _LOGIN_REQUIRED is set and neither the 'cookiefile' nor the
        'cookiesfrombrowser' param is given, raise_login_required() is called.
        If a username is configured, a warning with the cookies hint is emitted.

        Always returns None.
        """
        if (self._LOGIN_REQUIRED
                and self.get_param('cookiefile') is None
                and self.get_param('cookiesfrombrowser') is None):
            self.raise_login_required(
                'Login details are needed to download this content', method='cookies')
        username, _ = self._get_login_info()
        if username:
            self.report_warning(
                'Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
        return

    def _initialize_consent(self):
        """Set a 'YES' CONSENT cookie for .youtube.com unless one is unnecessary.

        Nothing is done when a '__Secure-3PSID' cookie exists or when the
        current CONSENT cookie already contains 'YES'. Otherwise the numeric id
        is taken from a 'PENDING+<digits>' CONSENT value when available, or
        chosen at random from 100..999.
        """
        cookies = self._get_cookies('https://www.youtube.com/')
        if cookies.get('__Secure-3PSID'):
            return
        consent_id = None
        consent = cookies.get('CONSENT')
        if consent:
            if 'YES' in consent.value:
                return
            consent_id = self._search_regex(
                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
        if not consent_id:
            consent_id = random.randint(100, 999)
        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

    def _real_initialize(self):
        """Prepare the consent cookie and, when a downloader is attached, run _login()."""
        self._initialize_consent()
        if self._downloader is None:
            return
        if not self._login():
            return

    # Patterns used to locate the JSON blobs embedded in a watch/browse page.
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _get_default_ytcfg(self, client='web'):
        """Return a deep copy of the INNERTUBE_CLIENTS entry for *client*."""
        return copy.deepcopy(INNERTUBE_CLIENTS[client])

    def _get_innertube_host(self, client='web'):
        """Return the 'INNERTUBE_HOST' configured for *client*."""
        return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']

    def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
        """try_get() on *ytcfg*, falling back to the default ytcfg of *default_client*.

        The fallback is used whenever the value obtained from *ytcfg* is falsy.
        """
        # try_get but with fallback to default ytcfg client values when present
        _func = lambda y: try_get(y, getter, expected_type)
        return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))

    def _extract_client_name(self, ytcfg, default_client='web'):
        """Return the client name from *ytcfg*, or the default client's name.

        'INNERTUBE_CLIENT_NAME' is preferred over the name stored in the context.
        """
        return (
            try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str)
            or self._ytcfg_get_safe(
                ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'], compat_str, default_client))

    @staticmethod
    def _extract_session_index(*data):
        """Return the first 'SESSION_INDEX' among *data* that int_or_none() accepts, else None."""
        for ytcfg in data:
            session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
            if session_index is not None:
                return session_index

    def _extract_client_version(self, ytcfg, default_client='web'):
        """Return the client version from *ytcfg*, or the default client's version.

        'INNERTUBE_CLIENT_VERSION' is preferred over the version stored in the context.
        """
        return (
            try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str)
            or self._ytcfg_get_safe(
                ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'], compat_str, default_client))

    def _extract_api_key(self, ytcfg=None, default_client='web'):
        """Return 'INNERTUBE_API_KEY' from *ytcfg*, or the default client's key."""
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)

    def _extract_context(self, ytcfg=None, default_client='web'):
        """Return the Innertube context dict to send with an API request.

        If *ytcfg* holds a non-empty 'INNERTUBE_CONTEXT' dict it is returned
        as-is. Otherwise a copy of the default client's context is used; when
        *ytcfg* is given, its client name/version (and 'VISITOR_DATA', if a
        string) are written into that copy.
        """
        _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
        context = _get_context(ytcfg)
        if context:
            return context

        context = _get_context(self._get_default_ytcfg(default_client))
        if not ytcfg:
            return context

        # Recreate the client context (required)
        context['client'].update({
            'clientVersion': self._extract_client_version(ytcfg, default_client),
            'clientName': self._extract_client_name(ytcfg, default_client),
        })
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            context['client']['visitorData'] = visitor_data
        return context

    def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
        """Build the 'SAPISIDHASH <time>_<sha1>' authorization value for *origin*.

        Returns None when neither the '__Secure-3PAPISID' nor the 'SAPISID'
        cookie carries a value. As a side effect, a missing 'SAPISID' cookie
        is created from '__Secure-3PAPISID' with a one hour expiry.
        """
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        yt_cookies = self._get_cookies('https://www.youtube.com')
        sapisid_cookie = dict_get(
            yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
        if sapisid_cookie is None or not sapisid_cookie.value:
            return
        time_now = round(time.time())
        # SAPISID cookie is required if not already present
        if not yt_cookies.get('SAPISID'):
            self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie', only_once=True)
            self._set_cookie(
                '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
        self.write_debug('Extracted SAPISID cookie', only_once=True)
        # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
        sapisidhash = hashlib.sha1(
            f'{time_now} {sapisid_cookie.value} {origin}'.encode('utf-8')).hexdigest()
        return f'SAPISIDHASH {time_now}_{sapisidhash}'

    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page',
                  context=None, api_key=None, api_hostname=None, default_client='web'):
        """POST *query* to the Innertube endpoint *ep* and return the parsed JSON.

        @param ep              endpoint name appended to '/youtubei/v1/'
        @param query           dict merged into the JSON request body
        @param headers         extra headers; override the generated ones
        @param context         Innertube context; defaults to the one of *default_client*
        @param api_key         API key; defaults to the one of *default_client*
        @param api_hostname    host to contact; defaults to the one of *default_client*
        The remaining arguments are passed through to _download_json().
        """
        data = {'context': context or self._extract_context(default_client=default_client)}
        data.update(query)
        real_headers = self.generate_api_headers(default_client=default_client)
        real_headers.update({'content-type': 'application/json'})
        if headers:
            real_headers.update(headers)
        return self._download_json(
            'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
            video_id=video_id, fatal=fatal, note=note, errnote=errnote,
            data=json.dumps(data).encode('utf8'), headers=real_headers,
            # Fall back to the key of the same client the context, headers
            # and host were derived from.
            query={'key': api_key or self._extract_api_key(default_client=default_client)})

    def extract_yt_initial_data(self, video_id, webpage):
        """Locate the ytInitialData object in *webpage* and return it parsed as JSON."""
        return self._parse_json(
            self._search_regex(
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_identity_token(self, webpage, item_id):
        """Return the ID_TOKEN found in *webpage*, or None.

        The ytcfg embedded in the page is consulted first; a regex search over
        the raw page is the fallback.
        """
        if not webpage:
            return None
        ytcfg = self.extract_ytcfg(item_id, webpage)
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
                return token
        return self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)

    @staticmethod
    def _extract_account_syncid(*args):
        """
        Extract syncId required to download private playlists of secondary channels
        @params response and/or ytcfg
        """
        for data in args:
            # ytcfg includes channel_syncid if on secondary channel
            delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
            if delegated_sid:
                return delegated_sid
            sync_ids = (try_get(
                data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                       lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
            if len(sync_ids) >= 2 and sync_ids[1]:
                # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
                # and just "user_syncid||" for primary channel. We only want the channel_syncid
                return sync_ids[0]

    def extract_ytcfg(self, video_id, webpage):
        """Return the dict passed to ytcfg.set(...) in *webpage*, or {} if unavailable."""
        if not webpage:
            return {}
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False) or {}

    def generate_api_headers(
            self, ytcfg=None, identity_token=None, account_syncid=None,
            visitor_data=None, api_hostname=None, default_client='web', session_index=None):
        """Build the HTTP headers for an Innertube API request.

        Client name/version and origin are always set. The identity token,
        page id, auth user, visitor id and SAPISIDHASH authorization headers
        are added only when the corresponding value is available. Missing
        *visitor_data* and *session_index* are looked up in *ytcfg*.
        """
        origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
        headers = {
            'X-YouTube-Client-Name': compat_str(
                self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
            'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
            'Origin': origin
        }
        if not visitor_data and ytcfg:
            visitor_data = try_get(
                self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
        if identity_token:
            headers['X-Youtube-Identity-Token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
        if session_index is None and ytcfg:
            session_index = self._extract_session_index(ytcfg)
        if account_syncid or session_index is not None:
            # With a page id but no known session index, 0 is sent.
            headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
        if visitor_data:
            headers['X-Goog-Visitor-Id'] = visitor_data
        auth = self._generate_sapisidhash_header(origin)
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = origin
        return headers

    @staticmethod
    def _build_api_continuation_query(continuation, ctp=None):
        """Return the query dict for a continuation request, with click tracking if *ctp* is given."""
        query = {
            'continuation': continuation
        }
        # TODO: Inconsistency with clickTrackingParams.
        # Currently we have a fixed ctp contained within context (from ytcfg)
        # and a ctp in root query for continuation.
        if ctp:
            query['clickTracking'] = {'clickTrackingParams': ctp}
        return query

    @classmethod
    def _extract_next_continuation_data(cls, renderer):
        """Return a continuation query built from next/reload continuation data of *renderer*, or None."""
        next_continuation = try_get(
            renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                       lambda x: x['continuation']['reloadContinuationData']), dict)
        if not next_continuation:
            return
        continuation = next_continuation.get('continuation')
        if not continuation:
            return
        ctp = next_continuation.get('clickTrackingParams')
        return cls._build_api_continuation_query(continuation, ctp)

    @classmethod
    def _extract_continuation_ep_data(cls, continuation_ep: dict):
        """Return a continuation query built from a continuation endpoint dict, or None.

        None is returned when *continuation_ep* is not a dict or carries no
        string 'continuationCommand' token.
        """
        if isinstance(continuation_ep, dict):
            continuation = try_get(
                continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
            if not continuation:
                return
            ctp = continuation_ep.get('clickTrackingParams')
            return cls._build_api_continuation_query(continuation, ctp)

    @classmethod
    def _extract_continuation(cls, renderer):
        """Return the continuation query for *renderer*, or None if there is none.

        Next/reload continuation data is tried first, then the
        continuationItemRenderer entries inside 'contents' and 'items'.
        """
        next_continuation = cls._extract_next_continuation_data(renderer)
        if next_continuation:
            return next_continuation

        contents = []
        for key in ('contents', 'items'):
            contents.extend(try_get(renderer, lambda x: x[key], list) or [])

        for content in contents:
            if not isinstance(content, dict):
                continue
            continuation_ep = try_get(
                content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                          lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
                dict)
            continuation = cls._extract_continuation_ep_data(continuation_ep)
            if continuation:
                return continuation

    @classmethod
    def _extract_alerts(cls, data):
        """Yield (alert_type, message) for every alert in data['alerts'] that has a type and text."""
        for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert_dict, dict):
                continue
            for alert in alert_dict.values():
                # Skip malformed entries instead of failing on .get()
                if not isinstance(alert, dict):
                    continue
                alert_type = alert.get('type')
                if not alert_type:
                    continue
                message = cls._get_text(alert, 'text')
                if message:
                    yield alert_type, message

    def _report_alerts(self, alerts, expected=True):
        """Report *alerts* as warnings and raise on errors.

        Non-error alerts and all but the last error are reported as warnings.
        If there is at least one error alert, ExtractorError is raised with the
        message of the last one.
        """
        errors = []
        warnings = []
        for alert_type, alert_message in alerts:
            if alert_type.lower() == 'error':
                errors.append([alert_type, alert_message])
            else:
                warnings.append([alert_type, alert_message])

        for alert_type, alert_message in (warnings + errors[:-1]):
            self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
        if errors:
            raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

    def _extract_and_report_alerts(self, data, *args, **kwargs):
        """Extract the alerts from *data* and pass them to _report_alerts()."""
        return self._report_alerts(self._extract_alerts(data), *args, **kwargs)

    def _extract_badges(self, renderer: dict):
        """Return the set of lower-cased metadata badge labels of *renderer*."""
        badges = set()
        for badge in try_get(renderer, lambda x: x['badges'], list) or []:
            label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
            if label:
                badges.add(label.lower())
        return badges

    @staticmethod
    def _get_text(data, *path_list, max_runs=None):
        """Return the first non-empty text found under any of *path_list* in *data*.

        Each candidate is read from its 'simpleText', or else by joining the
        'text' of its 'runs' (at most *max_runs* of them, if given). With no
        paths, *data* itself is the candidate. Returns None if nothing is found.
        """
        for path in path_list or [None]:
            if path is None:
                obj = [data]
            else:
                obj = traverse_obj(data, path, default=[])
                # Wrap the result in a list unless the path contains a
                # branching key (... or a list/tuple).
                if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                    obj = [obj]
            for item in obj:
                text = try_get(item, lambda x: x['simpleText'], compat_str)
                if text:
                    return text
                runs = try_get(item, lambda x: x['runs'], list) or []
                if not runs and isinstance(item, list):
                    runs = item

                runs = runs[:min(len(runs), max_runs or len(runs))]
                text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
                if text:
                    return text

    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='web'):
        """Call the API with retries and return the response.

        The request is retried up to the 'extractor_retries' param (default 3)
        on network errors other than HTTP 403/429, and when none of
        *check_get_keys* is present in the response. Alerts contained in the
        response are reported.

        When *fatal* is false, failures are reported as warnings and None is
        returned; otherwise the error is raised.
        """
        response = None
        last_error = None
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                # Always fatal here so that failures reach the retry logic below
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, network_exceptions):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    # We also want to catch all other network exceptions since errors in later pages can be troublesome
                    # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                        last_error = error_to_compat_str(e.cause or e)
                        if count < retries:
                            continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response

    @staticmethod
    def is_music_url(url):
        """Return True if *url* is an http(s) URL on music.youtube.com."""
        return re.match(r'https?://music\.youtube\.com/', url) is not None

    def _extract_video(self, renderer):
        """Build a 'url' result for YoutubeIE from a video renderer dict."""
        video_id = renderer.get('videoId')
        title = self._get_text(renderer, 'title')
        description = self._get_text(renderer, 'descriptionSnippet')
        duration = parse_duration(self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
        view_count_text = self._get_text(renderer, 'viewCountText') or ''
        # Only a leading run of digits/commas is accepted as the view count
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))

        uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')

        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
813
0c148415 814
360e1ca5 815class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 816 IE_DESC = 'YouTube.com'
bc2ca1bb 817 _INVIDIOUS_SITES = (
818 # invidious-redirect websites
819 r'(?:www\.)?redirect\.invidious\.io',
820 r'(?:(?:www|dev)\.)?invidio\.us',
821 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
822 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 823 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 824 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 825 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
bc2ca1bb 826 # youtube-dl invidious instances list
827 r'(?:(?:www|no)\.)?invidiou\.sh',
828 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
829 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 830 r'(?:www\.)?invidious\.mastodon\.host',
831 r'(?:www\.)?invidious\.zapashcanon\.fr',
ed807c18 832 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
201c1459 833 r'(?:www\.)?invidious\.tinfoil-hat\.net',
834 r'(?:www\.)?invidious\.himiko\.cloud',
835 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 836 r'(?:www\.)?invidious\.tube',
837 r'(?:www\.)?invidiou\.site',
838 r'(?:www\.)?invidious\.site',
839 r'(?:www\.)?invidious\.xyz',
840 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 841 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 842 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 843 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 844 r'(?:www\.)?tube\.poal\.co',
845 r'(?:www\.)?tube\.connect\.cafe',
846 r'(?:www\.)?vid\.wxzm\.sx',
847 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 848 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 849 r'(?:www\.)?yewtu\.be',
850 r'(?:www\.)?yt\.elukerio\.org',
851 r'(?:www\.)?yt\.lelux\.fi',
852 r'(?:www\.)?invidious\.ggc-project\.de',
853 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 854 r'(?:www\.)?ytprivate\.com',
855 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 856 r'(?:www\.)?invidious\.toot\.koeln',
857 r'(?:www\.)?invidious\.fdn\.fr',
858 r'(?:www\.)?watch\.nettohikari\.com',
ed807c18 859 r'(?:www\.)?invidious\.namazso\.eu',
860 r'(?:www\.)?invidious\.silkky\.cloud',
861 r'(?:www\.)?invidious\.exonip\.de',
862 r'(?:www\.)?invidious\.riverside\.rocks',
863 r'(?:www\.)?invidious\.blamefran\.net',
864 r'(?:www\.)?invidious\.moomoo\.de',
865 r'(?:www\.)?ytb\.trom\.tf',
866 r'(?:www\.)?yt\.cyberhost\.uk',
bc2ca1bb 867 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
868 r'(?:www\.)?qklhadlycap4cnod\.onion',
869 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
870 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
871 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
872 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
873 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
874 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
ed807c18 875 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
876 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
877 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
878 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
bc2ca1bb 879 )
cb7dfeea 880 _VALID_URL = r"""(?x)^
c5e8d7af 881 (
edb53e2d 882 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 883 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
884 (?:www\.)?deturl\.com/www\.youtube\.com|
885 (?:www\.)?pwnyoutube\.com|
886 (?:www\.)?hooktube\.com|
887 (?:www\.)?yourepeat\.com|
888 tube\.majestyc\.net|
889 %(invidious)s|
890 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
891 (?:.*?\#/)? # handle anchor (#/) redirect urls
892 (?: # the various things that can precede the ID:
ac7553d0 893 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 894 |(?: # or the v= param in all its forms
f7000f3a 895 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 896 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 897 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
898 v=
899 )
f4b05232 900 ))
cbaed4bb
S
901 |(?:
902 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
903 vid\.plus| # or vid.plus/xxxx
904 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 905 %(invidious)s
cbaed4bb 906 )/
edb53e2d 907 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 908 )
c5e8d7af 909 )? # all until now is optional -> you can pass the naked ID
201c1459 910 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 911 (?(1).+)? # if we found the ID, everything can follow
9297939e 912 (?:\#|$)""" % {
bc2ca1bb 913 'invidious': '|'.join(_INVIDIOUS_SITES),
914 }
e40c758c 915 _PLAYER_INFO_RE = (
cc2db878 916 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
917 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 918 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 919 )
2c62dc26 920 _formats = {
c2d3cb4c 921 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
922 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
923 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
924 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
925 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
926 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
927 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
928 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 929 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 930 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
931 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
932 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
933 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
934 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
935 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 936 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 937 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
938 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 939
940
941 # 3D videos
c2d3cb4c 942 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
943 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
944 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
945 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 946 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
947 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
948 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 949
96fb5605 950 # Apple HTTP Live Streaming
11f12195 951 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 952 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
953 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
954 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
955 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
956 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 957 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
958 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
959
960 # DASH mp4 video
d23028a8
S
961 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
962 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
963 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
964 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
965 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 966 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
967 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
968 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
969 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
970 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
971 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
972 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 973
f6f1fc92 974 # DASH mp4 audio
d23028a8
S
975 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
976 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
977 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
978 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
979 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
980 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
981 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
982
983 # DASH webm video
d23028a8
S
984 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
985 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
986 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
987 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
988 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
989 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
990 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
991 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
992 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
993 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
994 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
995 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
996 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
997 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
998 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 999 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1000 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1001 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1002 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1003 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1004 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1005 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1006
1007 # DASH webm audio
d23028a8
S
1008 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1009 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1010
0857baad 1011 # DASH webm audio with Opus inside
d23028a8
S
1012 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1013 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1014 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1015
ce6b9a2d
PH
1016 # RTMP (unnamed)
1017 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1018
1019 # av01 video only formats sometimes served with "unknown" codecs
1020 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1021 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1022 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1023 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 1024 }
29f7c58a 1025 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1026
109dd3b2 1027 _AGE_GATE_REASONS = (
1028 'Sign in to confirm your age',
1029 'This video may be inappropriate for some users.',
9275f62c 1030 'Sorry, this content is age-restricted.',
1031 'Please confirm your age.')
1032
1033 _AGE_GATE_STATUS_REASONS = (
1034 'AGE_VERIFICATION_REQUIRED',
1035 'AGE_CHECK_REQUIRED'
1036 )
109dd3b2 1037
fd5c4aab
S
1038 _GEO_BYPASS = False
1039
78caa52a 1040 IE_NAME = 'youtube'
2eb88d95
PH
1041 _TESTS = [
1042 {
2d3d2997 1043 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1044 'info_dict': {
1045 'id': 'BaW_jenozKc',
1046 'ext': 'mp4',
3867038a 1047 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1048 'uploader': 'Philipp Hagemeister',
1049 'uploader_id': 'phihag',
ec85ded8 1050 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
1051 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1052 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1053 'upload_date': '20121002',
3867038a 1054 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 1055 'categories': ['Science & Technology'],
3867038a 1056 'tags': ['youtube-dl'],
556dbe7f 1057 'duration': 10,
dbdaaa23 1058 'view_count': int,
3e7c1224
PH
1059 'like_count': int,
1060 'dislike_count': int,
7c80519c 1061 'start_time': 1,
297a564b 1062 'end_time': 9,
2eb88d95 1063 }
0e853ca4 1064 },
fccd3771 1065 {
4bc3a23e
PH
1066 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1067 'note': 'Embed-only video (#1746)',
1068 'info_dict': {
1069 'id': 'yZIXLfi8CZQ',
1070 'ext': 'mp4',
1071 'upload_date': '20120608',
1072 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1073 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1074 'uploader': 'SET India',
94bfcd23 1075 'uploader_id': 'setindia',
ec85ded8 1076 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1077 'age_limit': 18,
545cc85d 1078 },
1079 'skip': 'Private video',
fccd3771 1080 },
11b56058 1081 {
8bdd16b4 1082 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1083 'note': 'Use the first video ID in the URL',
1084 'info_dict': {
1085 'id': 'BaW_jenozKc',
1086 'ext': 'mp4',
3867038a 1087 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1088 'uploader': 'Philipp Hagemeister',
1089 'uploader_id': 'phihag',
ec85ded8 1090 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 1091 'upload_date': '20121002',
3867038a 1092 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 1093 'categories': ['Science & Technology'],
3867038a 1094 'tags': ['youtube-dl'],
556dbe7f 1095 'duration': 10,
dbdaaa23 1096 'view_count': int,
11b56058
PM
1097 'like_count': int,
1098 'dislike_count': int,
34a7de29
S
1099 },
1100 'params': {
1101 'skip_download': True,
1102 },
11b56058 1103 },
dd27fd17 1104 {
2d3d2997 1105 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1106 'note': '256k DASH audio (format 141) via DASH manifest',
1107 'info_dict': {
1108 'id': 'a9LDPn-MO4I',
1109 'ext': 'm4a',
1110 'upload_date': '20121002',
1111 'uploader_id': '8KVIDEO',
ec85ded8 1112 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1113 'description': '',
1114 'uploader': '8KVIDEO',
1115 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1116 },
4bc3a23e
PH
1117 'params': {
1118 'youtube_include_dash_manifest': True,
1119 'format': '141',
4919603f 1120 },
de3c7fe0 1121 'skip': 'format 141 not served anymore',
dd27fd17 1122 },
8bdd16b4 1123 # DASH manifest with encrypted signature
1124 {
1125 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1126 'info_dict': {
1127 'id': 'IB3lcPjvWLA',
1128 'ext': 'm4a',
1129 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1130 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1131 'duration': 244,
1132 'uploader': 'AfrojackVEVO',
1133 'uploader_id': 'AfrojackVEVO',
1134 'upload_date': '20131011',
cc2db878 1135 'abr': 129.495,
8bdd16b4 1136 },
1137 'params': {
1138 'youtube_include_dash_manifest': True,
1139 'format': '141/bestaudio[ext=m4a]',
1140 },
1141 },
dd2d55f1 1142 # Normal age-gate video (embed allowed)
c522adb1 1143 {
2d3d2997 1144 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1145 'info_dict': {
1146 'id': 'HtVdAasjOgU',
1147 'ext': 'mp4',
1148 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1149 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1150 'duration': 142,
c522adb1
JMF
1151 'uploader': 'The Witcher',
1152 'uploader_id': 'WitcherGame',
ec85ded8 1153 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1154 'upload_date': '20140605',
34952f09 1155 'age_limit': 18,
c522adb1
JMF
1156 },
1157 },
8bdd16b4 1158 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1159 # YouTube Red ad is not captured for creator
1160 {
1161 'url': '__2ABJjxzNo',
1162 'info_dict': {
1163 'id': '__2ABJjxzNo',
1164 'ext': 'mp4',
1165 'duration': 266,
1166 'upload_date': '20100430',
1167 'uploader_id': 'deadmau5',
1168 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1169 'creator': 'deadmau5',
1170 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1171 'uploader': 'deadmau5',
1172 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1173 'alt_title': 'Some Chords',
8bdd16b4 1174 },
1175 'expected_warnings': [
1176 'DASH manifest missing',
1177 ]
1178 },
067aa17e 1179 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1180 {
1181 'url': 'lqQg6PlCWgI',
1182 'info_dict': {
1183 'id': 'lqQg6PlCWgI',
1184 'ext': 'mp4',
556dbe7f 1185 'duration': 6085,
90227264 1186 'upload_date': '20150827',
cbe2bd91 1187 'uploader_id': 'olympic',
ec85ded8 1188 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1189 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
11f9be09 1190 'uploader': 'Olympics',
cbe2bd91
PH
1191 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1192 },
1193 'params': {
1194 'skip_download': 'requires avconv',
e52a40ab 1195 }
cbe2bd91 1196 },
6271f1ca
PH
1197 # Non-square pixels
1198 {
1199 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1200 'info_dict': {
1201 'id': '_b-2C3KPAM0',
1202 'ext': 'mp4',
1203 'stretched_ratio': 16 / 9.,
556dbe7f 1204 'duration': 85,
6271f1ca
PH
1205 'upload_date': '20110310',
1206 'uploader_id': 'AllenMeow',
ec85ded8 1207 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1208 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1209 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1210 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1211 },
06b491eb
S
1212 },
1213 # url_encoded_fmt_stream_map is empty string
1214 {
1215 'url': 'qEJwOuvDf7I',
1216 'info_dict': {
1217 'id': 'qEJwOuvDf7I',
f57b7835 1218 'ext': 'webm',
06b491eb
S
1219 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1220 'description': '',
1221 'upload_date': '20150404',
1222 'uploader_id': 'spbelect',
1223 'uploader': 'Наблюдатели Петербурга',
1224 },
1225 'params': {
1226 'skip_download': 'requires avconv',
e323cf3f
S
1227 },
1228 'skip': 'This live event has ended.',
06b491eb 1229 },
067aa17e 1230 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1231 {
1232 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1233 'info_dict': {
1234 'id': 'FIl7x6_3R5Y',
eb6793ba 1235 'ext': 'webm',
da77d856
S
1236 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1237 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1238 'duration': 220,
da77d856
S
1239 'upload_date': '20150625',
1240 'uploader_id': 'dorappi2000',
ec85ded8 1241 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1242 'uploader': 'dorappi2000',
eb6793ba 1243 'formats': 'mincount:31',
da77d856 1244 },
eb6793ba 1245 'skip': 'not actual anymore',
2ee8f5d8 1246 },
8a1a26ce
YCH
1247 # DASH manifest with segment_list
1248 {
1249 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1250 'md5': '8ce563a1d667b599d21064e982ab9e31',
1251 'info_dict': {
1252 'id': 'CsmdDsKjzN8',
1253 'ext': 'mp4',
17ee98e1 1254 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1255 'uploader': 'Airtek',
1256 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1257 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1258 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1259 },
1260 'params': {
1261 'youtube_include_dash_manifest': True,
1262 'format': '135', # bestvideo
be49068d
S
1263 },
1264 'skip': 'This live event has ended.',
2ee8f5d8 1265 },
cf7e015f
S
1266 {
1267 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1268 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1269 'info_dict': {
545cc85d 1270 'id': 'jvGDaLqkpTg',
1271 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1272 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1273 },
1274 'playlist': [{
1275 'info_dict': {
545cc85d 1276 'id': 'jvGDaLqkpTg',
cf7e015f 1277 'ext': 'mp4',
545cc85d 1278 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1279 'description': 'md5:e03b909557865076822aa169218d6a5d',
1280 'duration': 10643,
1281 'upload_date': '20161111',
1282 'uploader': 'Team PGP',
1283 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1284 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1285 },
1286 }, {
1287 'info_dict': {
545cc85d 1288 'id': '3AKt1R1aDnw',
cf7e015f 1289 'ext': 'mp4',
545cc85d 1290 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1291 'description': 'md5:e03b909557865076822aa169218d6a5d',
1292 'duration': 10991,
1293 'upload_date': '20161111',
1294 'uploader': 'Team PGP',
1295 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1296 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1297 },
1298 }, {
1299 'info_dict': {
545cc85d 1300 'id': 'RtAMM00gpVc',
cf7e015f 1301 'ext': 'mp4',
545cc85d 1302 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1303 'description': 'md5:e03b909557865076822aa169218d6a5d',
1304 'duration': 10995,
1305 'upload_date': '20161111',
1306 'uploader': 'Team PGP',
1307 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1308 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1309 },
1310 }, {
1311 'info_dict': {
545cc85d 1312 'id': '6N2fdlP3C5U',
cf7e015f 1313 'ext': 'mp4',
545cc85d 1314 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1315 'description': 'md5:e03b909557865076822aa169218d6a5d',
1316 'duration': 10990,
1317 'upload_date': '20161111',
1318 'uploader': 'Team PGP',
1319 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1320 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1321 },
1322 }],
1323 'params': {
1324 'skip_download': True,
1325 },
cbaed4bb 1326 },
f9f49d87 1327 {
067aa17e 1328 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1329 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1330 'info_dict': {
1331 'id': 'gVfLd0zydlo',
1332 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1333 },
1334 'playlist_count': 2,
be49068d 1335 'skip': 'Not multifeed anymore',
f9f49d87 1336 },
cbaed4bb 1337 {
2d3d2997 1338 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1339 'only_matching': True,
0e49d9a6 1340 },
6d4fc66b 1341 {
2d3d2997 1342 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1343 'only_matching': True,
1344 },
0e49d9a6 1345 {
067aa17e 1346 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1347 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1348 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1349 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1350 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1351 'info_dict': {
1352 'id': 'lsguqyKfVQg',
1353 'ext': 'mp4',
1354 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1355 'alt_title': 'Dark Walk',
0e49d9a6 1356 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1357 'duration': 133,
0e49d9a6
LL
1358 'upload_date': '20151119',
1359 'uploader_id': 'IronSoulElf',
ec85ded8 1360 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1361 'uploader': 'IronSoulElf',
11f9be09 1362 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1363 'track': 'Dark Walk',
1364 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1365 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1366 },
1367 'params': {
1368 'skip_download': True,
1369 },
1370 },
61f92af1 1371 {
067aa17e 1372 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1373 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1374 'only_matching': True,
1375 },
313dfc45
LL
1376 {
1377 # Video with yt:stretch=17:0
1378 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1379 'info_dict': {
1380 'id': 'Q39EVAstoRM',
1381 'ext': 'mp4',
1382 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1383 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1384 'upload_date': '20151107',
1385 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1386 'uploader': 'CH GAMER DROID',
1387 },
1388 'params': {
1389 'skip_download': True,
1390 },
be49068d 1391 'skip': 'This video does not exist.',
313dfc45 1392 },
201c1459 1393 {
1394 # Video with incomplete 'yt:stretch=16:'
1395 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1396 'only_matching': True,
1397 },
7caf9830
S
1398 {
1399 # Video licensed under Creative Commons
1400 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1401 'info_dict': {
1402 'id': 'M4gD1WSo5mA',
1403 'ext': 'mp4',
1404 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1405 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1406 'duration': 721,
7caf9830
S
1407 'upload_date': '20150127',
1408 'uploader_id': 'BerkmanCenter',
ec85ded8 1409 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1410 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1411 'license': 'Creative Commons Attribution license (reuse allowed)',
1412 },
1413 'params': {
1414 'skip_download': True,
1415 },
1416 },
fd050249
S
1417 {
1418 # Channel-like uploader_url
1419 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1420 'info_dict': {
1421 'id': 'eQcmzGIKrzg',
1422 'ext': 'mp4',
1423 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1424 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1425 'duration': 4060,
fd050249 1426 'upload_date': '20151119',
eb6793ba 1427 'uploader': 'Bernie Sanders',
fd050249 1428 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1429 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1430 'license': 'Creative Commons Attribution license (reuse allowed)',
1431 },
1432 'params': {
1433 'skip_download': True,
1434 },
1435 },
040ac686
S
1436 {
1437 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1438 'only_matching': True,
7f29cf54
S
1439 },
1440 {
067aa17e 1441 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1442 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1443 'only_matching': True,
6496ccb4
S
1444 },
1445 {
1446 # Rental video preview
1447 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1448 'info_dict': {
1449 'id': 'uGpuVWrhIzE',
1450 'ext': 'mp4',
1451 'title': 'Piku - Trailer',
1452 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1453 'upload_date': '20150811',
1454 'uploader': 'FlixMatrix',
1455 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1456 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1457 'license': 'Standard YouTube License',
1458 },
1459 'params': {
1460 'skip_download': True,
1461 },
eb6793ba 1462 'skip': 'This video is not available.',
022a5d66 1463 },
12afdc2a
S
1464 {
1465 # YouTube Red video with episode data
1466 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1467 'info_dict': {
1468 'id': 'iqKdEhx-dD4',
1469 'ext': 'mp4',
1470 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1471 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1472 'duration': 2085,
12afdc2a
S
1473 'upload_date': '20170118',
1474 'uploader': 'Vsauce',
1475 'uploader_id': 'Vsauce',
1476 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1477 'series': 'Mind Field',
1478 'season_number': 1,
1479 'episode_number': 1,
1480 },
1481 'params': {
1482 'skip_download': True,
1483 },
1484 'expected_warnings': [
1485 'Skipping DASH manifest',
1486 ],
1487 },
c7121fa7
S
1488 {
1489 # The following content has been identified by the YouTube community
1490 # as inappropriate or offensive to some audiences.
1491 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1492 'info_dict': {
1493 'id': '6SJNVb0GnPI',
1494 'ext': 'mp4',
1495 'title': 'Race Differences in Intelligence',
1496 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1497 'duration': 965,
1498 'upload_date': '20140124',
1499 'uploader': 'New Century Foundation',
1500 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1501 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1502 },
1503 'params': {
1504 'skip_download': True,
1505 },
545cc85d 1506 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1507 },
022a5d66
S
1508 {
1509 # itag 212
1510 'url': '1t24XAntNCY',
1511 'only_matching': True,
fd5c4aab
S
1512 },
1513 {
1514 # geo restricted to JP
1515 'url': 'sJL6WA-aGkQ',
1516 'only_matching': True,
1517 },
cd5a74a2
S
1518 {
1519 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1520 'only_matching': True,
1521 },
bc2ca1bb 1522 {
1523 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1524 'only_matching': True,
1525 },
1526 {
1527 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1528 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1529 'only_matching': True,
1530 },
825cd268
RA
1531 {
1532 # DRM protected
1533 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1534 'only_matching': True,
4fe54c12
S
1535 },
1536 {
1537 # Video with unsupported adaptive stream type formats
1538 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1539 'info_dict': {
1540 'id': 'Z4Vy8R84T1U',
1541 'ext': 'mp4',
1542 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1543 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1544 'duration': 433,
1545 'upload_date': '20130923',
1546 'uploader': 'Amelia Putri Harwita',
1547 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1548 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1549 'formats': 'maxcount:10',
1550 },
1551 'params': {
1552 'skip_download': True,
1553 'youtube_include_dash_manifest': False,
1554 },
5429d6a9 1555 'skip': 'not actual anymore',
5caabd3c 1556 },
1557 {
822b9d9c 1558 # Youtube Music Auto-generated description
5caabd3c 1559 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1560 'info_dict': {
1561 'id': 'MgNrAu2pzNs',
1562 'ext': 'mp4',
1563 'title': 'Voyeur Girl',
1564 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1565 'upload_date': '20190312',
5429d6a9
S
1566 'uploader': 'Stephen - Topic',
1567 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1568 'artist': 'Stephen',
1569 'track': 'Voyeur Girl',
1570 'album': 'it\'s too much love to know my dear',
1571 'release_date': '20190313',
1572 'release_year': 2019,
1573 },
1574 'params': {
1575 'skip_download': True,
1576 },
1577 },
66b48727
RA
1578 {
1579 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1580 'only_matching': True,
1581 },
011e75e6
S
1582 {
1583 # invalid -> valid video id redirection
1584 'url': 'DJztXj2GPfl',
1585 'info_dict': {
1586 'id': 'DJztXj2GPfk',
1587 'ext': 'mp4',
1588 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1589 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1590 'upload_date': '20090125',
1591 'uploader': 'Prochorowka',
1592 'uploader_id': 'Prochorowka',
1593 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1594 'artist': 'Panjabi MC',
1595 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1596 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1597 },
1598 'params': {
1599 'skip_download': True,
1600 },
545cc85d 1601 'skip': 'Video unavailable',
ea74e00b
DP
1602 },
1603 {
1604 # empty description results in an empty string
1605 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1606 'info_dict': {
1607 'id': 'x41yOUIvK2k',
1608 'ext': 'mp4',
1609 'title': 'IMG 3456',
1610 'description': '',
1611 'upload_date': '20170613',
1612 'uploader_id': 'ElevageOrVert',
1613 'uploader': 'ElevageOrVert',
1614 },
1615 'params': {
1616 'skip_download': True,
1617 },
1618 },
a0566bbf 1619 {
29f7c58a 1620 # with '};' inside yt initial data (see [1])
1621 # see [2] for an example with '};' inside ytInitialPlayerResponse
1622 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1623 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1624 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1625 'info_dict': {
1626 'id': 'CHqg6qOn4no',
1627 'ext': 'mp4',
1628 'title': 'Part 77 Sort a list of simple types in c#',
1629 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1630 'upload_date': '20130831',
1631 'uploader_id': 'kudvenkat',
1632 'uploader': 'kudvenkat',
1633 },
1634 'params': {
1635 'skip_download': True,
1636 },
1637 },
29f7c58a 1638 {
1639 # another example of '};' in ytInitialData
1640 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1641 'only_matching': True,
1642 },
1643 {
1644 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1645 'only_matching': True,
1646 },
545cc85d 1647 {
cc2db878 1648 # https://github.com/ytdl-org/youtube-dl/pull/28094
1649 'url': 'OtqTfy26tG0',
1650 'info_dict': {
1651 'id': 'OtqTfy26tG0',
1652 'ext': 'mp4',
1653 'title': 'Burn Out',
1654 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1655 'upload_date': '20141120',
1656 'uploader': 'The Cinematic Orchestra - Topic',
1657 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1658 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1659 'artist': 'The Cinematic Orchestra',
1660 'track': 'Burn Out',
1661 'album': 'Every Day',
1662 'release_data': None,
1663 'release_year': None,
1664 },
1665 'params': {
1666 'skip_download': True,
1667 },
545cc85d 1668 },
bc2ca1bb 1669 {
1670 # controversial video, only works with bpctr when authenticated with cookies
1671 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1672 'only_matching': True,
1673 },
a1a7907b 1674 {
1675 # controversial video, requires bpctr/contentCheckOk
1676 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1677 'info_dict': {
1678 'id': 'SZJvDhaSDnc',
1679 'ext': 'mp4',
1680 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1681 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1682 'uploader': 'CBS This Morning',
11f9be09 1683 'uploader_id': 'CBSThisMorning',
a1a7907b 1684 'upload_date': '20140716',
1685 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1686 }
1687 },
f7ad7160 1688 {
1689 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1690 'url': 'cBvYw8_A0vQ',
1691 'info_dict': {
1692 'id': 'cBvYw8_A0vQ',
1693 'ext': 'mp4',
1694 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1695 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1696 'upload_date': '20201120',
1697 'uploader': 'Walk around Japan',
1698 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1699 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1700 },
1701 'params': {
1702 'skip_download': True,
1703 },
0fb983f6 1704 }, {
1705 # Has multiple audio streams
1706 'url': 'WaOKSUlf4TM',
1707 'only_matching': True
9297939e 1708 }, {
1709 # Requires Premium: has format 141 when requested using YTM url
1710 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1711 'only_matching': True
1712 }, {
120916da 1713 # multiple subtitles with same lang_code
1714 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1715 'only_matching': True,
109dd3b2 1716 }, {
1717 # Force use android client fallback
1718 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1719 'info_dict': {
1720 'id': 'YOelRv7fMxY',
11f9be09 1721 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 1722 'ext': '3gp',
1723 'upload_date': '20210624',
1724 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1725 'uploader': 'colinfurze',
11f9be09 1726 'uploader_id': 'colinfurze',
109dd3b2 1727 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
11f9be09 1728 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
109dd3b2 1729 },
1730 'params': {
1731 'format': '17', # 3gp format available on android
1732 'extractor_args': {'youtube': {'player_client': ['android']}},
1733 },
120916da 1734 },
109dd3b2 1735 {
1736 # Skip download of additional client configs (remix client config in this case)
1737 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1738 'only_matching': True,
1739 'params': {
1740 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1741 },
1742 }
2eb88d95
PH
1743 ]
1744
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle ``url``.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected here; every other URL is decided by the parent class.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
1754
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor with two empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player code (filled by _load_player)
    # _player_cache: (player_url, signature cache id) -> signature function
    #                (filled by _decrypt_signature)
    self._code_cache, self._player_cache = {}, {}
e0df6211 1759
def _extract_player_url(self, ytcfg=None, webpage=None):
    """Return the absolute player JS URL, or None if it cannot be found.

    ``ytcfg['PLAYER_JS_URL']`` is preferred; when it is missing and a
    ``webpage`` is given, the page is searched for a ``PLAYER_JS_URL`` or
    ``jsUrl`` JSON key instead. Scheme-relative and path-only URLs are
    completed against https://www.youtube.com.
    """
    url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
    if webpage and not url:
        url = self._search_regex(
            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
            webpage, 'player URL', fatal=False)
    if not url:
        return None
    if url.startswith('//'):
        return 'https:' + url
    if re.match(r'https?://', url):
        return url
    return compat_urlparse.urljoin('https://www.youtube.com', url)
1774
60064c53
PH
def _signature_cache_id(self, example_sig):
    """ Return a string representation of a signature """
    # The id is the dot-joined lengths of the dot-separated signature parts.
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1778
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first ``_PLAYER_INFO_RE`` pattern matching ``player_url``.

    Raises ExtractorError when none of the patterns matches.
    """
    # Lazy generator: patterns are tried in order and the search stops at the first hit.
    candidates = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    id_match = next((m for m in candidates if m), None)
    if id_match is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_match.group('id')
e40c758c 1788
109dd3b2 1789 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1790 player_id = self._extract_player_info(player_url)
1791 if player_id not in self._code_cache:
1792 self._code_cache[player_id] = self._download_webpage(
1793 player_url, video_id, fatal=fatal,
1794 note='Downloading player ' + player_id,
1795 errnote='Download of %s failed' % player_url)
1796 return player_id in self._code_cache
1797
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms a signature shaped like ``example_sig``.

    A previously stored index permutation is loaded from the
    'youtube-sigfuncs' cache when available. Otherwise the player code is
    loaded and parsed, the resulting function is run on a probe string to
    derive the permutation, and that permutation is stored in the cache.
    Returns None when the player could not be loaded.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    cached_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cached_spec is not None:
        return lambda s: ''.join(s[i] for i in cached_spec)

    if not self._load_player(video_id, player_url):
        return None

    sig_func = self._parse_sig_js(self._code_cache[player_id])

    # Probe with a string whose i-th character has code point i, so the
    # output's code points spell out the permutation that was applied.
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    new_spec = [ord(c) for c in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, new_spec)
    return sig_func
83799698 1820
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the signature function ``func``.

    ``func`` is applied to a probe string to recover the index permutation,
    which is then rendered as a concatenation of ``s[...]`` index and slice
    expressions and written out with ``self.to_screen``.
    """
    def gen_sig_code(idxs):
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        step = None
        # Placeholder that keeps pyflakes quiet - start is always assigned
        # together with step before it is read
        start = '(Never used)'
        for cur, prev in zip(idxs[1:], idxs[:-1]):
            delta = cur - prev
            if step is not None:
                # Inside a run: emit it as a slice once the stride changes
                if delta != step:
                    yield _genslice(start, prev, step)
                    step = None
            elif delta in [-1, 1]:
                # A new run of consecutive indices begins at prev
                step = delta
                start = prev
            else:
                yield 's[%d]' % prev
        # Flush the final index or the run that is still open
        if step is None:
            yield 's[%d]' % cur
        else:
            yield _genslice(start, cur, step)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    permutation = [ord(c) for c in func(probe)]
    expr_code = ' + '.join(gen_sig_code(permutation))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1859
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Return a Python callable wrapping the signature function found in ``jscode``.

    The function's name is located with the first matching pattern below
    and the function itself is extracted with JSInterpreter.
    """
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(')
    funcname = self._search_regex(
        sig_name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    initial_function = interpreter.extract_function(funcname)
    # The extracted function receives its arguments as a list
    return lambda s: initial_function([s])
1883
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""

    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    try:
        # One signature function is kept per (player URL, signature shape)
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        if self.get_param('youtube_print_sig_code'):
            self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        # Any failure is re-raised as ExtractorError carrying the traceback text
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=e)
e0df6211 1905
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    ``ytcfg['STS']`` is used when it yields a truthy integer; otherwise the
    value is searched for in the player code loaded from ``player_url``.
    Without a ``player_url`` this raises ExtractorError when ``fatal`` is
    set, and otherwise reports a warning and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None
    if not self._load_player(video_id, player_url, fatal=fatal):
        return sts
    code = self._code_cache[self._extract_player_info(player_url)]
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
1930
def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URL so the video is marked as watched.

    The URL is the first valid ``playbackTracking.videostatsPlaybackUrl.baseUrl``
    found in ``player_responses``. When there is none, a warning is reported
    and nothing is requested. The request itself is non-fatal.
    """
    playback_url = traverse_obj(
        player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none, get_all=False)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # Pick each of the 16 characters uniformly from the alphabet.
    # random.randint(0, 256) includes 256, which skewed the earlier
    # "& 63" mapping toward the first character.
    cpn = ''.join(random.choice(CPN_ALPHABET) for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1956
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return every YouTube embed reference found in ``webpage``.

    Three sources are collected, in this order: embedded player URLs
    (iframe/embed/object/SWFObject style markup), lazyYT ``data-youtube-id``
    values, and ``data-video_id`` values of the Wordpress "YouTube Video
    Importer" plugin. The last two contribute attribute values rather than
    full URLs.
    """
    # Embedded YouTube player
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]
    # The embedSWF alternative used to read "embedSWF\(?:\s*", which demands
    # a literal ":" and therefore never matched an embedSWF("...") call.

    # lazyYT YouTube embed
    entries.extend(map(
        unescapeHTML,
        re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall returns one tuple per match; the video id is its last group
    entries.extend(m[-1] for m in matches)

    return entries
1988
@staticmethod
def _extract_url(webpage):
    """Return the first entry of ``_extract_urls(webpage)``, or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1993
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id, i.e. group 2 of ``_VALID_URL`` matched against ``url``.

    Raises ExtractorError when ``url`` does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
2001
def _extract_chapters_from_json(self, data, duration):
    """Build the chapter list from the chaptered player bar found in ``data``."""
    def start_seconds(chapter):
        # timeRangeStartMillis is scaled by 1000 to get seconds
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    return self._extract_chapters(
        chapter_list,
        chapter_time=start_seconds,
        chapter_title=title_of,
        duration=duration)
2016
2017 def _extract_chapters_from_engagement_panel(self, data, duration):
2018 content_list = traverse_obj(
8bdd16b4 2019 data,
7c365c21 2020 ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
da503b7a 2021 expected_type=list, default=[])
052e1350 2022 chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2023 chapter_title = lambda chapter: self._get_text(chapter, 'title')
7c365c21 2024
2025 return next((
2026 filter(None, (
2027 self._extract_chapters(
2028 traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2029 chapter_time, chapter_title, duration)
2030 for contents in content_list
2031 ))), [])
2032
2033 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
84213ea8 2034 chapters = []
7c365c21 2035 last_chapter = {'start_time': 0}
2036 for idx, chapter in enumerate(chapter_list or []):
2037 title = chapter_title(chapter)
84213ea8
S
2038 start_time = chapter_time(chapter)
2039 if start_time is None:
2040 continue
7c365c21 2041 last_chapter['end_time'] = start_time
2042 if start_time < last_chapter['start_time']:
2043 if idx == 1:
2044 chapters.pop()
2045 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2046 else:
2047 self.report_warning(f'Invalid start time for chapter "{title}"')
2048 continue
2049 last_chapter = {'start_time': start_time, 'title': title}
2050 chapters.append(last_chapter)
2051 last_chapter['end_time'] = duration
84213ea8
S
2052 return chapters
2053
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Find the JSON matched by ``regex`` in ``webpage`` and parse it.

    ``regex`` is tried first with ``_YT_INITIAL_BOUNDARY_RE`` appended and
    then on its own. When nothing matches, ``'{}'`` is parsed instead.
    Parsing is non-fatal.
    """
    patterns = (
        r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
        regex)
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 2058
d92f5d5a 2059 @staticmethod
2060 def parse_time_text(time_text):
2061 """
2062 Parse the comment time text
2063 time_text is in the format 'X units ago (edited)'
2064 """
2065 time_text_split = time_text.split(' ')
2066 if len(time_text_split) >= 3:
da503b7a 2067 try:
2068 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2069 except ValueError:
2070 return None
d92f5d5a 2071
a1c5d2ca
M
2072 def _extract_comment(self, comment_renderer, parent=None):
2073 comment_id = comment_renderer.get('commentId')
2074 if not comment_id:
2075 return
fe93e2c4 2076
052e1350 2077 text = self._get_text(comment_renderer, 'contentText')
fe93e2c4 2078
49bd8c66 2079 # note: timestamp is an estimate calculated from the current time and time_text
052e1350 2080 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
fe93e2c4 2081 time_text_dt = self.parse_time_text(time_text)
2082 if isinstance(time_text_dt, datetime.datetime):
2083 timestamp = calendar.timegm(time_text_dt.timetuple())
052e1350 2084 author = self._get_text(comment_renderer, 'authorText')
a1c5d2ca
M
2085 author_id = try_get(comment_renderer,
2086 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
fe93e2c4 2087
49bd8c66 2088 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2089 lambda x: x['likeCount']), compat_str)) or 0
a1c5d2ca
M
2090 author_thumbnail = try_get(comment_renderer,
2091 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2092
2093 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
97524332 2094 is_favorited = 'creatorHeart' in (try_get(
2095 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
a1c5d2ca
M
2096 return {
2097 'id': comment_id,
2098 'text': text,
d92f5d5a 2099 'timestamp': timestamp,
a1c5d2ca
M
2100 'time_text': time_text,
2101 'like_count': votes,
97524332 2102 'is_favorited': is_favorited,
a1c5d2ca
M
2103 'author': author,
2104 'author_id': author_id,
2105 'author_thumbnail': author_thumbnail,
2106 'author_is_uploader': author_is_uploader,
2107 'parent': parent or 'root'
2108 }
2109
2110 def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
2d6659b9 2111 ytcfg, video_id, parent=None, comment_counts=None):
# Generator over the comments reachable from root_continuation_data.
# Yields the dicts built by self._extract_comment (replies follow their
# parent comment) and, on the first continuation, the expected total comment
# count read from the header when one is present.
# comment_counts is a mutable list shared with the recursive call made for
# reply threads: [comments so far, est. total comments, current comment thread #].
2112
# extract_header: reads the expected comment count and the continuation for
# the configured sort order out of the header renderers in `contents`;
# returns (_total_comments, _continuation).
2113 def extract_header(contents):
2114 _total_comments = 0
2115 _continuation = None
2116 for content in contents:
2117 comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
fe93e2c4 2118 expected_comment_count = parse_count(self._get_text(
052e1350 2119 comments_header_renderer, 'countText', 'commentsCount', max_runs=1))
fe93e2c4 2120
2d6659b9 2121 if expected_comment_count:
fe93e2c4 2122 comment_counts[1] = expected_comment_count
2123 self.to_screen('Downloading ~%d comments' % expected_comment_count)
2d6659b9 2124 _total_comments = comment_counts[1]
2125 sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
2126 comment_sort_index = int(sort_mode_str != 'top') # 1 = new, 0 = top
2127
2128 sort_menu_item = try_get(
2129 comments_header_renderer,
2130 lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
2131 sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}
2132
2133 _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
2134 if not _continuation:
2135 continue
2136
2137 sort_text = sort_menu_item.get('title')
2138 if isinstance(sort_text, compat_str):
2139 sort_text = sort_text.lower()
2140 else:
2141 sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
2142 self.to_screen('Sorting comments by %s' % sort_text)
2143 break
2144 return _total_comments, _continuation
a1c5d2ca 2145
# extract_thread: yields every comment found in `contents`, each one followed
# by its replies, which are fetched through a recursive _comment_entries call
# with parent set to the comment's id.
2d6659b9 2146 def extract_thread(contents):
a1c5d2ca
M
2147 if not parent:
2148 comment_counts[2] = 0
2149 for content in contents:
2150 comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
2151 comment_renderer = try_get(
2152 comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
2153 content, (lambda x: x['commentRenderer'], dict))
2154
2155 if not comment_renderer:
2156 continue
2157 comment = self._extract_comment(comment_renderer, parent)
2158 if not comment:
2159 continue
2160 comment_counts[0] += 1
2161 yield comment
2162 # Attempt to get the replies
2163 comment_replies_renderer = try_get(
2164 comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
2165
2166 if comment_replies_renderer:
2167 comment_counts[2] += 1
2168 comment_entries_iter = self._comment_entries(
f4f751af 2169 comment_replies_renderer, identity_token, account_syncid, ytcfg,
2d6659b9 2170 video_id, parent=comment.get('id'), comment_counts=comment_counts)
a1c5d2ca
M
2171
2172 for reply_comment in comment_entries_iter:
2173 yield reply_comment
2174
2d6659b9 2175 # YouTube comments have a max depth of 2
2176 max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
# With max_comment_depth set to 1, calls made for replies (parent set) yield nothing
2177 if max_depth == 1 and parent:
2178 return
a1c5d2ca
M
2179 if not comment_counts:
2180 # comment so far, est. total comments, current comment thread #
2181 comment_counts = [0, 0, 0]
a1c5d2ca 2182
2d6659b9 2183 continuation = self._extract_continuation(root_continuation_data)
fe93e2c4 2184 if continuation and len(continuation['continuation']) < 27:
2d6659b9 2185 self.write_debug('Detected old API continuation token. Generating new API compatible token.')
2186 continuation_token = self._generate_comment_continuation(video_id)
fe93e2c4 2187 continuation = self._build_api_continuation_query(continuation_token, None)
2d6659b9 2188
2189 visitor_data = None
# Only the top-level call (no parent) has a header to read on its first response
2190 is_first_continuation = parent is None
a1c5d2ca
M
2191
# One 'next' API request per iteration; the loop ends when no continuation is
# left or a request returns nothing.
2192 for page_num in itertools.count(0):
2193 if not continuation:
2194 break
11f9be09 2195 headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
2d6659b9 2196 comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
2197 if page_num == 0:
2198 if is_first_continuation:
2199 note_prefix = 'Downloading comment section API JSON'
a1c5d2ca 2200 else:
2d6659b9 2201 note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
2202 comment_counts[2], comment_prog_str)
2203 else:
2204 note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
2205 ' ' if parent else '', ' replies' if parent else '',
2206 page_num, comment_prog_str)
2207
2208 response = self._extract_response(
fe93e2c4 2209 item_id=None, query=continuation,
2d6659b9 2210 ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
2211 check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
a1c5d2ca
M
2212 if not response:
2213 break
# Keep the previous visitor_data when the response does not carry a new one
f4f751af 2214 visitor_data = try_get(
2215 response,
2216 lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
2217 compat_str) or visitor_data
a1c5d2ca 2218
2d6659b9 2219 continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))
a1c5d2ca 2220
2d6659b9 2221 continuation = None
# A list is handled as the current response structure; a dict is the
# deprecated structure handled further below.
2222 if isinstance(continuation_contents, list):
2223 for continuation_section in continuation_contents:
2224 if not isinstance(continuation_section, dict):
2225 continue
2226 continuation_items = try_get(
2227 continuation_section,
2228 (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
2229 lambda x: x['appendContinuationItemsAction']['continuationItems']),
2230 list) or []
2231 if is_first_continuation:
2232 total_comments, continuation = extract_header(continuation_items)
2233 if total_comments:
2234 yield total_comments
2235 is_first_continuation = False
2236 if continuation:
2237 break
2238 continue
2239 count = 0
# NOTE(review): enumerate() starts at 0, so count is also 0 after exactly one
# entry was yielded - confirm whether a single-entry page should really be
# treated as "no comments received" below.
2240 for count, entry in enumerate(extract_thread(continuation_items)):
2241 yield entry
2242 continuation = self._extract_continuation({'contents': continuation_items})
2243 if continuation:
2244 # Sometimes YouTube provides a continuation without any comments
2245 # In most cases we end up just downloading these with very little comments to come.
2246 if count == 0:
2247 if not parent:
2248 self.report_warning('No comments received - assuming end of comments')
2249 continuation = None
a1c5d2ca
M
2250 break
2251
2d6659b9 2252 # Deprecated response structure
2253 elif isinstance(continuation_contents, dict):
2254 known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
2255 for key, continuation_renderer in continuation_contents.items():
2256 if key not in known_continuation_renderers:
2257 continue
2258 if not isinstance(continuation_renderer, dict):
2259 continue
2260 if is_first_continuation:
2261 header_continuation_items = [continuation_renderer.get('header') or {}]
2262 total_comments, continuation = extract_header(header_continuation_items)
2263 if total_comments:
2264 yield total_comments
2265 is_first_continuation = False
2266 if continuation:
2267 break
a1c5d2ca 2268
2d6659b9 2269 # Sometimes YouTube provides a continuation without any comments
2270 # In most cases we end up just downloading these with very little comments to come.
2271 count = 0
2272 for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
2273 yield entry
2274 continuation = self._extract_continuation(continuation_renderer)
2275 if count == 0:
2276 if not parent:
2277 self.report_warning('No comments received - assuming end of comments')
2278 continuation = None
2279 break
a1c5d2ca 2280
@staticmethod
def _generate_comment_continuation(video_id):
    """
    Generates initial comment section continuation token from given video id
    """
    encoded_id = base64.b64encode(bytes(video_id.encode('utf-8')))
    # Fixed base64 segments with the base64-encoded video id spliced in twice
    segments = ('Eg0SCw==', encoded_id, 'GAYyJyIRIgs=', encoded_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
    token_ints = []
    for segment in segments:
        token_ints.extend(bytes_to_intlist(base64.b64decode(segment)))
    return base64.b64encode(intlist_to_bytes(token_ints)).decode('utf-8')
2291
def _extract_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""
    known_entry_comment_renderers = ('itemSectionRenderer',)

    def _real_comment_extract(contents):
        if not isinstance(contents, list):
            return
        for entry in contents:
            for key, renderer in entry.items():
                if key in known_entry_comment_renderers:
                    # ytcfg is read when the generator runs, i.e. after it
                    # has been replaced by the modified copy below
                    yield from self._comment_entries(
                        renderer, video_id=video_id, ytcfg=ytcfg,
                        identity_token=self._extract_identity_token(webpage, item_id=video_id),
                        account_syncid=self._extract_account_syncid(ytcfg))
                    break

    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
    # Force English regardless of account setting to prevent parsing issues
    # See: https://github.com/yt-dlp/yt-dlp/issues/532
    ytcfg = copy.deepcopy(ytcfg)
    client_context = traverse_obj(
        ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})
    client_context['hl'] = 'en'

    collected = []
    estimated_total = 0
    try:
        for item in _real_comment_extract(contents):
            if len(collected) >= max_comments:
                break
            if isinstance(item, int):
                # An int is the estimated total, not a comment
                estimated_total = item
            else:
                collected.append(item)
    except KeyboardInterrupt:
        self.to_screen('Interrupted by user')
    self.to_screen('Downloaded %d/%d comments' % (len(collected), estimated_total))
    return {
        'comments': collected,
        'comment_count': len(collected),
    }
2329
109dd3b2 2330 @staticmethod
2331 def _generate_player_context(sts=None):
2332 context = {
2333 'html5Preference': 'HTML5_PREF_WANTS',
2334 }
2335 if sts is not None:
2336 context['signatureTimestamp'] = sts
2337 return {
2338 'playbackContext': {
2339 'contentPlaybackContext': context
a1a7907b 2340 },
2fd226f6 2341 'contentCheckOk': True,
2342 'racyCheckOk': True
109dd3b2 2343 }
2344
def _is_agegated(self, player_response):
    """
    Tell whether a player response is age-gated

    True when the playability `status` or `reason` is one of the known
    age-gate values, or when `desktopLegacyAgeGateReason` is present.
    """
    status_values = traverse_obj(
        player_response, ('playabilityStatus', ('status', 'reason')), default=[])
    if any(value in self._AGE_GATE_REASONS + self._AGE_GATE_STATUS_REASONS
           for value in status_values):
        return True
    legacy_reason = traverse_obj(
        player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason'))
    return legacy_reason is not None
2353
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
    """
    Request the `player` endpoint for the given client

    The request is non-fatal; returns the response, or None if it is empty.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    headers = self.generate_api_headers(
        player_ytcfg, identity_token, syncid,
        default_client=client, session_index=session_index)

    yt_query = {'videoId': video_id, **self._generate_player_context(sts)}
    display_name = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=False,
        default_client=client,
        note='Downloading %s player API JSON' % display_name)
    # Normalise every falsy response to None
    return response or None
2371
def _get_requested_clients(self, url, smuggled_data):
    """
    Determine which player clients to query for this URL

    Reads the `player_client` extractor argument; `all` selects every
    allowed client and unknown names are skipped with a warning.
    Falls back to `android` and `web` when nothing valid was requested.
    For music URLs a `<client>_music` variant is added for each selected client.
    Returns the result of `orderedSet` over the selected client names.
    """
    # Clients whose name starts with "_" are never user-selectable;
    # the rest are ordered by descending priority
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if not client.startswith('_')),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)

    requested_clients = []
    for client in self._configuration_arg('player_client'):
        if client in allowed_clients:
            requested_clients.append(client)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        else:
            self.report_warning(f'Skipping unsupported client {client}')
    if not requested_clients:
        requested_clients = ['android', 'web']

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions from a snapshot first instead of extending
        # the list from a generator that is iterating over that same list
        music_clients = [
            f'{client}_music' for client in requested_clients if not client.endswith('_music')]
        requested_clients.extend(music_clients)

    return orderedSet(requested_clients)
cf7e015f 2392
c0bc527b
M
2393 def _extract_player_ytcfg(self, client, video_id):
2394 url = {
2395 'web_music': 'https://music.youtube.com',
2396 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2397 }.get(client)
2398 if not url:
2399 return {}
2400 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2401 return self.extract_ytcfg(video_id, webpage) or {}
2402
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
    """
    Yield the player responses for the requested clients, in order

    For the `web` client the initial player response embedded in the
    webpage is used when available instead of an API request. When a
    response is age-gated, the matching `<client>_agegate` client is
    tried next if it exists and was not requested already.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    original_clients = clients
    # Reversed copy used as a stack, so pop() returns clients in the requested order
    pending = clients[::-1]
    while pending:
        client = pending.pop()

        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

        if client == 'web' and initial_pr:
            pr = initial_pr
        else:
            pr = self._extract_player_response(
                client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr)
        if pr:
            yield pr

        if self._is_agegated(pr):
            client = f'{client}_agegate'
            if client in INNERTUBE_CLIENTS and client not in original_clients:
                pending.append(client)

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr and 'web' not in original_clients:
        initial_pr['streamingData'] = None
        yield initial_pr
2437
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """
    Yield format dicts built from the given streaming data

    First yields the progressive/adaptive formats listed under `formats`
    and `adaptiveFormats`, then those found in the HLS and DASH manifests
    (unless skipped through the `skip` extractor argument or the
    `youtube_include_*_manifest` params). An itag that was already
    yielded is not yielded again from a manifest.

    streaming_data is iterated as a sequence of dicts, video_id and
    player_url are passed on to signature decryption and manifest
    downloads, and DASH manifests are not used when is_live is truthy.
    """
    itags, stream_ids = [], []
    itag_qualities, res_qualities = {}, {}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        # The same itag may be offered once per audio track
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit null audioQuality
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            # Remembered so that manifest formats can be given a quality later
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No direct URL: it has to be assembled from the signature cipher
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        if itag:
            itags.append(itag)
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': ', '.join(filter(None, (
                audio_track.get('displayName'),
                # quality is None when the format declares neither a quality nor an audio quality
                fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', '')))),
            'fps': int_or_none(fmt.get('fps')),
            'height': height,
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': audio_track.get('id', '').split('.')[0],
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        # For single-stream formats the total bitrate is that stream's bitrate
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    skip_manifests = self._configuration_arg('skip')
    get_dash = not is_live and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
    get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

    def guess_quality(f):
        # Prefer the quality recorded for the same itag, then for the same height
        for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)):
            if val in qdict:
                return q(qdict[val])
        return -1

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                itag = self._search_regex(
                    r'/itag/(\d+)', f['url'], 'itag', default=None)
                if itag in itags:
                    continue
                if itag:
                    f['format_id'] = itag
                    itags.append(itag)
                f['quality'] = guess_quality(f)
                yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                itag = f['format_id']
                if itag in itags:
                    continue
                if itag:
                    itags.append(itag)
                f['quality'] = guess_quality(f)
                filesize = int_or_none(self._search_regex(
                    r'/clen/(\d+)', f.get('fragment_base_url')
                    or f['url'], 'file size', default=None))
                if filesize:
                    f['filesize'] = filesize
                yield f
2572
def _real_extract(self, url):
    """
    Extract the info dict for a single video URL

    Downloads the watch page, collects the player responses of the
    requested clients and builds formats, thumbnails, metadata,
    subtitles, chapters and availability from them and from the
    initial data.

    Returns the info dict, or a `url_result` for a trailer video id,
    or a playlist result for multifeed videos.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id
    webpage = self._download_webpage(
        webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
    player_url = self._extract_player_url(master_ytcfg, webpage)
    identity_token = self._extract_identity_token(webpage, video_id)

    player_responses = list(self._extract_player_responses(
        self._get_requested_clients(url, smuggled_data),
        video_id, webpage, master_ytcfg, player_url, identity_token))

    # First match of `keys` across all items of `obj`
    get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Without a webpage there are no meta tags to search
    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])
    video_title = (
        get_first(video_details, 'title')
        or self._get_text(microformats, (..., 'title'))
        or search_meta(['og:title', 'twitter:title', 'title']))
    video_description = get_first(video_details, 'shortDescription')

    if not smuggled_data.get('force_singlefeed', False):
        if not self.get_param('noplaylist'):
            multifeed_metadata_list = get_first(
                player_responses,
                ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
                expected_type=str)
            if multifeed_metadata_list:
                entries = []
                feed_ids = []
                for feed in multifeed_metadata_list.split(','):
                    # Unquote should take place before split on comma (,) since textual
                    # fields may contain comma as well (see
                    # https://github.com/ytdl-org/youtube-dl/issues/8536)
                    feed_data = compat_parse_qs(
                        compat_urllib_parse_unquote_plus(feed))

                    def feed_entry(name):
                        return try_get(
                            feed_data, lambda x: x[name][0], compat_str)

                    feed_id = feed_entry('id')
                    if not feed_id:
                        continue
                    feed_title = feed_entry('title')
                    title = video_title
                    if feed_title:
                        title += ' (%s)' % feed_title
                    entries.append({
                        '_type': 'url_transparent',
                        'ie_key': 'Youtube',
                        'url': smuggle_url(
                            '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                            {'force_singlefeed': True}),
                        'title': title,
                    })
                    feed_ids.append(feed_id)
                self.to_screen(
                    'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                    % (', '.join(feed_ids), video_id))
                return self.playlist_result(
                    entries, video_id, video_title, video_description)
        else:
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.raise_no_formats(
                'This video is DRM protected.', expected=True)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # A subreason may be present without any reason
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    for f in formats:
        if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']:  # throttled
            f['source_preference'] = -10
            note = f.get('format_note')
            f['format_note'] = f'{note} (throttled)' if note else '(throttled)'

    # Source is given priority since formats that throttle are given lower source_preference
    # When throttling issue is fully fixed, remove this
    self._sort_formats(formats, ('quality', 'height', 'fps', 'source'))

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break

    thumbnails = []
    thumbnail_dicts = traverse_obj(
        (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
        expected_type=dict, default=[])
    for thumbnail in thumbnail_dicts:
        thumbnail_url = thumbnail.get('url')
        if not thumbnail_url:
            continue
        # Sometimes youtube gives a wrong thumbnail URL. See:
        # https://github.com/yt-dlp/yt-dlp/issues/233
        # https://github.com/ytdl-org/youtube-dl/issues/28023
        if 'maxresdefault' in thumbnail_url:
            thumbnail_url = thumbnail_url.split('?')[0]
        thumbnails.append({
            'url': thumbnail_url,
            'height': int_or_none(thumbnail.get('height')),
            'width': int_or_none(thumbnail.get('width')),
        })
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
    # TODO: Test them also? - For some videos, even these don't exist
    guaranteed_thumbnail_names = [
        'hqdefault', 'hq1', 'hq2', 'hq3', '0',
        'mqdefault', 'mq1', 'mq2', 'mq3',
        'default', '1', '2', '3'
    ]
    thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
    n_thumbnail_names = len(thumbnail_names)

    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
        '_test_url': name in hq_thumbnail_names,
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Names earlier in thumbnail_names get a higher preference; webp beats jpg of the same name
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    duration = int_or_none(
        get_first(video_details, 'lengthSeconds')
        or get_first(microformats, 'lengthSeconds')
        or parse_duration(search_meta('duration'))) or None
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    if is_live is None:
        if is_upcoming or live_content is False:
            is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False
    live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_endtime and live_starttime:
        duration = live_endtime - live_starttime

    info = {
        'id': video_id,
        'title': self._live_title(video_title) if is_live else video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        'description': video_description,
        'upload_date': unified_strdate(
            get_first(microformats, 'uploadDate')
            or search_meta('uploadDate')),
        'uploader': get_first(video_details, 'author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'is_live': is_live,
        'was_live': (False if is_live or is_upcoming or live_content is False
                     else None if is_live is None or is_upcoming is None
                     else live_content),
        'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
        'release_timestamp': live_starttime,
    }

    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    # Converted into dicts to remove duplicates
    captions = {
        sub.get('baseUrl'): sub
        for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
    translation_languages = {
        lang.get('languageCode'): lang.get('languageName')
        for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
    subtitles = {}
    if pctr:
        def process_language(container, base_url, lang_code, sub_name, query):
            # Adds one entry per subtitle format for lang_code to container
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': update_url_query(base_url, query),
                    'name': sub_name,
                })

        for base_url, caption_track in captions.items():
            if not base_url:
                continue
            if caption_track.get('kind') != 'asr':
                lang_code = (
                    remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
                    or caption_track.get('languageCode'))
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code,
                    traverse_obj(caption_track, ('name', 'simpleText')),
                    {})
                continue
            # An `asr` track is offered in every translation language
            automatic_captions = {}
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                process_language(
                    automatic_captions, base_url, trans_code,
                    self._get_text(trans_name, max_runs=1),
                    {'tlang': trans_code})
            info['automatic_captions'] = automatic_captions
        info['subtitles'] = subtitles

    # Start/end times may be given in the URL fragment or query; the fragment is checked first
    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(query[k][0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # strip() must apply to the matched text, not to the group name
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_DATA_RE, video_id,
            'yt initial data')
    if not initial_data:
        # Not found in the webpage; request it from the `next` endpoint instead
        headers = self.generate_api_headers(
            master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
            session_index=self._extract_session_index(master_ytcfg))

        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
            note='Downloading initial data API JSON')

    try:
        # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
        # 'subtitles' is not set yet when the video has no caption tracks
        info.setdefault('subtitles', {})['live_chat'] = [{
            'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]
    except (KeyError, IndexError, TypeError):
        pass

    # Always bound, since the comments post-extractor below reads it
    # even when no initial data could be obtained
    contents = try_get(
        initial_data,
        lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
        list) or []

    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or None)

        for content in contents:
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = self._get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    # Two accessibility label layouts are tried for the like/dislike counts
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    like_count, dislike_count = sbr_tooltip.split(' / ')
                    info.update({
                        'like_count': str_to_int(like_count),
                        'dislike_count': str_to_int(dislike_count),
                    })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                # Album/artist/track rows are only used when no row has a divider line
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = self._get_text(mrr, 'title')
                    mrr_contents_text = self._get_text(mrr, ('contents', 0))
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }
    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = get_first(video_details, 'isPrivate', expected_type=bool)
    is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    # get xsrf for annotations or comments
    get_annotations = self.get_param('writeannotations', False)
    get_comments = self.get_param('getcomments', False)
    if get_annotations or get_comments:
        xsrf_token = None
        if master_ytcfg:
            xsrf_token = try_get(master_ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
        if not xsrf_token:
            xsrf_token = self._search_regex(
                r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
                webpage, 'xsrf token', group='xsrf_token', fatal=False)

    # annotations
    if get_annotations:
        invideo_url = get_first(
            player_responses,
            ('annotations', 0, 'playerAnnotationsUrlsRenderer', 'invideoUrl'),
            expected_type=str)
        if xsrf_token and invideo_url:
            xsrf_field_name = None
            if master_ytcfg:
                xsrf_field_name = try_get(master_ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
            if not xsrf_field_name:
                xsrf_field_name = self._search_regex(
                    r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
                    webpage, 'xsrf field name',
                    group='xsrf_field_name', default='session_token')
            info['annotations'] = self._download_webpage(
                self._proto_relative_url(invideo_url),
                video_id, note='Downloading annotations',
                errnote='Unable to download video annotations', fatal=False,
                data=urlencode_postdata({xsrf_field_name: xsrf_token}))

    if get_comments:
        # Stored as a callable, so the comments are not fetched at this point
        info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
c5e8d7af 3086
5f6a1245 3087
8bdd16b4 3088class YoutubeTabIE(YoutubeBaseInfoExtractor):
3089 IE_DESC = 'YouTube.com tab'
70d5c17b 3090 _VALID_URL = r'''(?x)
3091 https?://
3092 (?:\w+\.)?
3093 (?:
3094 youtube(?:kids)?\.com|
3095 invidio\.us
3096 )/
3097 (?:
fe03a6cd 3098 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 3099 (?P<not_channel>
9ba5705a 3100 feed/|hashtag/|
70d5c17b 3101 (?:playlist|watch)\?.*?\blist=
3102 )|
29f7c58a 3103 (?!(?:%s)\b) # Direct URLs
70d5c17b 3104 )
3105 (?P<id>[^/?\#&]+)
3106 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 3107 IE_NAME = 'youtube:tab'
3108
81127aa5 3109 _TESTS = [{
da692b79 3110 'note': 'playlists, multipage',
8bdd16b4 3111 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3112 'playlist_mincount': 94,
3113 'info_dict': {
3114 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3115 'title': 'Игорь Клейнер - Playlists',
3116 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3117 'uploader': 'Игорь Клейнер',
3118 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 3119 },
3120 }, {
da692b79 3121 'note': 'playlists, multipage, different order',
8bdd16b4 3122 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3123 'playlist_mincount': 94,
3124 'info_dict': {
3125 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3126 'title': 'Игорь Клейнер - Playlists',
3127 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3128 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3129 'uploader': 'Игорь Клейнер',
8bdd16b4 3130 },
201c1459 3131 }, {
da692b79 3132 'note': 'playlists, series',
201c1459 3133 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3134 'playlist_mincount': 5,
3135 'info_dict': {
3136 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3137 'title': '3Blue1Brown - Playlists',
3138 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 3139 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3140 'uploader': '3Blue1Brown',
201c1459 3141 },
8bdd16b4 3142 }, {
da692b79 3143 'note': 'playlists, singlepage',
8bdd16b4 3144 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3145 'playlist_mincount': 4,
3146 'info_dict': {
3147 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3148 'title': 'ThirstForScience - Playlists',
3149 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 3150 'uploader': 'ThirstForScience',
3151 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 3152 }
3153 }, {
3154 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3155 'only_matching': True,
3156 }, {
da692b79 3157 'note': 'basic, single video playlist',
0e30a7b9 3158 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3159 'info_dict': {
0e30a7b9 3160 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3161 'uploader': 'Sergey M.',
3162 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3163 'title': 'youtube-dl public playlist',
81127aa5 3164 },
0e30a7b9 3165 'playlist_count': 1,
9291475f 3166 }, {
da692b79 3167 'note': 'empty playlist',
0e30a7b9 3168 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3169 'info_dict': {
0e30a7b9 3170 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3171 'uploader': 'Sergey M.',
3172 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3173 'title': 'youtube-dl empty playlist',
9291475f
PH
3174 },
3175 'playlist_count': 0,
3176 }, {
da692b79 3177 'note': 'Home tab',
8bdd16b4 3178 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3179 'info_dict': {
8bdd16b4 3180 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3181 'title': 'lex will - Home',
3182 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3183 'uploader': 'lex will',
3184 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3185 },
8bdd16b4 3186 'playlist_mincount': 2,
9291475f 3187 }, {
da692b79 3188 'note': 'Videos tab',
8bdd16b4 3189 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3190 'info_dict': {
8bdd16b4 3191 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3192 'title': 'lex will - Videos',
3193 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3194 'uploader': 'lex will',
3195 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3196 },
8bdd16b4 3197 'playlist_mincount': 975,
9291475f 3198 }, {
da692b79 3199 'note': 'Videos tab, sorted by popular',
8bdd16b4 3200 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3201 'info_dict': {
8bdd16b4 3202 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3203 'title': 'lex will - Videos',
3204 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3205 'uploader': 'lex will',
3206 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3207 },
8bdd16b4 3208 'playlist_mincount': 199,
9291475f 3209 }, {
da692b79 3210 'note': 'Playlists tab',
8bdd16b4 3211 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3212 'info_dict': {
8bdd16b4 3213 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3214 'title': 'lex will - Playlists',
3215 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3216 'uploader': 'lex will',
3217 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3218 },
8bdd16b4 3219 'playlist_mincount': 17,
ac7553d0 3220 }, {
da692b79 3221 'note': 'Community tab',
8bdd16b4 3222 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3223 'info_dict': {
8bdd16b4 3224 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3225 'title': 'lex will - Community',
3226 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3227 'uploader': 'lex will',
3228 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3229 },
3230 'playlist_mincount': 18,
87dadd45 3231 }, {
da692b79 3232 'note': 'Channels tab',
8bdd16b4 3233 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3234 'info_dict': {
8bdd16b4 3235 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3236 'title': 'lex will - Channels',
3237 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3238 'uploader': 'lex will',
3239 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3240 },
deaec5af 3241 'playlist_mincount': 12,
cd684175 3242 }, {
3243 'note': 'Search tab',
3244 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3245 'playlist_mincount': 40,
3246 'info_dict': {
3247 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3248 'title': '3Blue1Brown - Search - linear algebra',
3249 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3250 'uploader': '3Blue1Brown',
3251 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3252 },
6b08cdf6 3253 }, {
a0566bbf 3254 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3255 'only_matching': True,
3256 }, {
a0566bbf 3257 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3258 'only_matching': True,
3259 }, {
a0566bbf 3260 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3261 'only_matching': True,
3262 }, {
3263 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3264 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3265 'info_dict': {
3266 'title': '29C3: Not my department',
3267 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3268 'uploader': 'Christiaan008',
3269 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3270 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3271 },
3272 'playlist_count': 96,
3273 }, {
3274 'note': 'Large playlist',
3275 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3276 'info_dict': {
8bdd16b4 3277 'title': 'Uploads from Cauchemar',
3278 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3279 'uploader': 'Cauchemar',
3280 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3281 },
8bdd16b4 3282 'playlist_mincount': 1123,
3283 }, {
da692b79 3284 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3285 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3286 'only_matching': True,
4b7df0d3
JMF
3287 }, {
3288 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3289 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3290 'info_dict': {
acf757f4
PH
3291 'title': 'Uploads from Interstellar Movie',
3292 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3293 'uploader': 'Interstellar Movie',
8bdd16b4 3294 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3295 },
481cc733 3296 'playlist_mincount': 21,
358de58c 3297 }, {
3298 'note': 'Playlist with "show unavailable videos" button',
3299 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3300 'info_dict': {
3301 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3302 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3303 'uploader': 'Phim Siêu Nhân Nhật Bản',
3304 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3305 },
da692b79 3306 'playlist_mincount': 200,
5d342002 3307 }, {
da692b79 3308 'note': 'Playlist with unavailable videos in page 7',
5d342002 3309 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3310 'info_dict': {
3311 'title': 'Uploads from BlankTV',
3312 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3313 'uploader': 'BlankTV',
3314 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3315 },
da692b79 3316 'playlist_mincount': 1000,
8bdd16b4 3317 }, {
da692b79 3318 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3319 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3320 'info_dict': {
3321 'title': 'Data Analysis with Dr Mike Pound',
3322 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3323 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3324 'uploader': 'Computerphile',
deaec5af 3325 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3326 },
3327 'playlist_mincount': 11,
3328 }, {
a0566bbf 3329 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3330 'only_matching': True,
dacb3a86 3331 }, {
da692b79 3332 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3333 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3334 'info_dict': {
3335 'id': 'FqZTN594JQw',
3336 'ext': 'webm',
3337 'title': "Smiley's People 01 detective, Adventure Series, Action",
3338 'uploader': 'STREEM',
3339 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3340 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3341 'upload_date': '20150526',
3342 'license': 'Standard YouTube License',
3343 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3344 'categories': ['People & Blogs'],
3345 'tags': list,
dbdaaa23 3346 'view_count': int,
dacb3a86
S
3347 'like_count': int,
3348 'dislike_count': int,
3349 },
3350 'params': {
3351 'skip_download': True,
3352 },
13a75688 3353 'skip': 'This video is not available.',
dacb3a86 3354 'add_ie': [YoutubeIE.ie_key()],
481cc733 3355 }, {
8bdd16b4 3356 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3357 'only_matching': True,
66b48727 3358 }, {
8bdd16b4 3359 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3360 'only_matching': True,
a0566bbf 3361 }, {
3362 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3363 'info_dict': {
11f9be09 3364 'id': 'FMtPN8yp5LU', # This will keep changing
a0566bbf 3365 'ext': 'mp4',
deaec5af 3366 'title': compat_str,
a0566bbf 3367 'uploader': 'Sky News',
3368 'uploader_id': 'skynews',
3369 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3370 'upload_date': r're:\d{8}',
3371 'description': compat_str,
a0566bbf 3372 'categories': ['News & Politics'],
3373 'tags': list,
3374 'like_count': int,
3375 'dislike_count': int,
3376 },
3377 'params': {
3378 'skip_download': True,
3379 },
da692b79 3380 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3381 }, {
3382 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3383 'info_dict': {
3384 'id': 'a48o2S1cPoo',
3385 'ext': 'mp4',
3386 'title': 'The Young Turks - Live Main Show',
3387 'uploader': 'The Young Turks',
3388 'uploader_id': 'TheYoungTurks',
3389 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3390 'upload_date': '20150715',
3391 'license': 'Standard YouTube License',
3392 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3393 'categories': ['News & Politics'],
3394 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3395 'like_count': int,
3396 'dislike_count': int,
3397 },
3398 'params': {
3399 'skip_download': True,
3400 },
3401 'only_matching': True,
3402 }, {
3403 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3404 'only_matching': True,
3405 }, {
3406 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3407 'only_matching': True,
09f1580e 3408 }, {
3409 'note': 'A channel that is not live. Should raise error',
3410 'url': 'https://www.youtube.com/user/numberphile/live',
3411 'only_matching': True,
3d3dddc9 3412 }, {
3413 'url': 'https://www.youtube.com/feed/trending',
3414 'only_matching': True,
3415 }, {
3d3dddc9 3416 'url': 'https://www.youtube.com/feed/library',
3417 'only_matching': True,
3418 }, {
3d3dddc9 3419 'url': 'https://www.youtube.com/feed/history',
3420 'only_matching': True,
3421 }, {
3d3dddc9 3422 'url': 'https://www.youtube.com/feed/subscriptions',
3423 'only_matching': True,
3424 }, {
3d3dddc9 3425 'url': 'https://www.youtube.com/feed/watch_later',
3426 'only_matching': True,
3427 }, {
da692b79 3428 'note': 'Recommended - redirects to home page',
3d3dddc9 3429 'url': 'https://www.youtube.com/feed/recommended',
3430 'only_matching': True,
29f7c58a 3431 }, {
da692b79 3432 'note': 'inline playlist with not always working continuations',
29f7c58a 3433 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3434 'only_matching': True,
3435 }, {
3436 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3437 'only_matching': True,
3438 }, {
3439 'url': 'https://www.youtube.com/course',
3440 'only_matching': True,
3441 }, {
3442 'url': 'https://www.youtube.com/zsecurity',
3443 'only_matching': True,
3444 }, {
3445 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3446 'only_matching': True,
3447 }, {
3448 'url': 'https://www.youtube.com/TheYoungTurks/live',
3449 'only_matching': True,
39ed931e 3450 }, {
3451 'url': 'https://www.youtube.com/hashtag/cctv9',
3452 'info_dict': {
3453 'id': 'cctv9',
3454 'title': '#cctv9',
3455 },
3456 'playlist_mincount': 350,
201c1459 3457 }, {
3458 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3459 'only_matching': True,
9297939e 3460 }, {
da692b79 3461 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3462 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3463 'only_matching': True
fe03a6cd 3464 }, {
3465 'note': '/browse/ should redirect to /channel/',
3466 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3467 'only_matching': True
3468 }, {
3469 'note': 'VLPL, should redirect to playlist?list=PL...',
3470 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3471 'info_dict': {
3472 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3473 'uploader': 'NoCopyrightSounds',
3474 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3475 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3476 'title': 'NCS Releases',
3477 },
3478 'playlist_mincount': 166,
18db7548 3479 }, {
3480 'note': 'Topic, should redirect to playlist?list=UU...',
3481 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3482 'info_dict': {
3483 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3484 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3485 'title': 'Uploads from Royalty Free Music - Topic',
3486 'uploader': 'Royalty Free Music - Topic',
3487 },
3488 'expected_warnings': [
3489 'A channel/user page was given',
3490 'The URL does not have a videos tab',
3491 ],
3492 'playlist_mincount': 101,
3493 }, {
3494 'note': 'Topic without a UU playlist',
3495 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3496 'info_dict': {
3497 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3498 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3499 },
3500 'expected_warnings': [
3501 'A channel/user page was given',
3502 'The URL does not have a videos tab',
3503 'Falling back to channel URL',
3504 ],
3505 'playlist_mincount': 9,
abcdd12b 3506 }, {
3507 'note': 'Youtube music Album',
3508 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3509 'info_dict': {
3510 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3511 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3512 },
3513 'playlist_count': 50,
47193e02 3514 }, {
3515 'note': 'unlisted single video playlist',
3516 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3517 'info_dict': {
3518 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3519 'uploader': 'colethedj',
3520 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3521 'title': 'yt-dlp unlisted playlist test',
3522 'availability': 'unlisted'
3523 },
3524 'playlist_count': 1,
29f7c58a 3525 }]
3526
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL that YoutubeIE accepts is left to it; everything else is
    decided by the parent class's ``suitable``.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 3531
3532 def _extract_channel_id(self, webpage):
3533 channel_id = self._html_search_meta(
3534 'channelId', webpage, 'channel id', default=None)
3535 if channel_id:
3536 return channel_id
3537 channel_url = self._html_search_meta(
3538 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3539 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3540 'twitter:app:url:googleplay'), webpage, 'channel url')
3541 return self._search_regex(
3542 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3543 channel_url, 'channel id')
15f6397c 3544
8bdd16b4 3545 @staticmethod
cd7c66cf 3546 def _extract_basic_item_renderer(item):
3547 # Modified from _extract_grid_item_renderer
201c1459 3548 known_basic_renderers = (
3549 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3550 )
3551 for key, renderer in item.items():
201c1459 3552 if not isinstance(renderer, dict):
cd7c66cf 3553 continue
201c1459 3554 elif key in known_basic_renderers:
3555 return renderer
3556 elif key.startswith('grid') and key.endswith('Renderer'):
3557 return renderer
8bdd16b4 3558
8bdd16b4 3559 def _grid_entries(self, grid_renderer):
3560 for item in grid_renderer['items']:
3561 if not isinstance(item, dict):
39b62db1 3562 continue
cd7c66cf 3563 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 3564 if not isinstance(renderer, dict):
3565 continue
052e1350 3566 title = self._get_text(renderer, 'title')
fe93e2c4 3567
8bdd16b4 3568 # playlist
3569 playlist_id = renderer.get('playlistId')
3570 if playlist_id:
3571 yield self.url_result(
3572 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3573 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3574 video_title=title)
201c1459 3575 continue
8bdd16b4 3576 # video
3577 video_id = renderer.get('videoId')
3578 if video_id:
3579 yield self._extract_video(renderer)
201c1459 3580 continue
8bdd16b4 3581 # channel
3582 channel_id = renderer.get('channelId')
3583 if channel_id:
8bdd16b4 3584 yield self.url_result(
3585 'https://www.youtube.com/channel/%s' % channel_id,
3586 ie=YoutubeTabIE.ie_key(), video_title=title)
201c1459 3587 continue
3588 # generic endpoint URL support
3589 ep_url = urljoin('https://www.youtube.com/', try_get(
3590 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3591 compat_str))
3592 if ep_url:
3593 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3594 if ie.suitable(ep_url):
3595 yield self.url_result(
3596 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3597 break
8bdd16b4 3598
3d3dddc9 3599 def _shelf_entries_from_content(self, shelf_renderer):
3600 content = shelf_renderer.get('content')
3601 if not isinstance(content, dict):
8bdd16b4 3602 return
cd7c66cf 3603 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3604 if renderer:
3605 # TODO: add support for nested playlists so each shelf is processed
3606 # as separate playlist
3607 # TODO: this includes only first N items
3608 for entry in self._grid_entries(renderer):
3609 yield entry
3610 renderer = content.get('horizontalListRenderer')
3611 if renderer:
3612 # TODO
3613 pass
8bdd16b4 3614
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a URL result for the shelf itself, then its embedded entries.

    When *skip_channels* is true and the shelf URL contains ``/channels?``,
    nothing at all is yielded for that shelf.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking
        # endpoint.commandMetadata.webCommandMetadata.webPageType
        # == WEB_PAGE_TYPE_CHANNEL will not work
        is_channels_link = '/channels?' in shelf_url
        if skip_channels and is_channels_link:
            return
        shelf_title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=shelf_title)
    # The shelf may carry no URL of its own; fall back to extracting
    # from its content
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 3631
8bdd16b4 3632 def _playlist_entries(self, video_list_renderer):
3633 for content in video_list_renderer['contents']:
3634 if not isinstance(content, dict):
3635 continue
3636 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3637 if not isinstance(renderer, dict):
3638 continue
3639 video_id = renderer.get('videoId')
3640 if not video_id:
3641 continue
3642 yield self._extract_video(renderer)
07aeced6 3643
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held at ``content.videoRenderer``, if it has a ``videoId``."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
    if video_renderer and video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3651
8bdd16b4 3652 def _video_entry(self, video_renderer):
3653 video_id = video_renderer.get('videoId')
3654 if video_id:
3655 return self._extract_video(video_renderer)
dacb3a86 3656
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by one community post.

    In order: the attached video (if any), the attached playlist (if any),
    then every link in the post text that YoutubeIE can handle, except a
    link pointing at the attached video itself.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        ep_url = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        if ep_video_id == video_id:
            # Already yielded as the attachment
            continue
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 3692
8bdd16b4 3693 def _post_thread_continuation_entries(self, post_thread_continuation):
3694 contents = post_thread_continuation.get('contents')
3695 if not isinstance(contents, list):
3696 return
3697 for content in contents:
3698 renderer = content.get('backstagePostThreadRenderer')
3699 if not isinstance(renderer, dict):
3700 continue
3701 for entry in self._post_thread_entries(renderer):
3702 yield entry
07aeced6 3703
39ed931e 3704 r''' # unused
3705 def _rich_grid_entries(self, contents):
3706 for content in contents:
3707 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3708 if video_renderer:
3709 entry = self._video_entry(video_renderer)
3710 if entry:
3711 yield entry
3712 '''
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield every entry of *tab*, following continuations page by page.

    The tab's initial ``sectionListRenderer`` / ``richGridRenderer`` is
    walked first. While a continuation is available, further pages are
    requested with ``_extract_response`` and dispatched on the shape of
    the response: either a ``continuationContents`` mapping or an
    ``appendContinuationItemsAction`` item list. Iteration stops when there
    is no continuation, no response, or no recognised renderer.
    """
    # One-slot holder through which extract_entries hands the next
    # continuation back to this scope (Python 2 does not support nonlocal)
    continuation_list = [None]

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
            if not isinstance(content, dict):
                continue
            section = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not section:
                rich_item = content.get('richItemRenderer')
                if rich_item:
                    for entry in self._rich_entries(rich_item):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue

            for section_item in try_get(section, lambda x: x['contents'], list) or []:
                if not isinstance(section_item, dict):
                    continue
                # Only the first recognised renderer of each item is used
                for key, renderer in section_item.items():
                    if key not in section_handlers:
                        continue
                    for entry in section_handlers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(section)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return

    # Renderers that may appear inside an itemSectionRenderer
    section_handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
    }
    # Keys of response['continuationContents']
    known_continuation_renderers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Key of the first continuation item -> (handler, key under which the
    # handler expects to find the item list)
    known_item_renderers = {
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    initial_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(initial_renderer):
        yield entry
    continuation = continuation_list[0]
    visitor_data = None

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep the previous visitor data when the response carries none
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        # First response shape: a continuationContents mapping
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        known_pair = next(
            ((key, value) for key, value in continuation_contents.items()
             if key in known_continuation_renderers),
            None)
        continuation_renderer = None
        if known_pair is not None:
            handler_key, continuation_renderer = known_pair
            continuation_list[0] = None
            for entry in known_continuation_renderers[handler_key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
        if continuation_renderer:
            continue

        # Second response shape: appended continuation items, dispatched on
        # the keys of the first item
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        first_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        item_key = next((key for key in first_item if key in known_item_renderers), None)
        if item_key is None:
            break
        handler, list_key = known_item_renderers[item_key]
        video_items_renderer = {list_key: continuation_items}
        continuation_list[0] = None
        for entry in handler(video_items_renderer):
            yield entry
        continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
9558dcec 3828
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the tab whose ``selected`` flag is exactly True.

    Each tab is looked up under ``tabRenderer`` / ``expandableTabRenderer``.
    Raises ExtractorError when no tab is selected.
    """
    for candidate in tabs:
        tab_renderer = dict_get(candidate, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    # Reached only when the loop found nothing
    raise ExtractorError('Unable to find selected tab')
b82f815f 3837
@classmethod
def _extract_uploader(cls, data):
    """Read uploader name, id and URL from the secondary sidebar renderer.

    Returns a dict containing only the keys whose value is not None; the
    dict is empty when no video owner is present in `data`.
    """
    secondary_info = cls._extract_sidebar_info_renderer(
        data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        secondary_info,
        lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    found = {
        'uploader': owner.get('text'),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)),
    }
    return {key: value for key, value in found.items() if value is not None}
8bdd16b4 3852
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a page that is organised in tabs.

    Metadata comes from the channel metadata renderer in `data` or, failing
    that, the playlist metadata renderer; the entries are produced lazily
    from the currently selected tab.
    """
    current_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    meta_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if meta_renderer:
        channel_name = meta_renderer.get('title')
        channel_url = meta_renderer.get('channelUrl')
        channel_id = meta_renderer.get('externalId')
    else:
        meta_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    raw_thumbnails = []
    if meta_renderer:
        title = meta_renderer.get('title')
        description = meta_renderer.get('description', '')
        # Only set for channels; playlists fall back to item_id below
        playlist_id = channel_id
        tags = meta_renderer.get('keywords', '').split()
        raw_thumbnails = (
            try_get(meta_renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    # Keep only thumbnails that are dicts carrying a usable URL
    thumbnails = []
    for thumb in raw_thumbnails:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag = try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag or playlist_id
    # Append the tab's own title fields (if any) to the playlist title
    for tab_field in ('title', 'expandedText'):
        title += format_field(current_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel_* keys mirror the (possibly just updated) uploader_* keys
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    ytcfg = self.extract_ytcfg(item_id, webpage)
    entries = self._entries(
        current_tab, playlist_id,
        self._extract_identity_token(webpage, item_id),
        self._extract_account_syncid(ytcfg, data), ytcfg)
    return self.playlist_result(entries, **metadata)
73c4ac2c 3927
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a Mix playlist, page by page.

    After the videos of the given `playlist` renderer are yielded, further
    pages are requested from the 'next' endpoint. Extraction stops when a
    page has no videos, has nothing after the previously seen last video,
    repeats the very first video, or when the response carries no playlist.
    """
    first_id = last_id = None
    ytcfg = self.extract_ytcfg(playlist_id, webpage)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video of the previous page if it
        # shows up again in this one; otherwise start from the beginning
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The endpoint may be missing; every field below has a fallback,
        # so use an empty dict instead of failing on None.get()
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # Nothing to page through any more; do not hand None to
            # _playlist_entries (presumably it indexes its argument - verify)
            return
cd7c66cf 3963
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Handle a playlist panel found on a watch page.

    When the panel links to a playlist URL different from `url`, extraction
    is delegated to that URL; otherwise the panel is treated as a Mix.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Everything except mix playlists is delegated to the regular tab-based playlist URL
    relative_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, relative_url)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, webpage),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3981
def _extract_availability(self, data):
    """
    Determine the availability of a playlist/tab from its badges.
    Note: nothing is assumed to be public unless YouTube says so explicitly
    @param data: response
    """
    primary_info = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(primary_info)

    # Personal playlists, when authenticated, expose a visibility dropdown
    # instead of a badge; treat the selected entry's label as a badge
    dropdown_entries = try_get(
        primary_info,
        lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'],
        list) or []
    for entry in dropdown_entries:
        if not try_get(entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        selected_label = self._get_text(entry, ('privacyDropdownItemRenderer', 'label'))
        if selected_label:
            labels.add(selected_label.lower())
            break

    private = unlisted = None
    for current in labels:
        if current == 'unlisted':
            unlisted = True
        elif current == 'private':
            private = True
        elif current == 'public':
            unlisted = private = False
    return self._availability(private, False, False, False, unlisted)
4013
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first truthy `info_renderer` entry of the playlist sidebar.

    Entries are only accepted when they are of `expected_type`; None is
    returned when the sidebar is missing or has no such entry.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next((found for found in candidates if found), None)
4022
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Request the playlist again with unavailable videos included.

    The browse id and params are taken from the 'show unavailable videos'
    menu entry when it exists; otherwise defaults derived from `item_id`
    are used. Returns None when there is no primary sidebar renderer.
    """
    primary_info = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not primary_info:
        return

    browse_id = params = None
    menu_items = try_get(
        primary_info, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for entry in menu_items:
        if not isinstance(entry, dict):
            continue
        nav_renderer = entry.get('menuNavigationItemRenderer')
        label = try_get(
            nav_renderer, lambda x: x['text']['simpleText'], compat_str)
        if label and label.lower() == 'show unavailable videos':
            endpoint = try_get(
                nav_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id = endpoint.get('browseId')
            params = endpoint.get('params')
            break

    ytcfg = self.extract_ytcfg(item_id, webpage)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=try_get(
            self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    # Non-fatal: the caller keeps its current data when this request fails
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4061
def _extract_webpage(self, url, item_id):
    """Download `url` and return (webpage, initial data).

    The download is repeated, up to the 'extractor_retries' parameter, while
    the initial data has neither 'contents' nor 'currentVideoEndpoint'.
    Raises ExtractorError once the retries are used up.
    """
    max_retries = self.get_param('extractor_retries', 3)
    failure_msg = 'Incomplete yt initial data recieved'
    attempt = -1
    while attempt < max_retries:
        attempt += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if attempt:
            self.report_warning('%s. Retrying ...' % failure_msg)
        retry_note = ' (retry #%d)' % attempt if attempt else ''
        webpage = self._download_webpage(
            url, item_id, 'Downloading webpage%s' % retry_note)
        data = self.extract_yt_initial_data(item_id, webpage)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        # Alerts are only extracted here when the data turned out incomplete
        self._extract_and_report_alerts(data)
        if attempt >= max_retries:
            raise ExtractorError(failure_msg)
    return webpage, data
4083
@staticmethod
def _smuggle_data(entries, data):
    """Lazily yield `entries`, smuggling `data` into each entry's URL.

    Entries are passed through untouched when `data` is falsy.
    """
    for item in entries:
        if not data:
            yield item
            continue
        item['url'] = smuggle_url(item['url'], data)
        yield item
4090
def _real_extract(self, url):
    """Extract `url`, carrying smuggled data over to every resulting entry."""
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True
    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        # Re-attach the smuggled data so nested extractions can see it too
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
4099
# Splits a URL into 'pre' (the part matched by _VALID_URL), an optional
# '/tab' segment that is only attempted when the 'channel_type' group took
# part in the match, and 'post' (everything that remains).
_url_re = re.compile(
    r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4101
def __real_extract(self, url, smuggled_data):
    """Resolve a tab, playlist or watch URL into a playlist or video result.

    The URL is first normalised (host, music-specific ids, default tab),
    then the webpage is downloaded and dispatched on what its initial data
    contains: tabs, a watch-page playlist panel, or a single video.
    Raises ExtractorError when none of these can be recognised.
    """
    item_id = self._match_id(url)
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])
    playlist_url_tmpl = 'https://www.youtube.com/playlist?list=%s'

    def get_mobj(target_url):
        # Groups that did not take part in the match become empty strings
        groups = self._url_re.match(target_url).groupdict()
        return {name: '' if value is None else value for name, value in groups.items()}

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre = mobj['pre']
    tab = mobj['tab'].lower()
    post = mobj['post']
    is_channel = not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':
            # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre, tab, post, is_channel = playlist_url_tmpl % item_id, '', '', False
        elif id_prefix == 'MP':
            # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
            music_webpage = self._download_webpage(
                'https://music.youtube.com/channel/%s' % item_id, item_id)
            item_id = self._search_regex(
                r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                music_webpage, 'playlist id')
            pre, tab, post, is_channel = playlist_url_tmpl % item_id, '', '', False
        elif mobj['channel_type'] == 'browse':
            # Youtube music /browse/ should be changed to /channel/
            pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = playlist_url_tmpl % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            requested_tab = mobj['tab']
            if requested_tab == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if requested_tab == '/videos' and tab_name.lower() != requested_tab[1:]:
                if mobj['not_channel'] or item_id[:2] != 'UC':
                    self.report_warning(
                        'The URL does not have a %s tab. %s is being downloaded instead' % (requested_tab[1:], tab_name))
                else:
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning(
                        'The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (requested_tab[1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            if alert_type == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        # Only switch over once the playlist page is known to be usable
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    if mobj['tab'] != '/live':  # live tab is expected to redirect to video
        self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
    return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 4216
c5e8d7af 4217
class YoutubePlaylistIE(InfoExtractor):
    """Matches bare playlist ids and playlist URLs and hands them to YoutubeTabIE."""

    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE handles or that carry a video id."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        if has_video_id:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite the input into a /playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query if there is one, else build it from the id
        query = parse_qs(url) or {'list': playlist_id}
        url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4302
4303
class YoutubeYtBeIE(InfoExtractor):
    """Turns youtu.be links that carry a playlist id into watch URLs for YoutubeTabIE."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent watch URL and delegate it to YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4342
4343
class YoutubeYtUserIE(InfoExtractor):
    """Maps the "ytuser:<name>" shorthand to the user's page URL."""

    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the matched user name to YoutubeTabIE as a /user/ URL."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4357
b05654f0 4358
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Maps the ":ytfav" shorthand to the 'LL' playlist URL."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed 'LL' playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4376
4377
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor that pages through the 'search' API endpoint."""

    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to `n` video results for `query`, following continuations."""
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS
        yielded = 0
        continuation = {}
        for page_num in itertools.count(1):
            request.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # The first page and continuation pages keep their sections in different places
            sections = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for section in sections:
                continuation = continuation or self._extract_continuation({'contents': [section]})

                section_items = try_get(
                    section, lambda x: x['itemSectionRenderer']['contents'], list)
                for item in section_items or []:
                    video = item.get('videoRenderer') if isinstance(item, dict) else None
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    yielded += 1
                    if yielded == n:
                        return

            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query, query)
75dff0ee 4445
c9ae7b95 4446
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE that sends a fixed search 'params' value."""

    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as 'params' with every search request
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4452
c9ae7b95 4453
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Extracts the results of a youtube.com/results search URL."""

    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return the URL pattern of this class as it is."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run the search described by the URL's query string."""
        url_query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        search_terms = url_query.get('search_query') or url_query.get('q')
        query = search_terms[0]
        # The 'sp' value of the URL (if any) becomes the search 'params'
        self._SEARCH_PARAMS = url_query.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4480
4481
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed extractors.
    Every subclass has to provide the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        """Extractor name derived from the feed name."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the /feed/ URL of this feed."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4498
4499
class YoutubeWatchLaterIE(InfoExtractor):
    """Maps the ":ytwatchlater" shorthand to the 'WL' playlist URL."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed 'WL' playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4512
4513
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for 'recommended'; also matches the bare youtube.com front page URL."""

    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    # Overrides the True inherited from the feeds base class
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4529
1ed5b5c9 4530
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'subscriptions' feed."""

    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4542
1ed5b5c9 4543
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'history' feed."""

    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4552
4553
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catches watch URLs that lost their video id and explains the likely cause.

    The pattern only matches URLs that end right after /watch (optionally
    with one id-less parameter) or after an attribution_link parameter.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError that hints at shell quoting."""
        # The suggested command names this program (yt-dlp); the message
        # previously pointed users at a different executable.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4601
4602
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catches watch URLs whose video id is only 1 to 10 characters long."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the short id."""
        short_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (short_id, url)
        raise ExtractorError(message, expected=True)