]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
Add option `--cookies-from-browser` to load cookies from a browser (#488)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
2d6659b9 5import base64
d92f5d5a 6import calendar
109dd3b2 7import copy
fe93e2c4 8import datetime
a5c56234 9import hashlib
0ca96d48 10import itertools
c5e8d7af 11import json
c4417ddb 12import os.path
d77ab8e2 13import random
c5e8d7af 14import re
8a784c74 15import time
e0df6211 16import traceback
c5e8d7af 17
b05654f0 18from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 19from ..compat import (
edf3e38e 20 compat_chr,
29f7c58a 21 compat_HTTPError,
c5e8d7af 22 compat_parse_qs,
545cc85d 23 compat_str,
7fd002c0 24 compat_urllib_parse_unquote_plus,
15707c7e 25 compat_urllib_parse_urlencode,
7c80519c 26 compat_urllib_parse_urlparse,
7c61bd36 27 compat_urlparse,
4bb4a188 28)
545cc85d 29from ..jsinterp import JSInterpreter
4bb4a188 30from ..utils import (
2d6659b9 31 bytes_to_intlist,
c5e8d7af 32 clean_html,
d92f5d5a 33 datetime_from_str,
11f9be09 34 dict_get,
358de58c 35 error_to_compat_str,
c5e8d7af 36 ExtractorError,
2d30521a 37 float_or_none,
11f9be09 38 format_field,
dd27fd17 39 int_or_none,
2d6659b9 40 intlist_to_bytes,
94278f72 41 mimetype2ext,
11f9be09 42 orderedSet,
6310acf5 43 parse_codecs,
49bd8c66 44 parse_count,
7c80519c 45 parse_duration,
7ea65411 46 parse_iso8601,
dca3ff4a 47 qualities,
3995d37d 48 remove_start,
cf7e015f 49 smuggle_url,
dbdaaa23 50 str_or_none,
c93d53f5 51 str_to_int,
7c365c21 52 traverse_obj,
556dbe7f 53 try_get,
c5e8d7af
PH
54 unescapeHTML,
55 unified_strdate,
cf7e015f 56 unsmuggle_url,
8bdd16b4 57 update_url_query,
21c340b8 58 url_or_none,
6e6bc8da 59 urlencode_postdata,
fe93e2c4 60 urljoin,
7c365c21 61 variadic,
c5e8d7af
PH
62)
63
5f6a1245 64
201c1459 65def parse_qs(url):
66 return compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
67
68
de7f3446 69class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b
JMF
70 """Provide base functions for Youtube extractors"""
# Google account sign-in endpoints.
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

_LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
_CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
# '{0}' is a str.format placeholder for the TL value
_TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

# Regex alternation of reserved path names.
# NOTE(review): presumably used by URL patterns elsewhere in the file to tell
# these paths apart from channel names - confirm against the users.
_RESERVED_NAMES = (
    r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
    r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
    r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

_NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False

# Regex for a playlist ID: one of the listed prefixes followed by at least
# 10 ID characters, or one of the bare IDs RDMM, WL, LL, LM
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 88
b2e8bc1b 89 def _login(self):
83317f69 90 """
91 Attempt to log in to YouTube.
92 True is returned if successful or skipped.
93 False is returned if login failed.
94
95 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
96 """
9d5d4d64 97
98 def warn(message):
99 self.report_warning(message)
100
101 # username+password login is broken
982ee69a
MB
102 if (self._LOGIN_REQUIRED
103 and self.get_param('cookiefile') is None
104 and self.get_param('cookiesfrombrowser') is None):
9d5d4d64 105 self.raise_login_required(
106 'Login details are needed to download this content', method='cookies')
68217024 107 username, password = self._get_login_info()
9d5d4d64 108 if username:
109 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
110 return
9d5d4d64 111
2d6659b9 112 # Everything below this is broken!
113 r'''
b2e8bc1b
JMF
114 # No authentication to be performed
115 if username is None:
a06916d9 116 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
69ea8ca4 117 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
a06916d9 118 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
545cc85d 119 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
83317f69 120 return True
b2e8bc1b 121
7cc3570e
PH
122 login_page = self._download_webpage(
123 self._LOGIN_URL, None,
69ea8ca4
PH
124 note='Downloading login page',
125 errnote='unable to fetch login page', fatal=False)
7cc3570e
PH
126 if login_page is False:
127 return
b2e8bc1b 128
1212e997 129 login_form = self._hidden_inputs(login_page)
c5e8d7af 130
e00eb564
S
131 def req(url, f_req, note, errnote):
132 data = login_form.copy()
133 data.update({
134 'pstMsg': 1,
135 'checkConnection': 'youtube',
136 'checkedDomains': 'youtube',
137 'hl': 'en',
138 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
3995d37d 139 'f.req': json.dumps(f_req),
e00eb564
S
140 'flowName': 'GlifWebSignIn',
141 'flowEntry': 'ServiceLogin',
baf67a60
S
142 # TODO: reverse actual botguard identifier generation algo
143 'bgRequest': '["identifier",""]',
041bc3ad 144 })
e00eb564
S
145 return self._download_json(
146 url, None, note=note, errnote=errnote,
147 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
148 fatal=False,
149 data=urlencode_postdata(data), headers={
150 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
151 'Google-Accounts-XSRF': 1,
152 })
153
3995d37d
S
154 lookup_req = [
155 username,
156 None, [], None, 'US', None, None, 2, False, True,
157 [
158 None, None,
159 [2, 1, None, 1,
160 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
161 None, [], 4],
162 1, [None, None, []], None, None, None, True
163 ],
164 username,
165 ]
166
e00eb564 167 lookup_results = req(
3995d37d 168 self._LOOKUP_URL, lookup_req,
e00eb564
S
169 'Looking up account info', 'Unable to look up account info')
170
171 if lookup_results is False:
172 return False
041bc3ad 173
3995d37d
S
174 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
175 if not user_hash:
176 warn('Unable to extract user hash')
177 return False
178
179 challenge_req = [
180 user_hash,
181 None, 1, None, [1, None, None, None, [password, None, True]],
182 [
183 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
184 1, [None, None, []], None, None, None, True
185 ]]
83317f69 186
3995d37d
S
187 challenge_results = req(
188 self._CHALLENGE_URL, challenge_req,
189 'Logging in', 'Unable to log in')
83317f69 190
3995d37d 191 if challenge_results is False:
e00eb564 192 return
83317f69 193
3995d37d
S
194 login_res = try_get(challenge_results, lambda x: x[0][5], list)
195 if login_res:
196 login_msg = try_get(login_res, lambda x: x[5], compat_str)
197 warn(
198 'Unable to login: %s' % 'Invalid password'
199 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
200 return False
201
202 res = try_get(challenge_results, lambda x: x[0][-1], list)
203 if not res:
204 warn('Unable to extract result entry')
205 return False
206
9a6628aa
S
207 login_challenge = try_get(res, lambda x: x[0][0], list)
208 if login_challenge:
209 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
210 if challenge_str == 'TWO_STEP_VERIFICATION':
3995d37d
S
211 # SEND_SUCCESS - TFA code has been successfully sent to phone
212 # QUOTA_EXCEEDED - reached the limit of TFA codes
9a6628aa 213 status = try_get(login_challenge, lambda x: x[5], compat_str)
3995d37d
S
214 if status == 'QUOTA_EXCEEDED':
215 warn('Exceeded the limit of TFA codes, try later')
216 return False
217
218 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
219 if not tl:
220 warn('Unable to extract TL')
221 return False
222
223 tfa_code = self._get_tfa_info('2-step verification code')
224
225 if not tfa_code:
226 warn(
227 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
228 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
229 return False
230
231 tfa_code = remove_start(tfa_code, 'G-')
232
233 tfa_req = [
234 user_hash, None, 2, None,
235 [
236 9, None, None, None, None, None, None, None,
237 [None, tfa_code, True, 2]
238 ]]
239
240 tfa_results = req(
241 self._TFA_URL.format(tl), tfa_req,
242 'Submitting TFA code', 'Unable to submit TFA code')
243
244 if tfa_results is False:
245 return False
246
247 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
248 if tfa_res:
249 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
250 warn(
251 'Unable to finish TFA: %s' % 'Invalid TFA code'
252 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
253 return False
254
255 check_cookie_url = try_get(
256 tfa_results, lambda x: x[0][-1][2], compat_str)
9a6628aa
S
257 else:
258 CHALLENGES = {
259 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
260 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
261 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
262 }
263 challenge = CHALLENGES.get(
264 challenge_str,
265 '%s returned error %s.' % (self.IE_NAME, challenge_str))
266 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
267 return False
3995d37d
S
268 else:
269 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
270
271 if not check_cookie_url:
272 warn('Unable to extract CheckCookie URL')
273 return False
e00eb564
S
274
275 check_cookie_results = self._download_webpage(
3995d37d
S
276 check_cookie_url, None, 'Checking cookie', fatal=False)
277
278 if check_cookie_results is False:
279 return False
e00eb564 280
3995d37d
S
281 if 'https://myaccount.google.com/' not in check_cookie_results:
282 warn('Unable to log in')
b2e8bc1b 283 return False
e00eb564 284
b2e8bc1b 285 return True
2d6659b9 286 '''
b2e8bc1b 287
cce889b9 288 def _initialize_consent(self):
289 cookies = self._get_cookies('https://www.youtube.com/')
290 if cookies.get('__Secure-3PSID'):
291 return
292 consent_id = None
293 consent = cookies.get('CONSENT')
294 if consent:
295 if 'YES' in consent.value:
296 return
297 consent_id = self._search_regex(
298 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
299 if not consent_id:
300 consent_id = random.randint(100, 999)
301 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 302
b2e8bc1b 303 def _real_initialize(self):
cce889b9 304 self._initialize_consent()
b2e8bc1b
JMF
305 if self._downloader is None:
306 return
b2e8bc1b
JMF
307 if not self._login():
308 return
c5e8d7af 309
# Regexes matching the assignment of a JSON object to ytInitialData /
# ytInitialPlayerResponse; group 1 captures the object (non-greedy)
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
# Text that may follow the JSON object. extract_yt_initial_data appends it to
# the data regex to bound the non-greedy match before trying the bare regex
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 313
# Default ytcfg values per Innertube client, keyed by client name.
# _get_default_ytcfg returns deep copies of these entries, and _ytcfg_get_safe
# falls back to them when a value is missing from an extracted ytcfg.
# INNERTUBE_CONTEXT_CLIENT_NAME is the value generate_api_headers sends as
# the X-YouTube-Client-Name header.
_YT_DEFAULT_YTCFGS = {
    'WEB': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'WEB',
        'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'WEB_REMIX': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
        'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67
    },
    'WEB_EMBEDDED_PLAYER': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
        'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'ANDROID': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'ANDROID',
        'INNERTUBE_CLIENT_VERSION': '16.20',
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3
    },
    'ANDROID_EMBEDDED_PLAYER': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
        'INNERTUBE_CLIENT_VERSION': '16.20',
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55
    },
    'ANDROID_MUSIC': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
        'INNERTUBE_CLIENT_VERSION': '4.32',
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21
    },
    'IOS': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'IOS',
        'INNERTUBE_CLIENT_VERSION': '16.20',
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.20',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5

    },
    'IOS_MUSIC': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'IOS_MUSIC',
        'INNERTUBE_CLIENT_VERSION': '4.32',
        'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.32',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26
    },
    'IOS_MESSAGES_EXTENSION': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'IOS_MESSAGES_EXTENSION',
        'INNERTUBE_CLIENT_VERSION': '16.20',
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.20',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66
    }
}
443
# API hostnames keyed by client name. _get_innertube_host looks up the
# requested client first and then 'WEB', so clients without an entry here
# end up with the 'WEB' host.
# NOTE(review): 'DIRECT' is not referenced in this part of the file - confirm its users.
_YT_DEFAULT_INNERTUBE_HOSTS = {
    'DIRECT': 'youtubei.googleapis.com',
    'WEB': 'www.youtube.com',
    'WEB_REMIX': 'music.youtube.com',
    'ANDROID_MUSIC': 'music.youtube.com'
}
450
# Maps client aliases to Innertube client names.
# clients starting with _ cannot be explicitly requested by the user
# NOTE(review): 'TVHTML5' has no entry in _YT_DEFAULT_YTCFGS, so
# _get_default_ytcfg falls back to the WEB defaults for it - confirm this is intended.
_YT_CLIENTS = {
    'web': 'WEB',
    'web_music': 'WEB_REMIX',
    '_web_embedded': 'WEB_EMBEDDED_PLAYER',
    '_web_agegate': 'TVHTML5',
    'android': 'ANDROID',
    'android_music': 'ANDROID_MUSIC',
    '_android_embedded': 'ANDROID_EMBEDDED_PLAYER',
    '_android_agegate': 'ANDROID',
    'ios': 'IOS',
    'ios_music': 'IOS_MUSIC',
    '_ios_embedded': 'IOS_MESSAGES_EXTENSION',
    '_ios_agegate': 'IOS'
}
466
109dd3b2 467 def _get_default_ytcfg(self, client='WEB'):
468 if client in self._YT_DEFAULT_YTCFGS:
469 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
470 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
471 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
472
def _get_innertube_host(self, client='WEB'):
    """Look up the API hostname via dict_get, trying *client* first and then 'WEB'."""
    candidate_keys = (client, 'WEB')
    return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, candidate_keys)
475
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
    """
    try_get on *ytcfg* with a fallback to the default ytcfg.

    If applying *getter* to *ytcfg* gives a falsy result, the same getter
    is applied to the default ytcfg of *default_client*.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
480
def _extract_client_name(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_NAME from *ytcfg*, else from the defaults of *default_client*."""
    def client_name(cfg):
        return cfg['INNERTUBE_CLIENT_NAME']

    return self._ytcfg_get_safe(ytcfg, client_name, compat_str, default_client)
483
@staticmethod
def _extract_session_index(*data):
    """
    Return the first usable SESSION_INDEX among the given ytcfgs.

    Each value is passed through int_or_none; None is returned when no
    argument yields a value.
    """
    candidates = (
        int_or_none(try_get(cfg, lambda x: x['SESSION_INDEX']))
        for cfg in data)
    return next((index for index in candidates if index is not None), None)
314ee305 490
def _extract_client_version(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_VERSION from *ytcfg*, else from the defaults of *default_client*."""
    def client_version(cfg):
        return cfg['INNERTUBE_CLIENT_VERSION']

    return self._ytcfg_get_safe(ytcfg, client_version, compat_str, default_client)
493
def _extract_api_key(self, ytcfg=None, default_client='WEB'):
    """Return INNERTUBE_API_KEY from *ytcfg*, else from the defaults of *default_client*."""
    def api_key(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(ytcfg, api_key, compat_str, default_client)
496
def _extract_context(self, ytcfg=None, default_client='WEB'):
    """
    Return the INNERTUBE_CONTEXT dict to send with API requests.

    The context found in *ytcfg* is returned as is. Otherwise the default
    context of *default_client* is used; if a ytcfg was given, its client
    name, client version and VISITOR_DATA (when present) are written into
    the 'client' part of that default context.
    """
    def context_of(cfg):
        return try_get(cfg, lambda x: x['INNERTUBE_CONTEXT'], dict)

    own_context = context_of(ytcfg)
    if own_context:
        return own_context

    fallback = context_of(self._get_default_ytcfg(default_client))
    if ytcfg:
        # Recreate the client context (required)
        client = fallback['client']
        client['clientVersion'] = self._extract_client_version(ytcfg, default_client)
        client['clientName'] = self._extract_client_name(ytcfg, default_client)
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            client['visitorData'] = visitor_data
    return fallback
516
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """
    Build the 'SAPISIDHASH ...' authorization value for *origin*.

    Returns None when neither a __Secure-3PAPISID nor a SAPISID cookie is
    available. A missing SAPISID cookie is created from the cookie found,
    with an expiry one hour from now.
    """
    # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
    # See: https://github.com/yt-dlp/yt-dlp/issues/393
    jar = self._get_cookies('https://www.youtube.com')
    cookie = dict_get(jar, ('__Secure-3PAPISID', 'SAPISID'))
    if cookie is None:
        return None
    timestamp = round(time.time())
    sapisid = cookie.value
    # SAPISID cookie is required if not already present
    if not jar.get('SAPISID'):
        self._set_cookie(
            '.youtube.com', 'SAPISID', sapisid, secure=True, expire_time=timestamp + 3600)
    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    payload = f'{timestamp} {sapisid} {origin}'
    digest = hashlib.sha1(payload.encode('utf-8')).hexdigest()
    return f'SAPISIDHASH {timestamp}_{digest}'
a5c56234
M
534
535 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 536 note='Downloading API JSON', errnote='Unable to download API page',
109dd3b2 537 context=None, api_key=None, api_hostname=None, default_client='WEB'):
f4f751af 538
109dd3b2 539 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
8bdd16b4 540 data.update(query)
11f9be09 541 real_headers = self.generate_api_headers(default_client=default_client)
f4f751af 542 real_headers.update({'content-type': 'application/json'})
543 if headers:
544 real_headers.update(headers)
545cc85d 545 return self._download_json(
109dd3b2 546 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
a5c56234 547 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 548 data=json.dumps(data).encode('utf8'), headers=real_headers,
549 query={'key': api_key or self._extract_api_key()})
550
def extract_yt_initial_data(self, video_id, webpage):
    """Find the ytInitialData JSON in *webpage* and return it parsed by _parse_json."""
    # Try the match bounded by the trailing boundary first, then the bare regex
    bounded_re = r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE)
    raw_json = self._search_regex(
        (bounded_re, self._YT_INITIAL_DATA_RE), webpage, 'yt initial data')
    return self._parse_json(raw_json, video_id)
0c148415 557
a1c5d2ca 558 def _extract_identity_token(self, webpage, item_id):
11f9be09 559 if not webpage:
560 return None
561 ytcfg = self.extract_ytcfg(item_id, webpage)
a1c5d2ca
M
562 if ytcfg:
563 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
564 if token:
565 return token
566 return self._search_regex(
567 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
568 'identity token', default=None)
569
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    @returns the first sync ID found among the arguments, else None
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'])
    for data in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated_sid:
            return delegated_sid
        datasync_id = try_get(data, datasync_getters, compat_str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) > 1 and parts[1]:
            return parts[0]
a1c5d2ca 588
def extract_ytcfg(self, video_id, webpage):
    """Return the dict passed to ytcfg.set(...) in *webpage*; {} if there is no page or no usable config."""
    if not webpage:
        return {}
    raw_cfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    parsed = self._parse_json(raw_cfg, video_id, fatal=False)
    return parsed or {}
596
def generate_api_headers(
        self, ytcfg=None, identity_token=None, account_syncid=None,
        visitor_data=None, api_hostname=None, default_client='WEB', session_index=None):
    """
    Build the HTTP headers for an Innertube API request.

    Client name, client version and Origin are always set. Identity token,
    page ID, auth user and visitor ID headers are added when the values are
    given or can be read from *ytcfg*. Authorization and X-Origin are added
    when a SAPISIDHASH value can be generated.
    """
    origin = 'https://' + (api_hostname or self._get_innertube_host(default_client))

    def context_client_name(cfg):
        return cfg['INNERTUBE_CONTEXT_CLIENT_NAME']

    client_name = compat_str(
        self._ytcfg_get_safe(ytcfg, context_client_name, default_client=default_client))
    headers = {
        'X-YouTube-Client-Name': client_name,
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin
    }
    if ytcfg and not visitor_data:
        visitor_data = try_get(
            self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
    if identity_token:
        headers['X-Youtube-Identity-Token'] = identity_token
    if account_syncid:
        headers['X-Goog-PageId'] = account_syncid
    if ytcfg and session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if session_index is not None:
        headers['X-Goog-AuthUser'] = session_index
    elif account_syncid:
        # A page ID without a known session index is sent with auth user 0
        headers['X-Goog-AuthUser'] = 0
    if visitor_data:
        headers['X-Goog-Visitor-Id'] = visitor_data
    auth = self._generate_sapisidhash_header(origin)
    if auth is not None:
        headers.update({'Authorization': auth, 'X-Origin': origin})
    return headers
29f7c58a 625
2d6659b9 626 @staticmethod
627 def _build_api_continuation_query(continuation, ctp=None):
628 query = {
629 'continuation': continuation
630 }
631 # TODO: Inconsistency with clickTrackingParams.
632 # Currently we have a fixed ctp contained within context (from ytcfg)
633 # and a ctp in root query for continuation.
634 if ctp:
635 query['clickTracking'] = {'clickTrackingParams': ctp}
636 return query
637
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the next/reload continuation data of *renderer*.

    Returns None when the renderer has no such data or the data has no
    continuation token.
    """
    next_continuation = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = next_continuation.get('continuation') if next_continuation else None
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, next_continuation.get('clickTrackingParams'))
2d6659b9 650
651 @classmethod
652 def _extract_continuation_ep_data(cls, continuation_ep: dict):
653 if isinstance(continuation_ep, dict):
654 continuation = try_get(
655 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
656 if not continuation:
657 return
658 ctp = continuation_ep.get('clickTrackingParams')
fe93e2c4 659 return cls._build_api_continuation_query(continuation, ctp)
2d6659b9 660
@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for *renderer*, or None if it has none.

    The renderer's own continuation data is preferred. Otherwise the
    entries under 'contents' and then 'items' are searched for a
    continuationItemRenderer that carries a continuation endpoint.
    """
    own_continuation = cls._extract_next_continuation_data(renderer)
    if own_continuation:
        return own_continuation

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'])
    for key in ('contents', 'items'):
        for content in try_get(renderer, lambda x: x[key], list) or []:
            if not isinstance(content, dict):
                continue
            endpoint = try_get(content, endpoint_getters, dict)
            continuation = cls._extract_continuation_ep_data(endpoint)
            if continuation:
                return continuation
681
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert type, message) for every alert in data['alerts'].

    Entries that are not dicts, or that lack a type or a message text,
    are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # The values come straight from the response: skip anything that
            # is not a dict instead of failing with AttributeError on .get()
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert.get('text'))
            if message:
                yield alert_type, message
694
695 def _report_alerts(self, alerts, expected=True):
696 errors = []
697 warnings = []
698 for alert_type, alert_message in alerts:
699 if alert_type.lower() == 'error':
700 errors.append([alert_type, alert_message])
701 else:
702 warnings.append([alert_type, alert_message])
703
704 for alert_type, alert_message in (warnings + errors[:-1]):
705 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
706 if errors:
707 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
708
709 def _extract_and_report_alerts(self, data, *args, **kwargs):
710 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
711
def _extract_badges(self, renderer: dict):
    """Return the set of lower-cased metadataBadgeRenderer labels found in renderer['badges']."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}
719
@staticmethod
def _get_text(data, getter=None, max_runs=None):
    """
    Return the text of a text object taken from *data*, or None.

    @param getter    getter or sequence of getters applied to *data* through
                     try_get; None uses *data* itself
    @param max_runs  when set, only this many leading runs are joined

    For each getter the 'simpleText' is preferred; otherwise the 'text' of
    the runs (or of the list itself) is joined. The first non-empty text wins.
    """
    for get in variadic(getter):
        node = data if get is None else try_get(data, get)
        simple_text = try_get(node, lambda x: x['simpleText'], compat_str)
        if simple_text:
            return simple_text
        runs = try_get(node, lambda x: x['runs'], list) or []
        if not runs and isinstance(node, list):
            runs = node

        limit = min(len(runs), max_runs or len(runs))
        joined = ''.join(
            try_get(run, lambda x: x['text'], compat_str) or ''
            for run in runs[:limit])
        if joined:
            return joined
47193e02 738
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='WEB'):
    """
    Call the API endpoint *ep* via _call_api, retrying on some failures.

    The request is retried on HTTP errors 500, 503 and 404 and when none of
    *check_get_keys* is found in the response. The number of retries is
    taken from the 'extractor_retries' param (default 3), so the request is
    sent at most retries + 1 times.

    @param check_get_keys  keys of which at least one must give a value
                           through dict_get for the response to be accepted;
                           None or empty disables the check
    @param fatal           if True, failures are raised as ExtractorError;
                           otherwise a warning is reported and None is returned
    @returns the response, or None after a non-fatal failure
    """
    response = None
    last_error = None
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            # Always fatal here so that failures reach the handler below,
            # which decides between retrying, raising and warning
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                last_error = 'HTTP Error %s' % e.cause.code
                if count < retries:
                    continue
            # Not retryable, or out of retries
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            # Youtube may send alerts if there was an issue with the continuation page
            try:
                self._extract_and_report_alerts(response, expected=False)
            except ExtractorError as e:
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response
794
@staticmethod
def is_music_url(url):
    """Return True if *url* starts with http(s)://music.youtube.com/."""
    return bool(re.match(r'https?://music\.youtube\.com/', url))
798
def _extract_video(self, renderer):
    """
    Build a 'url' result for YoutubeIE from a video renderer.

    ID, title, description, duration, view count and uploader are read
    from *renderer*.
    """
    def text_of(key):
        return self._get_text(renderer.get(key))

    video_id = renderer.get('videoId')
    # Remove all whitespace so that the leading number can be matched
    compact_views = re.sub(r'\s', '', text_of('viewCountText') or '')
    view_count = str_to_int(self._search_regex(
        r'^([\d,]+)', compact_views, 'view count', default=None))
    uploader = self._get_text(
        renderer, (lambda x: x['ownerText'], lambda x: x['shortBylineText']))

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
        'title': text_of('title'),
        'description': text_of('descriptionSnippet'),
        'duration': parse_duration(text_of('lengthText')),
        'view_count': view_count,
        'uploader': uploader,
    }
822
0c148415 823
360e1ca5 824class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 825 IE_DESC = 'YouTube.com'
bc2ca1bb 826 _INVIDIOUS_SITES = (
827 # invidious-redirect websites
828 r'(?:www\.)?redirect\.invidious\.io',
829 r'(?:(?:www|dev)\.)?invidio\.us',
830 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
831 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 832 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 833 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 834 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
bc2ca1bb 835 # youtube-dl invidious instances list
836 r'(?:(?:www|no)\.)?invidiou\.sh',
837 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
838 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 839 r'(?:www\.)?invidious\.mastodon\.host',
840 r'(?:www\.)?invidious\.zapashcanon\.fr',
ed807c18 841 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
201c1459 842 r'(?:www\.)?invidious\.tinfoil-hat\.net',
843 r'(?:www\.)?invidious\.himiko\.cloud',
844 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 845 r'(?:www\.)?invidious\.tube',
846 r'(?:www\.)?invidiou\.site',
847 r'(?:www\.)?invidious\.site',
848 r'(?:www\.)?invidious\.xyz',
849 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 850 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 851 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 852 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 853 r'(?:www\.)?tube\.poal\.co',
854 r'(?:www\.)?tube\.connect\.cafe',
855 r'(?:www\.)?vid\.wxzm\.sx',
856 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 857 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 858 r'(?:www\.)?yewtu\.be',
859 r'(?:www\.)?yt\.elukerio\.org',
860 r'(?:www\.)?yt\.lelux\.fi',
861 r'(?:www\.)?invidious\.ggc-project\.de',
862 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 863 r'(?:www\.)?ytprivate\.com',
864 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 865 r'(?:www\.)?invidious\.toot\.koeln',
866 r'(?:www\.)?invidious\.fdn\.fr',
867 r'(?:www\.)?watch\.nettohikari\.com',
ed807c18 868 r'(?:www\.)?invidious\.namazso\.eu',
869 r'(?:www\.)?invidious\.silkky\.cloud',
870 r'(?:www\.)?invidious\.exonip\.de',
871 r'(?:www\.)?invidious\.riverside\.rocks',
872 r'(?:www\.)?invidious\.blamefran\.net',
873 r'(?:www\.)?invidious\.moomoo\.de',
874 r'(?:www\.)?ytb\.trom\.tf',
875 r'(?:www\.)?yt\.cyberhost\.uk',
bc2ca1bb 876 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
877 r'(?:www\.)?qklhadlycap4cnod\.onion',
878 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
879 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
880 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
881 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
882 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
883 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
ed807c18 884 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
885 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
886 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
887 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
bc2ca1bb 888 )
cb7dfeea 889 _VALID_URL = r"""(?x)^
c5e8d7af 890 (
edb53e2d 891 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 892 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
893 (?:www\.)?deturl\.com/www\.youtube\.com|
894 (?:www\.)?pwnyoutube\.com|
895 (?:www\.)?hooktube\.com|
896 (?:www\.)?yourepeat\.com|
897 tube\.majestyc\.net|
898 %(invidious)s|
899 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
900 (?:.*?\#/)? # handle anchor (#/) redirect urls
901 (?: # the various things that can precede the ID:
ac7553d0 902 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 903 |(?: # or the v= param in all its forms
f7000f3a 904 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 905 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 906 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
907 v=
908 )
f4b05232 909 ))
cbaed4bb
S
910 |(?:
911 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
912 vid\.plus| # or vid.plus/xxxx
913 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 914 %(invidious)s
cbaed4bb 915 )/
edb53e2d 916 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 917 )
c5e8d7af 918 )? # all until now is optional -> you can pass the naked ID
201c1459 919 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 920 (?(1).+)? # if we found the ID, everything can follow
9297939e 921 (?:\#|$)""" % {
bc2ca1bb 922 'invidious': '|'.join(_INVIDIOUS_SITES),
923 }
e40c758c 924 _PLAYER_INFO_RE = (
cc2db878 925 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
926 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 927 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 928 )
2c62dc26 929 _formats = {
c2d3cb4c 930 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
931 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
932 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
933 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
934 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
935 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
936 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
937 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 938 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 939 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
940 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
941 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
942 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
943 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
944 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 945 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 946 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
947 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 948
949
950 # 3D videos
c2d3cb4c 951 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
952 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
953 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
954 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 955 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
956 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
957 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 958
96fb5605 959 # Apple HTTP Live Streaming
11f12195 960 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 961 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
962 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
963 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
964 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
965 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 966 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
967 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
968
969 # DASH mp4 video
d23028a8
S
970 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
971 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
972 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
973 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
974 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 975 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
976 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
977 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
978 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
979 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
980 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
981 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 982
f6f1fc92 983 # DASH mp4 audio
d23028a8
S
984 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
985 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
986 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
987 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
988 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
989 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
990 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
991
992 # DASH webm video
d23028a8
S
993 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
994 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
995 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
996 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
997 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
998 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
999 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1000 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1001 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1002 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1003 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1004 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1005 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1006 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1007 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 1008 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
1009 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1010 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1011 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1012 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1013 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1014 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
1015
1016 # DASH webm audio
d23028a8
S
1017 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1018 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 1019
0857baad 1020 # DASH webm audio with Opus codec
d23028a8
S
1021 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1022 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1023 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 1024
ce6b9a2d
PH
1025 # RTMP (unnamed)
1026 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
1027
1028 # av01 video-only formats, sometimes served with "unknown" codecs
1029 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1030 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1031 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1032 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 1033 }
29f7c58a 1034 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1035
109dd3b2 1036 _AGE_GATE_REASONS = (
1037 'Sign in to confirm your age',
1038 'This video may be inappropriate for some users.',
1039 'Sorry, this content is age-restricted.')
1040
fd5c4aab
S
1041 _GEO_BYPASS = False
1042
78caa52a 1043 IE_NAME = 'youtube'
2eb88d95
PH
1044 _TESTS = [
1045 {
2d3d2997 1046 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1047 'info_dict': {
1048 'id': 'BaW_jenozKc',
1049 'ext': 'mp4',
3867038a 1050 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1051 'uploader': 'Philipp Hagemeister',
1052 'uploader_id': 'phihag',
ec85ded8 1053 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
1054 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1055 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1056 'upload_date': '20121002',
3867038a 1057 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 1058 'categories': ['Science & Technology'],
3867038a 1059 'tags': ['youtube-dl'],
556dbe7f 1060 'duration': 10,
dbdaaa23 1061 'view_count': int,
3e7c1224
PH
1062 'like_count': int,
1063 'dislike_count': int,
7c80519c 1064 'start_time': 1,
297a564b 1065 'end_time': 9,
2eb88d95 1066 }
0e853ca4 1067 },
fccd3771 1068 {
4bc3a23e
PH
1069 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1070 'note': 'Embed-only video (#1746)',
1071 'info_dict': {
1072 'id': 'yZIXLfi8CZQ',
1073 'ext': 'mp4',
1074 'upload_date': '20120608',
1075 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1076 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1077 'uploader': 'SET India',
94bfcd23 1078 'uploader_id': 'setindia',
ec85ded8 1079 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1080 'age_limit': 18,
545cc85d 1081 },
1082 'skip': 'Private video',
fccd3771 1083 },
11b56058 1084 {
8bdd16b4 1085 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1086 'note': 'Use the first video ID in the URL',
1087 'info_dict': {
1088 'id': 'BaW_jenozKc',
1089 'ext': 'mp4',
3867038a 1090 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1091 'uploader': 'Philipp Hagemeister',
1092 'uploader_id': 'phihag',
ec85ded8 1093 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 1094 'upload_date': '20121002',
3867038a 1095 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 1096 'categories': ['Science & Technology'],
3867038a 1097 'tags': ['youtube-dl'],
556dbe7f 1098 'duration': 10,
dbdaaa23 1099 'view_count': int,
11b56058
PM
1100 'like_count': int,
1101 'dislike_count': int,
34a7de29
S
1102 },
1103 'params': {
1104 'skip_download': True,
1105 },
11b56058 1106 },
dd27fd17 1107 {
2d3d2997 1108 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1109 'note': '256k DASH audio (format 141) via DASH manifest',
1110 'info_dict': {
1111 'id': 'a9LDPn-MO4I',
1112 'ext': 'm4a',
1113 'upload_date': '20121002',
1114 'uploader_id': '8KVIDEO',
ec85ded8 1115 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1116 'description': '',
1117 'uploader': '8KVIDEO',
1118 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1119 },
4bc3a23e
PH
1120 'params': {
1121 'youtube_include_dash_manifest': True,
1122 'format': '141',
4919603f 1123 },
de3c7fe0 1124 'skip': 'format 141 not served anymore',
dd27fd17 1125 },
8bdd16b4 1126 # DASH manifest with encrypted signature
1127 {
1128 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1129 'info_dict': {
1130 'id': 'IB3lcPjvWLA',
1131 'ext': 'm4a',
1132 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1133 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1134 'duration': 244,
1135 'uploader': 'AfrojackVEVO',
1136 'uploader_id': 'AfrojackVEVO',
1137 'upload_date': '20131011',
cc2db878 1138 'abr': 129.495,
8bdd16b4 1139 },
1140 'params': {
1141 'youtube_include_dash_manifest': True,
1142 'format': '141/bestaudio[ext=m4a]',
1143 },
1144 },
dd2d55f1 1145 # Normal age-gate video (embed allowed)
c522adb1 1146 {
2d3d2997 1147 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1148 'info_dict': {
1149 'id': 'HtVdAasjOgU',
1150 'ext': 'mp4',
1151 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1152 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1153 'duration': 142,
c522adb1
JMF
1154 'uploader': 'The Witcher',
1155 'uploader_id': 'WitcherGame',
ec85ded8 1156 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1157 'upload_date': '20140605',
34952f09 1158 'age_limit': 18,
c522adb1
JMF
1159 },
1160 },
8bdd16b4 1161 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1162 # YouTube Red ad is not captured for creator
1163 {
1164 'url': '__2ABJjxzNo',
1165 'info_dict': {
1166 'id': '__2ABJjxzNo',
1167 'ext': 'mp4',
1168 'duration': 266,
1169 'upload_date': '20100430',
1170 'uploader_id': 'deadmau5',
1171 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1172 'creator': 'deadmau5',
1173 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1174 'uploader': 'deadmau5',
1175 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1176 'alt_title': 'Some Chords',
8bdd16b4 1177 },
1178 'expected_warnings': [
1179 'DASH manifest missing',
1180 ]
1181 },
067aa17e 1182 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1183 {
1184 'url': 'lqQg6PlCWgI',
1185 'info_dict': {
1186 'id': 'lqQg6PlCWgI',
1187 'ext': 'mp4',
556dbe7f 1188 'duration': 6085,
90227264 1189 'upload_date': '20150827',
cbe2bd91 1190 'uploader_id': 'olympic',
ec85ded8 1191 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1192 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
11f9be09 1193 'uploader': 'Olympics',
cbe2bd91
PH
1194 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1195 },
1196 'params': {
1197 'skip_download': 'requires avconv',
e52a40ab 1198 }
cbe2bd91 1199 },
6271f1ca
PH
1200 # Non-square pixels
1201 {
1202 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1203 'info_dict': {
1204 'id': '_b-2C3KPAM0',
1205 'ext': 'mp4',
1206 'stretched_ratio': 16 / 9.,
556dbe7f 1207 'duration': 85,
6271f1ca
PH
1208 'upload_date': '20110310',
1209 'uploader_id': 'AllenMeow',
ec85ded8 1210 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1211 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1212 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1213 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1214 },
06b491eb
S
1215 },
1216 # url_encoded_fmt_stream_map is empty string
1217 {
1218 'url': 'qEJwOuvDf7I',
1219 'info_dict': {
1220 'id': 'qEJwOuvDf7I',
f57b7835 1221 'ext': 'webm',
06b491eb
S
1222 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1223 'description': '',
1224 'upload_date': '20150404',
1225 'uploader_id': 'spbelect',
1226 'uploader': 'Наблюдатели Петербурга',
1227 },
1228 'params': {
1229 'skip_download': 'requires avconv',
e323cf3f
S
1230 },
1231 'skip': 'This live event has ended.',
06b491eb 1232 },
067aa17e 1233 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1234 {
1235 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1236 'info_dict': {
1237 'id': 'FIl7x6_3R5Y',
eb6793ba 1238 'ext': 'webm',
da77d856
S
1239 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1240 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1241 'duration': 220,
da77d856
S
1242 'upload_date': '20150625',
1243 'uploader_id': 'dorappi2000',
ec85ded8 1244 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1245 'uploader': 'dorappi2000',
eb6793ba 1246 'formats': 'mincount:31',
da77d856 1247 },
eb6793ba 1248 'skip': 'not actual anymore',
2ee8f5d8 1249 },
8a1a26ce
YCH
1250 # DASH manifest with segment_list
1251 {
1252 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1253 'md5': '8ce563a1d667b599d21064e982ab9e31',
1254 'info_dict': {
1255 'id': 'CsmdDsKjzN8',
1256 'ext': 'mp4',
17ee98e1 1257 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1258 'uploader': 'Airtek',
1259 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1260 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1261 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1262 },
1263 'params': {
1264 'youtube_include_dash_manifest': True,
1265 'format': '135', # bestvideo
be49068d
S
1266 },
1267 'skip': 'This live event has ended.',
2ee8f5d8 1268 },
cf7e015f
S
1269 {
1270 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1271 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1272 'info_dict': {
545cc85d 1273 'id': 'jvGDaLqkpTg',
1274 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1275 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1276 },
1277 'playlist': [{
1278 'info_dict': {
545cc85d 1279 'id': 'jvGDaLqkpTg',
cf7e015f 1280 'ext': 'mp4',
545cc85d 1281 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1282 'description': 'md5:e03b909557865076822aa169218d6a5d',
1283 'duration': 10643,
1284 'upload_date': '20161111',
1285 'uploader': 'Team PGP',
1286 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1287 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1288 },
1289 }, {
1290 'info_dict': {
545cc85d 1291 'id': '3AKt1R1aDnw',
cf7e015f 1292 'ext': 'mp4',
545cc85d 1293 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1294 'description': 'md5:e03b909557865076822aa169218d6a5d',
1295 'duration': 10991,
1296 'upload_date': '20161111',
1297 'uploader': 'Team PGP',
1298 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1299 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1300 },
1301 }, {
1302 'info_dict': {
545cc85d 1303 'id': 'RtAMM00gpVc',
cf7e015f 1304 'ext': 'mp4',
545cc85d 1305 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1306 'description': 'md5:e03b909557865076822aa169218d6a5d',
1307 'duration': 10995,
1308 'upload_date': '20161111',
1309 'uploader': 'Team PGP',
1310 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1311 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1312 },
1313 }, {
1314 'info_dict': {
545cc85d 1315 'id': '6N2fdlP3C5U',
cf7e015f 1316 'ext': 'mp4',
545cc85d 1317 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1318 'description': 'md5:e03b909557865076822aa169218d6a5d',
1319 'duration': 10990,
1320 'upload_date': '20161111',
1321 'uploader': 'Team PGP',
1322 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1323 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1324 },
1325 }],
1326 'params': {
1327 'skip_download': True,
1328 },
cbaed4bb 1329 },
f9f49d87 1330 {
067aa17e 1331 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1332 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1333 'info_dict': {
1334 'id': 'gVfLd0zydlo',
1335 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1336 },
1337 'playlist_count': 2,
be49068d 1338 'skip': 'Not multifeed anymore',
f9f49d87 1339 },
cbaed4bb 1340 {
2d3d2997 1341 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1342 'only_matching': True,
0e49d9a6 1343 },
6d4fc66b 1344 {
2d3d2997 1345 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1346 'only_matching': True,
1347 },
0e49d9a6 1348 {
067aa17e 1349 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1350 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1351 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1352 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1353 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1354 'info_dict': {
1355 'id': 'lsguqyKfVQg',
1356 'ext': 'mp4',
1357 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1358 'alt_title': 'Dark Walk',
0e49d9a6 1359 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1360 'duration': 133,
0e49d9a6
LL
1361 'upload_date': '20151119',
1362 'uploader_id': 'IronSoulElf',
ec85ded8 1363 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1364 'uploader': 'IronSoulElf',
11f9be09 1365 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1366 'track': 'Dark Walk',
1367 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1368 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1369 },
1370 'params': {
1371 'skip_download': True,
1372 },
1373 },
61f92af1 1374 {
067aa17e 1375 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1376 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1377 'only_matching': True,
1378 },
313dfc45
LL
1379 {
1380 # Video with yt:stretch=17:0
1381 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1382 'info_dict': {
1383 'id': 'Q39EVAstoRM',
1384 'ext': 'mp4',
1385 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1386 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1387 'upload_date': '20151107',
1388 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1389 'uploader': 'CH GAMER DROID',
1390 },
1391 'params': {
1392 'skip_download': True,
1393 },
be49068d 1394 'skip': 'This video does not exist.',
313dfc45 1395 },
201c1459 1396 {
1397 # Video with incomplete 'yt:stretch=16:'
1398 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1399 'only_matching': True,
1400 },
7caf9830
S
1401 {
1402 # Video licensed under Creative Commons
1403 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1404 'info_dict': {
1405 'id': 'M4gD1WSo5mA',
1406 'ext': 'mp4',
1407 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1408 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1409 'duration': 721,
7caf9830
S
1410 'upload_date': '20150127',
1411 'uploader_id': 'BerkmanCenter',
ec85ded8 1412 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1413 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1414 'license': 'Creative Commons Attribution license (reuse allowed)',
1415 },
1416 'params': {
1417 'skip_download': True,
1418 },
1419 },
fd050249
S
1420 {
1421 # Channel-like uploader_url
1422 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1423 'info_dict': {
1424 'id': 'eQcmzGIKrzg',
1425 'ext': 'mp4',
1426 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1427 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1428 'duration': 4060,
fd050249 1429 'upload_date': '20151119',
eb6793ba 1430 'uploader': 'Bernie Sanders',
fd050249 1431 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1432 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1433 'license': 'Creative Commons Attribution license (reuse allowed)',
1434 },
1435 'params': {
1436 'skip_download': True,
1437 },
1438 },
040ac686
S
1439 {
1440 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1441 'only_matching': True,
7f29cf54
S
1442 },
1443 {
067aa17e 1444 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1445 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1446 'only_matching': True,
6496ccb4
S
1447 },
1448 {
1449 # Rental video preview
1450 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1451 'info_dict': {
1452 'id': 'uGpuVWrhIzE',
1453 'ext': 'mp4',
1454 'title': 'Piku - Trailer',
1455 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1456 'upload_date': '20150811',
1457 'uploader': 'FlixMatrix',
1458 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1459 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1460 'license': 'Standard YouTube License',
1461 },
1462 'params': {
1463 'skip_download': True,
1464 },
eb6793ba 1465 'skip': 'This video is not available.',
022a5d66 1466 },
12afdc2a
S
1467 {
1468 # YouTube Red video with episode data
1469 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1470 'info_dict': {
1471 'id': 'iqKdEhx-dD4',
1472 'ext': 'mp4',
1473 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1474 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1475 'duration': 2085,
12afdc2a
S
1476 'upload_date': '20170118',
1477 'uploader': 'Vsauce',
1478 'uploader_id': 'Vsauce',
1479 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1480 'series': 'Mind Field',
1481 'season_number': 1,
1482 'episode_number': 1,
1483 },
1484 'params': {
1485 'skip_download': True,
1486 },
1487 'expected_warnings': [
1488 'Skipping DASH manifest',
1489 ],
1490 },
c7121fa7
S
1491 {
1492 # The following content has been identified by the YouTube community
1493 # as inappropriate or offensive to some audiences.
1494 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1495 'info_dict': {
1496 'id': '6SJNVb0GnPI',
1497 'ext': 'mp4',
1498 'title': 'Race Differences in Intelligence',
1499 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1500 'duration': 965,
1501 'upload_date': '20140124',
1502 'uploader': 'New Century Foundation',
1503 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1504 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1505 },
1506 'params': {
1507 'skip_download': True,
1508 },
545cc85d 1509 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1510 },
022a5d66
S
1511 {
1512 # itag 212
1513 'url': '1t24XAntNCY',
1514 'only_matching': True,
fd5c4aab
S
1515 },
1516 {
1517 # geo restricted to JP
1518 'url': 'sJL6WA-aGkQ',
1519 'only_matching': True,
1520 },
cd5a74a2
S
1521 {
1522 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1523 'only_matching': True,
1524 },
bc2ca1bb 1525 {
1526 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1527 'only_matching': True,
1528 },
1529 {
1530 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1531 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1532 'only_matching': True,
1533 },
825cd268
RA
1534 {
1535 # DRM protected
1536 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1537 'only_matching': True,
4fe54c12
S
1538 },
1539 {
1540 # Video with unsupported adaptive stream type formats
1541 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1542 'info_dict': {
1543 'id': 'Z4Vy8R84T1U',
1544 'ext': 'mp4',
1545 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1546 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1547 'duration': 433,
1548 'upload_date': '20130923',
1549 'uploader': 'Amelia Putri Harwita',
1550 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1551 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1552 'formats': 'maxcount:10',
1553 },
1554 'params': {
1555 'skip_download': True,
1556 'youtube_include_dash_manifest': False,
1557 },
5429d6a9 1558 'skip': 'not actual anymore',
5caabd3c 1559 },
1560 {
822b9d9c 1561 # Youtube Music Auto-generated description
5caabd3c 1562 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1563 'info_dict': {
1564 'id': 'MgNrAu2pzNs',
1565 'ext': 'mp4',
1566 'title': 'Voyeur Girl',
1567 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1568 'upload_date': '20190312',
5429d6a9
S
1569 'uploader': 'Stephen - Topic',
1570 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1571 'artist': 'Stephen',
1572 'track': 'Voyeur Girl',
1573 'album': 'it\'s too much love to know my dear',
1574 'release_date': '20190313',
1575 'release_year': 2019,
1576 },
1577 'params': {
1578 'skip_download': True,
1579 },
1580 },
66b48727
RA
1581 {
1582 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1583 'only_matching': True,
1584 },
011e75e6
S
1585 {
1586 # invalid -> valid video id redirection
1587 'url': 'DJztXj2GPfl',
1588 'info_dict': {
1589 'id': 'DJztXj2GPfk',
1590 'ext': 'mp4',
1591 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1592 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1593 'upload_date': '20090125',
1594 'uploader': 'Prochorowka',
1595 'uploader_id': 'Prochorowka',
1596 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1597 'artist': 'Panjabi MC',
1598 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1599 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1600 },
1601 'params': {
1602 'skip_download': True,
1603 },
545cc85d 1604 'skip': 'Video unavailable',
ea74e00b
DP
1605 },
1606 {
1607 # empty description results in an empty string
1608 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1609 'info_dict': {
1610 'id': 'x41yOUIvK2k',
1611 'ext': 'mp4',
1612 'title': 'IMG 3456',
1613 'description': '',
1614 'upload_date': '20170613',
1615 'uploader_id': 'ElevageOrVert',
1616 'uploader': 'ElevageOrVert',
1617 },
1618 'params': {
1619 'skip_download': True,
1620 },
1621 },
a0566bbf 1622 {
29f7c58a 1623 # with '};' inside yt initial data (see [1])
1624 # see [2] for an example with '};' inside ytInitialPlayerResponse
1625 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1626 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1627 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1628 'info_dict': {
1629 'id': 'CHqg6qOn4no',
1630 'ext': 'mp4',
1631 'title': 'Part 77 Sort a list of simple types in c#',
1632 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1633 'upload_date': '20130831',
1634 'uploader_id': 'kudvenkat',
1635 'uploader': 'kudvenkat',
1636 },
1637 'params': {
1638 'skip_download': True,
1639 },
1640 },
29f7c58a 1641 {
1642 # another example of '};' in ytInitialData
1643 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1644 'only_matching': True,
1645 },
1646 {
1647 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1648 'only_matching': True,
1649 },
545cc85d 1650 {
cc2db878 1651 # https://github.com/ytdl-org/youtube-dl/pull/28094
1652 'url': 'OtqTfy26tG0',
1653 'info_dict': {
1654 'id': 'OtqTfy26tG0',
1655 'ext': 'mp4',
1656 'title': 'Burn Out',
1657 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1658 'upload_date': '20141120',
1659 'uploader': 'The Cinematic Orchestra - Topic',
1660 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1661 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1662 'artist': 'The Cinematic Orchestra',
1663 'track': 'Burn Out',
1664 'album': 'Every Day',
1665 'release_data': None,
1666 'release_year': None,
1667 },
1668 'params': {
1669 'skip_download': True,
1670 },
545cc85d 1671 },
bc2ca1bb 1672 {
1673 # controversial video, only works with bpctr when authenticated with cookies
1674 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1675 'only_matching': True,
1676 },
a1a7907b 1677 {
1678 # controversial video, requires bpctr/contentCheckOk
1679 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1680 'info_dict': {
1681 'id': 'SZJvDhaSDnc',
1682 'ext': 'mp4',
1683 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1684 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1685 'uploader': 'CBS This Morning',
11f9be09 1686 'uploader_id': 'CBSThisMorning',
a1a7907b 1687 'upload_date': '20140716',
1688 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1689 }
1690 },
f7ad7160 1691 {
1692 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1693 'url': 'cBvYw8_A0vQ',
1694 'info_dict': {
1695 'id': 'cBvYw8_A0vQ',
1696 'ext': 'mp4',
1697 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1698 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1699 'upload_date': '20201120',
1700 'uploader': 'Walk around Japan',
1701 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1702 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1703 },
1704 'params': {
1705 'skip_download': True,
1706 },
0fb983f6 1707 }, {
1708 # Has multiple audio streams
1709 'url': 'WaOKSUlf4TM',
1710 'only_matching': True
9297939e 1711 }, {
1712 # Requires Premium: has format 141 when requested using YTM url
1713 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1714 'only_matching': True
1715 }, {
120916da 1716 # multiple subtitles with same lang_code
1717 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1718 'only_matching': True,
109dd3b2 1719 }, {
1720 # Force use android client fallback
1721 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1722 'info_dict': {
1723 'id': 'YOelRv7fMxY',
11f9be09 1724 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 1725 'ext': '3gp',
1726 'upload_date': '20210624',
1727 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1728 'uploader': 'colinfurze',
11f9be09 1729 'uploader_id': 'colinfurze',
109dd3b2 1730 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
11f9be09 1731 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
109dd3b2 1732 },
1733 'params': {
1734 'format': '17', # 3gp format available on android
1735 'extractor_args': {'youtube': {'player_client': ['android']}},
1736 },
120916da 1737 },
109dd3b2 1738 {
1739 # Skip download of additional client configs (remix client config in this case)
1740 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1741 'only_matching': True,
1742 'params': {
1743 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1744 },
1745 }
2eb88d95
PH
1746 ]
1747
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    URLs carrying a non-empty ``list`` query parameter are rejected here;
    for everything else the decision is left to the parent class.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
1757
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor with two empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # (player_url, signature shape) -> signature function, see _decrypt_signature
    self._player_cache = dict()
    # player id -> downloaded player code, see _load_player
    self._code_cache = dict()
e0df6211 1762
def _extract_player_url(self, ytcfg=None, webpage=None):
    """Return the absolute URL of the JS player, or None if none is found.

    ``PLAYER_JS_URL`` from *ytcfg* is preferred; when it is missing and a
    *webpage* is given, the page is searched for a ``PLAYER_JS_URL`` or
    ``jsUrl`` entry instead.
    """
    url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
    if webpage and not url:
        url = self._search_regex(
            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
            webpage, 'player URL', fatal=False)
    if not url:
        return None
    # Protocol-relative URL: only the scheme is missing
    if url.startswith('//'):
        return 'https:' + url
    # No explicit http(s) scheme: resolve against the YouTube origin
    if re.match(r'https?://', url) is None:
        return compat_urlparse.urljoin(
            'https://www.youtube.com', url)
    return url
1777
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return the lengths of the dot-separated parts of example_sig, joined by dots."""
    part_lengths = [compat_str(len(segment)) for segment in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1781
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first ``_PLAYER_INFO_RE`` pattern found in player_url.

    Raises ExtractorError when none of the patterns matches.
    """
    found = None
    for pattern in cls._PLAYER_INFO_RE:
        found = re.search(pattern, player_url)
        if found:
            break
    if not found:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return found.group('id')
e40c758c 1791
109dd3b2 1792 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1793 player_id = self._extract_player_info(player_url)
1794 if player_id not in self._code_cache:
1795 self._code_cache[player_id] = self._download_webpage(
1796 player_url, video_id, fatal=fatal,
1797 note='Downloading player ' + player_id,
1798 errnote='Download of %s failed' % player_url)
1799 return player_id in self._code_cache
1800
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms a signature shaped like example_sig.

    A cached index permutation from the 'youtube-sigfuncs' cache section is
    used when present. Otherwise the player is loaded, its signature
    function is parsed, and the permutation it applies is stored in the
    cache. Returns None when the player could not be loaded.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache; the id must be a bare file name
    cache_key = 'js_%s_%s' % (
        player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(cache_key) == cache_key

    cached_indices = self._downloader.cache.load('youtube-sigfuncs', cache_key)
    if cached_indices is not None:
        def apply_cached(s):
            return ''.join(s[idx] for idx in cached_indices)
        return apply_cached

    if not self._load_player(video_id, player_url):
        return None

    sig_func = self._parse_sig_js(self._code_cache[player_id])

    # Feed a string of distinct characters through the function so that the
    # output reveals which input position ends up at each output position
    probe = ''.join(compat_chr(n) for n in range(len(example_sig)))
    permutation = [ord(ch) for ch in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', cache_key, permutation)
    return sig_func
83799698 1823
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the signature function func.

    func is run on a probe string as long as example_sig; the resulting
    index permutation is rendered as a sum of ``s[...]`` index/slice
    expressions and written out with self.to_screen.
    """
    def gen_sig_code(idxs):
        # Yields 's[i]' / 's[a:b:c]' snippets covering idxs in order, merging
        # runs of consecutive indices (step +1 or -1) into a single slice.
        # NOTE(review): with fewer than two indices the loop body never runs,
        # so `i` is unbound at the final `yield` - confirm callers never pass
        # such input.
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            # A negative exclusive end would wrap around, so leave it open
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                # Inside a run: extend it while the step stays the same
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # Start of a new run
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Flush the last index or the run that is still open
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    # Probe string of distinct characters: chr(0), chr(1), ...
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1862
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player code jscode.

    The function name is found with the first matching pattern below and
    the function is extracted with JSInterpreter. Returns a callable that
    takes the signature string and passes it as the single argument.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         # NOTE(review): the next pattern is identical to the previous one
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes its arguments as a list
    return lambda s: initial_function([s])
1886
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature

    The signature function is looked up (and, on first use, extracted) per
    (player_url, signature shape) pair. Any exception raised on the way is
    re-raised as ExtractorError carrying the formatted traceback.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        if self.get_param('youtube_print_sig_code'):
            self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as err:
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=err)
e0df6211 1908
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The ``STS`` entry of ytcfg is used when it is a non-zero integer;
    otherwise the value is searched for in the player code.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    if not self._load_player(video_id, player_url, fatal=fatal):
        return sts

    code = self._code_cache[self._extract_player_info(player_url)]
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
1933
def _mark_watched(self, video_id, player_responses):
    """Request the playback tracking URL found in player_responses.

    Emits a warning and does nothing else when no such URL is present.
    The request itself is non-fatal.
    """
    tracking_url = traverse_obj(
        player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none, get_all=False)
    if not tracking_url:
        self.report_warning('Unable to mark watched')
        return

    url_parts = compat_urlparse.urlparse(tracking_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]

    new_query = compat_urllib_parse_urlencode(query, True)
    tracking_url = compat_urlparse.urlunparse(url_parts._replace(query=new_query))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1959
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return all YouTube embeds found in webpage, in order of discovery.

    Player embed URLs come first, then the values of lazyYT
    ``data-youtube-id`` attributes, then the ``data-video_id`` values of
    "YouTube Video Importer" players.
    """
    # Embedded YouTube player
    # NOTE(review): as written, the `embedSWF\(?:\s*` alternative requires a
    # literal ':' after an optional '(' - it looks like a group was intended;
    # confirm before changing the pattern.
    player_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    entries = []
    for mobj in re.finditer(player_re, webpage):
        entries.append(unescapeHTML(mobj.group('url')))

    # lazyYT YouTube embed
    for lazy_id in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        entries.append(unescapeHTML(lazy_id))

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    for groups in re.findall(importer_re, webpage):
        # findall returns one tuple per match; the id is the last group
        entries.append(groups[-1])

    return entries
1991
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by _extract_urls, or None when there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1996
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return group 2 of ``_VALID_URL`` matched against url.

    Raises ExtractorError when url does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
2004
def _extract_chapters_from_json(self, data, duration):
    """Build the chapter list from the chaptered player bar inside data.

    Start times are read from ``timeRangeStartMillis`` (converted to
    seconds) and titles from ``title.simpleText`` of each chapterRenderer.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )

    def start_seconds(chapter):
        millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(millis, scale=1000)

    def title_text(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    raw_chapters = traverse_obj(data, chapters_path, expected_type=list)
    return self._extract_chapters(
        raw_chapters,
        chapter_time=start_seconds,
        chapter_title=title_text,
        duration=duration)
2019
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build the chapter list from the macro-marker lists of the engagement panels.

    Returns the chapters of the first list that yields any, or an empty
    list when none does.
    """
    marker_lists = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter.get('timeDescription')))

    def chapter_title(chapter):
        return self._get_text(chapter.get('title'))

    for contents in marker_lists:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
2035
2036 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
84213ea8 2037 chapters = []
7c365c21 2038 last_chapter = {'start_time': 0}
2039 for idx, chapter in enumerate(chapter_list or []):
2040 title = chapter_title(chapter)
84213ea8
S
2041 start_time = chapter_time(chapter)
2042 if start_time is None:
2043 continue
7c365c21 2044 last_chapter['end_time'] = start_time
2045 if start_time < last_chapter['start_time']:
2046 if idx == 1:
2047 chapters.pop()
2048 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2049 else:
2050 self.report_warning(f'Invalid start time for chapter "{title}"')
2051 continue
2052 last_chapter = {'start_time': start_time, 'title': title}
2053 chapters.append(last_chapter)
2054 last_chapter['end_time'] = duration
84213ea8
S
2055 return chapters
2056
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Find the JSON matched by regex in webpage and parse it.

    The variant of regex followed by ``_YT_INITIAL_BOUNDARY_RE`` is tried
    first, then regex on its own; ``'{}'`` is parsed when neither matches.
    Parsing is non-fatal.
    """
    patterns = (
        r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
        regex)
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 2061
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns None when the text has fewer than three space-separated words
    or when datetime_from_str rejects it with ValueError.
    """
    words = time_text.split(' ')
    if len(words) < 3:
        return None
    amount, unit = words[0], words[1]
    try:
        return datetime_from_str('now-%s%s' % (amount, unit), precision='auto')
    except ValueError:
        return None
d92f5d5a 2074
a1c5d2ca
M
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a commentRenderer dict into a comment info dict.

    Returns None when the renderer has no ``commentId``. ``timestamp`` is
    None when the published time text cannot be parsed. ``parent`` is the
    id of the parent comment, reported as ``'root'`` when not given.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer.get('contentText'))

    # note: timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer.get('publishedTimeText')) or ''
    time_text_dt = self.parse_time_text(time_text)
    # Must be bound even when the time text cannot be parsed, otherwise
    # building the result below fails with UnboundLocalError
    timestamp = None
    if isinstance(time_text_dt, datetime.datetime):
        timestamp = calendar.timegm(time_text_dt.timetuple())
    author = self._get_text(comment_renderer.get('authorText'))
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                   lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_favorited = 'creatorHeart' in (try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
2112
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, video_id, parent=None, comment_counts=None):
    """Generate the comments reachable from root_continuation_data.

    Yields comment dicts as built by _extract_comment. For the top-level
    call (parent is None) the estimated total number of comments is also
    yielded, as an int, once the header has been read. Replies are fetched
    by recursive calls that share comment_counts, a list holding
    [comments so far, estimated total, current reply thread number].
    """

    def extract_header(contents):
        # Reads the estimated comment count and picks the continuation of
        # the configured sort order from the comments header
        _total_comments = 0
        _continuation = None
        for content in contents:
            comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
            expected_comment_count = parse_count(self._get_text(
                comments_header_renderer, (lambda x: x['countText'], lambda x: x['commentsCount']), max_runs=1))

            if expected_comment_count:
                comment_counts[1] = expected_comment_count
                self.to_screen('Downloading ~%d comments' % expected_comment_count)
                _total_comments = comment_counts[1]
            sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
            comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = sort_menu_item.get('title')
            if isinstance(sort_text, compat_str):
                sort_text = sort_text.lower()
            else:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text)
            break
        return _total_comments, _continuation

    def extract_thread(contents):
        # Yields every comment in contents, each followed by its replies
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # `dict` is the expected type, not a second getter: passed inside
            # the getter tuple it would turn the source itself into a result
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    video_id, parent=comment.get('id'), comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    # YouTube comments have a max depth of 2
    max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
    if max_depth == 1 and parent:
        return
    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    continuation = self._extract_continuation(root_continuation_data)
    if continuation and len(continuation['continuation']) < 27:
        self.write_debug('Detected old API continuation token. Generating new API compatible token.')
        continuation_token = self._generate_comment_continuation(video_id)
        continuation = self._build_api_continuation_query(continuation_token, None)

    visitor_data = None
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    comment_counts[2], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
        if not response:
            break
        # Keep the previous visitor data when the response carries none
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

        continuation = None
        if isinstance(continuation_contents, list):
            for continuation_section in continuation_contents:
                if not isinstance(continuation_section, dict):
                    continue
                continuation_items = try_get(
                    continuation_section,
                    (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                     lambda x: x['appendContinuationItemsAction']['continuationItems']),
                    list) or []
                if is_first_continuation:
                    total_comments, continuation = extract_header(continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break
                    continue
                # Number of entries yielded from this section; counting from 1
                # keeps a page with a single comment distinct from an empty one
                count = 0
                for count, entry in enumerate(extract_thread(continuation_items), 1):
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break

        # Deprecated response structure
        elif isinstance(continuation_contents, dict):
            known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
            for key, continuation_renderer in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                if not isinstance(continuation_renderer, dict):
                    continue
                if is_first_continuation:
                    header_continuation_items = [continuation_renderer.get('header') or {}]
                    total_comments, continuation = extract_header(header_continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break

                # Sometimes YouTube provides a continuation without any comments
                # In most cases we end up just downloading these with very little comments to come.
                count = 0
                for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {}), 1):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                if count == 0:
                    if not parent:
                        self.report_warning('No comments received - assuming end of comments')
                    continuation = None
                break
a1c5d2ca 2283
2d6659b9 2284 @staticmethod
2285 def _generate_comment_continuation(video_id):
2286 """
2287 Generates initial comment section continuation token from given video id
2288 """
2289 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2290 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2291 new_continuation_intlist = list(itertools.chain.from_iterable(
2292 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2293 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2294
def _extract_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Collects up to ``max_comments`` comments (unlimited when the setting is
    unset) from the known renderers in contents. A KeyboardInterrupt stops
    the collection but the comments gathered so far are still returned.
    """
    known_renderer_keys = ('itemSectionRenderer',)

    def iter_comment_items(renderers):
        if not isinstance(renderers, list):
            return
        for entry in renderers:
            for key, renderer in entry.items():
                if key in known_renderer_keys:
                    yield from self._comment_entries(
                        renderer, video_id=video_id, ytcfg=ytcfg,
                        identity_token=self._extract_identity_token(webpage, item_id=video_id),
                        account_syncid=self._extract_account_syncid(ytcfg))
                    # Only the first known renderer of an entry is used
                    break

    collected = []
    estimated_total = 0
    limit = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')

    try:
        for item in iter_comment_items(contents):
            if len(collected) >= limit:
                break
            # An int is the estimated total, not a comment
            if isinstance(item, int):
                estimated_total = item
            else:
                collected.append(item)
    except KeyboardInterrupt:
        self.to_screen('Interrupted by user')
    self.to_screen('Downloaded %d/%d comments' % (len(collected), estimated_total))
    return {
        'comments': collected,
        'comment_count': len(collected),
    }
2328
109dd3b2 2329 @staticmethod
2330 def _generate_player_context(sts=None):
2331 context = {
2332 'html5Preference': 'HTML5_PREF_WANTS',
2333 }
2334 if sts is not None:
2335 context['signatureTimestamp'] = sts
2336 return {
2337 'playbackContext': {
2338 'contentPlaybackContext': context
a1a7907b 2339 },
2340 'contentCheckOk': True
109dd3b2 2341 }
2342
4e6767b5 2343 @staticmethod
c888ffb9 2344 def _get_video_info_params(video_id, client='TVHTML5'):
2345 GVI_CLIENTS = {
2346 'ANDROID': {
2347 'c': 'ANDROID',
2348 'cver': '16.20',
2349 },
2350 'TVHTML5': {
2351 'c': 'TVHTML5',
2352 'cver': '6.20180913',
11f9be09 2353 },
2354 'IOS': {
2355 'c': 'IOS',
2356 'cver': '16.20'
c888ffb9 2357 }
2358 }
2359 query = {
4e6767b5 2360 'video_id': video_id,
2361 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c888ffb9 2362 'html5': '1'
4e6767b5 2363 }
c888ffb9 2364 query.update(GVI_CLIENTS.get(client))
2365 return query
4e6767b5 2366
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
    """
    Request the player response of `video_id` for the given client

    The request is non-fatal; a falsy response is returned as None.
    """
    api_client = self._YT_CLIENTS[client]

    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    headers = self.generate_api_headers(
        player_ytcfg, identity_token, syncid,
        default_client=api_client, session_index=session_index)

    query = dict({'videoId': video_id}, **self._generate_player_context(sts))
    # Client names use "_" as separator (and as prefix); show them as words
    note = 'Downloading %s player API JSON' % client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=query,
        ytcfg=player_ytcfg, headers=headers, fatal=False,
        default_client=api_client, note=note)
    return response or None
2384
def _extract_age_gated_player_response(self, client, video_id, ytcfg, identity_token, player_url, initial_pr):
    """
    Try to get a player response for an age-gated video

    First the get_video_info endpoint is queried with the age-gate variant
    of `client`. If that gives nothing, the embedded variant of the client
    is tried, unless the embed page already reports the video as age-gated.

    @returns  The player response, or None if the client has no age-gate
              variant or no workaround applies
    """
    gvi_client = self._YT_CLIENTS.get(f'_{client}_agegate')
    if not gvi_client:
        return None

    # The download is non-fatal and may give a non-string result; always hand a string to the parser
    video_info_webpage = self._download_webpage(
        self.http_scheme() + '//www.youtube.com/get_video_info', video_id,
        'Refetching age-gated %s info webpage' % gvi_client.lower(),
        'unable to download video info webpage', fatal=False,
        query=self._get_video_info_params(video_id, client=gvi_client))
    video_info = compat_parse_qs(video_info_webpage or '')
    pr = self._parse_json(
        traverse_obj(video_info, ('player_response', 0), expected_type=str) or '{}', video_id)
    if pr:
        return pr

    self.report_warning('Falling back to embedded-only age-gate workaround')
    embed_webpage = None
    if client == 'web' and 'configs' not in self._configuration_arg('player_skip'):
        embed_webpage = self._download_webpage(
            'https://www.youtube.com/embed/%s?html5=1' % video_id,
            video_id=video_id, note=f'Downloading age-gated {client} embed config')

    ytcfg_age = self.extract_ytcfg(video_id, embed_webpage) or {}
    # If we extracted the embed webpage, it'll tell us if we can view the video
    embedded_pr = self._parse_json(
        traverse_obj(ytcfg_age, ('PLAYER_VARS', 'embedded_player_response'), expected_type=str) or '{}',
        video_id=video_id)
    embedded_ps_reason = traverse_obj(embedded_pr, ('playabilityStatus', 'reason'), expected_type=str) or ''
    if embedded_ps_reason in self._AGE_GATE_REASONS:
        return None
    return self._extract_player_response(
        f'_{client}_embedded', video_id,
        ytcfg_age or ytcfg, ytcfg_age if client == 'web' else {},
        identity_token, player_url, initial_pr)
545cc85d 2419
def _get_requested_clients(self, url, smuggled_data):
    """
    Determine the player clients to query, in order and without duplicates

    The `player_client` extractor argument is filtered down to known,
    non-internal clients, with 'android' and 'web' as the default.
    For music URLs the `_music` variant of every client is added as well.
    """
    requested_clients = [
        client for client in self._configuration_arg('player_client')
        # Clients prefixed with "_" are internal variants and cannot be requested
        if not client.startswith('_') and client in self._YT_CLIENTS]
    if not requested_clients:
        requested_clients = ['android', 'web']

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Build the additions first so the list is not extended while being iterated
        requested_clients.extend([
            f'{client}_music' for client in requested_clients if not client.endswith('_music')])

    return orderedSet(requested_clients)
cf7e015f 2431
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
    """
    Yield the player responses of all the given clients

    Once one response reports an age-gate reason, the regular request is
    skipped for the remaining clients and only the age-gate workaround is
    used. The player response embedded in the webpage is reused for the
    'web' client.
    """
    initial_pr = self._extract_yt_initial_variable(
        webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
        video_id, 'initial player response') if webpage else None

    age_gated = False
    for client in clients:
        player_ytcfg = {}
        if client == 'web':
            player_ytcfg = master_ytcfg

        pr = None
        if not age_gated:
            if client == 'web' and initial_pr:
                pr = initial_pr
            else:
                if client == 'web_music' and 'configs' not in self._configuration_arg('player_skip'):
                    ytm_webpage = self._download_webpage(
                        'https://music.youtube.com',
                        video_id, fatal=False, note='Downloading remix client config')
                    player_ytcfg = self.extract_ytcfg(video_id, ytm_webpage) or {}
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr)
        if pr:
            yield pr

        if not age_gated and traverse_obj(pr, ('playabilityStatus', 'reason')) not in self._AGE_GATE_REASONS:
            continue
        age_gated = True
        gated_pr = self._extract_age_gated_player_response(
            client, video_id, player_ytcfg or master_ytcfg, identity_token, player_url, initial_pr)
        if gated_pr:
            yield gated_pr

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr and 'web' not in clients:
        initial_pr['streamingData'] = None
        yield initial_pr
2469
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """
    Yield format dicts built from the given streamingData objects

    Formats listed directly in `formats`/`adaptiveFormats` come first,
    followed by those of the HLS and DASH manifests (unless skipped).
    A manifest format whose itag was already seen is not yielded again.

    @param streaming_data  List of streamingData dicts of the player responses
    @param player_url      URL of the player JS; formats with an encrypted
                           signature are skipped when it is not available
    @param is_live         DASH manifests are not used for live videos
    """
    itags, stream_ids = [], []
    itag_qualities = {}
    q = qualities([
        # "tiny" is the smallest video-only format. But some audio-only formats
        # were also labeled "tiny". It is not clear if such formats still exist
        'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        # The same itag can be present once per audio track
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        if quality == 'tiny' or not quality:
            quality = (fmt.get('audioQuality') or '').lower() or quality
        if itag and quality:
            itag_qualities[itag] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # Without a direct URL, it has to be rebuilt from the signature cipher
            sc = compat_parse_qs(fmt.get('signatureCipher') or '')
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(encrypted_sig, video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        if itag:
            itags.append(itag)
        stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': ', '.join(filter(None, (
                audio_track.get('displayName'), fmt.get('qualityLabel') or quality))),
            'fps': int_or_none(fmt.get('fps')),
            'height': int_or_none(fmt.get('height')),
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': audio_track.get('id', '').split('.')[0],
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        # The 3gp format in android client has a quality of "small",
        # but is actually worse than all other formats
        # NB: 'ext' is missing when the mime type could not be parsed
        if dct.get('ext') == '3gp':
            dct['quality'] = q('tiny')
            dct['preference'] = -10
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    skip_manifests = self._configuration_arg('skip')
    get_dash = not is_live and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
    get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(
                    hls_manifest_url, video_id, 'mp4', fatal=False):
                itag = self._search_regex(
                    r'/itag/(\d+)', f['url'], 'itag', default=None)
                if itag in itags:
                    continue
                if itag:
                    f['format_id'] = itag
                    itags.append(itag)
                yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(
                    dash_manifest_url, video_id, fatal=False):
                itag = f['format_id']
                if itag in itags:
                    continue
                if itag:
                    itags.append(itag)
                if itag in itag_qualities:
                    f['quality'] = q(itag_qualities[itag])
                filesize = int_or_none(self._search_regex(
                    r'/clen/(\d+)', f.get('fragment_base_url')
                    or f['url'], 'file size', default=None))
                if filesize:
                    f['filesize'] = filesize
                yield f
2595
def _real_extract(self, url):
    """
    Extract the information of a single video

    Downloads the watch page and the player responses of the requested
    clients, then builds the info dict from them and, when available, from
    the initial data of the watch page.

    @returns  The info dict of the video. For videos that only provide a
              trailer, a url result for the trailer is returned instead;
              for multifeed videos, a playlist of the feeds (unless
              --no-playlist is given)
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id
    webpage = self._download_webpage(
        webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
    player_url = self._extract_player_url(master_ytcfg, webpage)
    identity_token = self._extract_identity_token(webpage, video_id)

    player_responses = list(self._extract_player_responses(
        self._get_requested_clients(url, smuggled_data),
        video_id, webpage, master_ytcfg, player_url, identity_token))

    # First match of `keys` in any of the objects of the given list
    get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])
    video_title = (
        get_first(video_details, 'title')
        or self._get_text(microformats, (..., 'title'))
        or search_meta(['og:title', 'twitter:title', 'title']))
    video_description = get_first(video_details, 'shortDescription')

    if not smuggled_data.get('force_singlefeed', False):
        if not self.get_param('noplaylist'):
            multifeed_metadata_list = get_first(
                player_responses,
                ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
                expected_type=str)
            if multifeed_metadata_list:
                entries = []
                feed_ids = []
                for feed in multifeed_metadata_list.split(','):
                    # Unquote should take place before split on comma (,) since textual
                    # fields may contain comma as well (see
                    # https://github.com/ytdl-org/youtube-dl/issues/8536)
                    feed_data = compat_parse_qs(
                        compat_urllib_parse_unquote_plus(feed))

                    def feed_entry(name):
                        return try_get(
                            feed_data, lambda x: x[name][0], compat_str)

                    feed_id = feed_entry('id')
                    if not feed_id:
                        continue
                    feed_title = feed_entry('title')
                    title = video_title
                    if feed_title:
                        title += ' (%s)' % feed_title
                    entries.append({
                        '_type': 'url_transparent',
                        'ie_key': 'Youtube',
                        'url': smuggle_url(
                            '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                            {'force_singlefeed': True}),
                        'title': title,
                    })
                    feed_ids.append(feed_id)
                self.to_screen(
                    'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                    % (', '.join(feed_ids), video_id))
                return self.playlist_result(
                    entries, video_id, video_title, video_description)
        else:
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.raise_no_formats(
                'This video is DRM protected.', expected=True)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # There may be a subreason without any reason
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    for f in formats:
        # TODO: detect if throttled
        if '&n=' in f['url']:  # possibly throttled
            f['source_preference'] = -10
            # note = f.get('format_note')
            # f['format_note'] = f'{note} (throttled)' if note else '(throttled)'

    self._sort_formats(formats)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break

    thumbnails = []
    thumbnail_dicts = traverse_obj(
        (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
        expected_type=dict, default=[])
    for thumbnail in thumbnail_dicts:
        thumbnail_url = thumbnail.get('url')
        if not thumbnail_url:
            continue
        # Sometimes youtube gives a wrong thumbnail URL. See:
        # https://github.com/yt-dlp/yt-dlp/issues/233
        # https://github.com/ytdl-org/youtube-dl/issues/28023
        if 'maxresdefault' in thumbnail_url:
            thumbnail_url = thumbnail_url.split('?')[0]
        thumbnails.append({
            'url': thumbnail_url,
            'height': int_or_none(thumbnail.get('height')),
            'width': int_or_none(thumbnail.get('width')),
        })
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
    guaranteed_thumbnail_names = [
        'hqdefault', 'hq1', 'hq2', 'hq3', '0',
        'mqdefault', 'mq1', 'mq2', 'mq3',
        'default', '1', '2', '3'
    ]
    thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
    n_thumbnail_names = len(thumbnail_names)

    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
        '_test_url': name in hq_thumbnail_names,
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Prefer the names earlier in the list and, for the same name, webp
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    duration = int_or_none(
        get_first(video_details, 'lengthSeconds')
        or get_first(microformats, 'lengthSeconds')
        or parse_duration(search_meta('duration'))) or None
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    if is_live is None:
        if is_upcoming or live_content is False:
            is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False
    live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_endtime and live_starttime:
        duration = live_endtime - live_starttime

    info = {
        'id': video_id,
        'title': self._live_title(video_title) if is_live else video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        'description': video_description,
        'upload_date': unified_strdate(
            get_first(microformats, 'uploadDate')
            or search_meta('uploadDate')),
        'uploader': get_first(video_details, 'author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'is_live': is_live,
        'was_live': (False if is_live or is_upcoming or live_content is False
                     else None if is_live is None or is_upcoming is None
                     else live_content),
        'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
        'release_timestamp': live_starttime,
    }

    pctr = get_first(player_responses, ('captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    subtitles = {}
    if pctr:
        def process_language(container, base_url, lang_code, sub_name, query):
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': update_url_query(base_url, query),
                    'name': sub_name,
                })

        for caption_track in (pctr.get('captionTracks') or []):
            caption_base_url = caption_track.get('baseUrl')
            if not caption_base_url:
                continue
            if caption_track.get('kind') != 'asr':
                lang_code = (
                    remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
                    or caption_track.get('languageCode'))
                if not lang_code:
                    continue
                process_language(
                    subtitles, caption_base_url, lang_code,
                    try_get(caption_track, lambda x: x['name']['simpleText']),
                    {})
                continue
            # The "asr" track is the source of all the translated captions
            automatic_captions = {}
            for translation_language in (pctr.get('translationLanguages') or []):
                translation_language_code = translation_language.get('languageCode')
                if not translation_language_code:
                    continue
                process_language(
                    automatic_captions, caption_base_url, translation_language_code,
                    self._get_text(translation_language.get('languageName'), max_runs=1),
                    {'tlang': translation_language_code})
            info['automatic_captions'] = automatic_captions
    # Always set, so that the live chat below can be added even without any caption
    info['subtitles'] = subtitles

    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(v[0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_DATA_RE, video_id,
            'yt initial data')
    if not initial_data:
        headers = self.generate_api_headers(
            master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
            session_index=self._extract_session_index(master_ytcfg))

        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
            note='Downloading initial data API JSON')

    try:
        # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
        info['subtitles']['live_chat'] = [{
            'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]
    except (KeyError, IndexError, TypeError):
        pass

    # Must be defined even without initial data since the comment extraction below uses it
    contents = []
    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or None)

        contents = try_get(
            initial_data,
            lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
            list) or []
        for content in contents:
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = self._get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    like_count, dislike_count = sbr_tooltip.split(' / ')
                    info.update({
                        'like_count': str_to_int(like_count),
                        'dislike_count': str_to_int(dislike_count),
                    })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                info['channel'] = self._get_text(try_get(
                    vsir,
                    lambda x: x['owner']['videoOwnerRenderer']['title'],
                    dict))
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                # With more than one song, the music metadata cannot be attributed to the video
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = self._get_text(mrr['title'])
                    mrr_contents_text = self._get_text(mrr['contents'][0])
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }
    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = get_first(video_details, 'isPrivate', expected_type=bool)
    is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    # get xsrf for annotations or comments
    get_annotations = self.get_param('writeannotations', False)
    get_comments = self.get_param('getcomments', False)
    if get_annotations or get_comments:
        xsrf_token = None
        if master_ytcfg:
            xsrf_token = try_get(master_ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
        if not xsrf_token:
            # The webpage download is non-fatal, so there may be no webpage to search
            xsrf_token = self._search_regex(
                r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
                webpage or '', 'xsrf token', group='xsrf_token', fatal=False)

    # annotations
    if get_annotations:
        invideo_url = get_first(
            player_responses,
            ('annotations', 0, 'playerAnnotationsUrlsRenderer', 'invideoUrl'),
            expected_type=str)
        if xsrf_token and invideo_url:
            xsrf_field_name = None
            if master_ytcfg:
                xsrf_field_name = try_get(master_ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
            if not xsrf_field_name:
                xsrf_field_name = self._search_regex(
                    r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
                    webpage or '', 'xsrf field name',
                    group='xsrf_field_name', default='session_token')
            info['annotations'] = self._download_webpage(
                self._proto_relative_url(invideo_url),
                video_id, note='Downloading annotations',
                errnote='Unable to download video annotations', fatal=False,
                data=urlencode_postdata({xsrf_field_name: xsrf_token}))

    if get_comments:
        # The comments are downloaded only when the post extractor is called
        info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
c5e8d7af 3105
5f6a1245 3106
8bdd16b4 3107class YoutubeTabIE(YoutubeBaseInfoExtractor):
3108 IE_DESC = 'YouTube.com tab'
70d5c17b 3109 _VALID_URL = r'''(?x)
3110 https?://
3111 (?:\w+\.)?
3112 (?:
3113 youtube(?:kids)?\.com|
3114 invidio\.us
3115 )/
3116 (?:
fe03a6cd 3117 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 3118 (?P<not_channel>
9ba5705a 3119 feed/|hashtag/|
70d5c17b 3120 (?:playlist|watch)\?.*?\blist=
3121 )|
29f7c58a 3122 (?!(?:%s)\b) # Direct URLs
70d5c17b 3123 )
3124 (?P<id>[^/?\#&]+)
3125 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 3126 IE_NAME = 'youtube:tab'
3127
81127aa5 3128 _TESTS = [{
da692b79 3129 'note': 'playlists, multipage',
8bdd16b4 3130 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3131 'playlist_mincount': 94,
3132 'info_dict': {
3133 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3134 'title': 'Игорь Клейнер - Playlists',
3135 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3136 'uploader': 'Игорь Клейнер',
3137 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 3138 },
3139 }, {
da692b79 3140 'note': 'playlists, multipage, different order',
8bdd16b4 3141 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3142 'playlist_mincount': 94,
3143 'info_dict': {
3144 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3145 'title': 'Игорь Клейнер - Playlists',
3146 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3147 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3148 'uploader': 'Игорь Клейнер',
8bdd16b4 3149 },
201c1459 3150 }, {
da692b79 3151 'note': 'playlists, series',
201c1459 3152 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3153 'playlist_mincount': 5,
3154 'info_dict': {
3155 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3156 'title': '3Blue1Brown - Playlists',
3157 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 3158 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3159 'uploader': '3Blue1Brown',
201c1459 3160 },
8bdd16b4 3161 }, {
da692b79 3162 'note': 'playlists, singlepage',
8bdd16b4 3163 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3164 'playlist_mincount': 4,
3165 'info_dict': {
3166 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3167 'title': 'ThirstForScience - Playlists',
3168 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 3169 'uploader': 'ThirstForScience',
3170 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 3171 }
3172 }, {
3173 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3174 'only_matching': True,
3175 }, {
da692b79 3176 'note': 'basic, single video playlist',
0e30a7b9 3177 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3178 'info_dict': {
0e30a7b9 3179 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3180 'uploader': 'Sergey M.',
3181 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3182 'title': 'youtube-dl public playlist',
81127aa5 3183 },
0e30a7b9 3184 'playlist_count': 1,
9291475f 3185 }, {
da692b79 3186 'note': 'empty playlist',
0e30a7b9 3187 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3188 'info_dict': {
0e30a7b9 3189 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3190 'uploader': 'Sergey M.',
3191 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3192 'title': 'youtube-dl empty playlist',
9291475f
PH
3193 },
3194 'playlist_count': 0,
3195 }, {
da692b79 3196 'note': 'Home tab',
8bdd16b4 3197 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3198 'info_dict': {
8bdd16b4 3199 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3200 'title': 'lex will - Home',
3201 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3202 'uploader': 'lex will',
3203 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3204 },
8bdd16b4 3205 'playlist_mincount': 2,
9291475f 3206 }, {
da692b79 3207 'note': 'Videos tab',
8bdd16b4 3208 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3209 'info_dict': {
8bdd16b4 3210 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3211 'title': 'lex will - Videos',
3212 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3213 'uploader': 'lex will',
3214 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3215 },
8bdd16b4 3216 'playlist_mincount': 975,
9291475f 3217 }, {
da692b79 3218 'note': 'Videos tab, sorted by popular',
8bdd16b4 3219 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3220 'info_dict': {
8bdd16b4 3221 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3222 'title': 'lex will - Videos',
3223 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3224 'uploader': 'lex will',
3225 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3226 },
8bdd16b4 3227 'playlist_mincount': 199,
9291475f 3228 }, {
da692b79 3229 'note': 'Playlists tab',
8bdd16b4 3230 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3231 'info_dict': {
8bdd16b4 3232 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3233 'title': 'lex will - Playlists',
3234 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3235 'uploader': 'lex will',
3236 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3237 },
8bdd16b4 3238 'playlist_mincount': 17,
ac7553d0 3239 }, {
da692b79 3240 'note': 'Community tab',
8bdd16b4 3241 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3242 'info_dict': {
8bdd16b4 3243 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3244 'title': 'lex will - Community',
3245 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3246 'uploader': 'lex will',
3247 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3248 },
3249 'playlist_mincount': 18,
87dadd45 3250 }, {
da692b79 3251 'note': 'Channels tab',
8bdd16b4 3252 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3253 'info_dict': {
8bdd16b4 3254 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3255 'title': 'lex will - Channels',
3256 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3257 'uploader': 'lex will',
3258 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3259 },
deaec5af 3260 'playlist_mincount': 12,
cd684175 3261 }, {
3262 'note': 'Search tab',
3263 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3264 'playlist_mincount': 40,
3265 'info_dict': {
3266 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3267 'title': '3Blue1Brown - Search - linear algebra',
3268 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3269 'uploader': '3Blue1Brown',
3270 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3271 },
6b08cdf6 3272 }, {
a0566bbf 3273 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3274 'only_matching': True,
3275 }, {
a0566bbf 3276 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3277 'only_matching': True,
3278 }, {
a0566bbf 3279 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3280 'only_matching': True,
3281 }, {
3282 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3283 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3284 'info_dict': {
3285 'title': '29C3: Not my department',
3286 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3287 'uploader': 'Christiaan008',
3288 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3289 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3290 },
3291 'playlist_count': 96,
3292 }, {
3293 'note': 'Large playlist',
3294 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3295 'info_dict': {
8bdd16b4 3296 'title': 'Uploads from Cauchemar',
3297 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3298 'uploader': 'Cauchemar',
3299 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3300 },
8bdd16b4 3301 'playlist_mincount': 1123,
3302 }, {
da692b79 3303 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3304 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3305 'only_matching': True,
4b7df0d3
JMF
3306 }, {
3307 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3308 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3309 'info_dict': {
acf757f4
PH
3310 'title': 'Uploads from Interstellar Movie',
3311 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3312 'uploader': 'Interstellar Movie',
8bdd16b4 3313 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3314 },
481cc733 3315 'playlist_mincount': 21,
358de58c 3316 }, {
3317 'note': 'Playlist with "show unavailable videos" button',
3318 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3319 'info_dict': {
3320 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3321 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3322 'uploader': 'Phim Siêu Nhân Nhật Bản',
3323 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3324 },
da692b79 3325 'playlist_mincount': 200,
5d342002 3326 }, {
da692b79 3327 'note': 'Playlist with unavailable videos in page 7',
5d342002 3328 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3329 'info_dict': {
3330 'title': 'Uploads from BlankTV',
3331 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3332 'uploader': 'BlankTV',
3333 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3334 },
da692b79 3335 'playlist_mincount': 1000,
8bdd16b4 3336 }, {
da692b79 3337 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3338 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3339 'info_dict': {
3340 'title': 'Data Analysis with Dr Mike Pound',
3341 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3342 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3343 'uploader': 'Computerphile',
deaec5af 3344 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3345 },
3346 'playlist_mincount': 11,
3347 }, {
a0566bbf 3348 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3349 'only_matching': True,
dacb3a86 3350 }, {
da692b79 3351 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3352 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3353 'info_dict': {
3354 'id': 'FqZTN594JQw',
3355 'ext': 'webm',
3356 'title': "Smiley's People 01 detective, Adventure Series, Action",
3357 'uploader': 'STREEM',
3358 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3359 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3360 'upload_date': '20150526',
3361 'license': 'Standard YouTube License',
3362 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3363 'categories': ['People & Blogs'],
3364 'tags': list,
dbdaaa23 3365 'view_count': int,
dacb3a86
S
3366 'like_count': int,
3367 'dislike_count': int,
3368 },
3369 'params': {
3370 'skip_download': True,
3371 },
13a75688 3372 'skip': 'This video is not available.',
dacb3a86 3373 'add_ie': [YoutubeIE.ie_key()],
481cc733 3374 }, {
8bdd16b4 3375 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3376 'only_matching': True,
66b48727 3377 }, {
8bdd16b4 3378 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3379 'only_matching': True,
a0566bbf 3380 }, {
3381 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3382 'info_dict': {
11f9be09 3383 'id': 'FMtPN8yp5LU', # This will keep changing
a0566bbf 3384 'ext': 'mp4',
deaec5af 3385 'title': compat_str,
a0566bbf 3386 'uploader': 'Sky News',
3387 'uploader_id': 'skynews',
3388 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3389 'upload_date': r're:\d{8}',
3390 'description': compat_str,
a0566bbf 3391 'categories': ['News & Politics'],
3392 'tags': list,
3393 'like_count': int,
3394 'dislike_count': int,
3395 },
3396 'params': {
3397 'skip_download': True,
3398 },
da692b79 3399 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3400 }, {
3401 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3402 'info_dict': {
3403 'id': 'a48o2S1cPoo',
3404 'ext': 'mp4',
3405 'title': 'The Young Turks - Live Main Show',
3406 'uploader': 'The Young Turks',
3407 'uploader_id': 'TheYoungTurks',
3408 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3409 'upload_date': '20150715',
3410 'license': 'Standard YouTube License',
3411 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3412 'categories': ['News & Politics'],
3413 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3414 'like_count': int,
3415 'dislike_count': int,
3416 },
3417 'params': {
3418 'skip_download': True,
3419 },
3420 'only_matching': True,
3421 }, {
3422 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3423 'only_matching': True,
3424 }, {
3425 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3426 'only_matching': True,
09f1580e 3427 }, {
3428 'note': 'A channel that is not live. Should raise error',
3429 'url': 'https://www.youtube.com/user/numberphile/live',
3430 'only_matching': True,
3d3dddc9 3431 }, {
3432 'url': 'https://www.youtube.com/feed/trending',
3433 'only_matching': True,
3434 }, {
3d3dddc9 3435 'url': 'https://www.youtube.com/feed/library',
3436 'only_matching': True,
3437 }, {
3d3dddc9 3438 'url': 'https://www.youtube.com/feed/history',
3439 'only_matching': True,
3440 }, {
3d3dddc9 3441 'url': 'https://www.youtube.com/feed/subscriptions',
3442 'only_matching': True,
3443 }, {
3d3dddc9 3444 'url': 'https://www.youtube.com/feed/watch_later',
3445 'only_matching': True,
3446 }, {
da692b79 3447 'note': 'Recommended - redirects to home page',
3d3dddc9 3448 'url': 'https://www.youtube.com/feed/recommended',
3449 'only_matching': True,
29f7c58a 3450 }, {
da692b79 3451 'note': 'inline playlist with not always working continuations',
29f7c58a 3452 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3453 'only_matching': True,
3454 }, {
3455 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3456 'only_matching': True,
3457 }, {
3458 'url': 'https://www.youtube.com/course',
3459 'only_matching': True,
3460 }, {
3461 'url': 'https://www.youtube.com/zsecurity',
3462 'only_matching': True,
3463 }, {
3464 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3465 'only_matching': True,
3466 }, {
3467 'url': 'https://www.youtube.com/TheYoungTurks/live',
3468 'only_matching': True,
39ed931e 3469 }, {
3470 'url': 'https://www.youtube.com/hashtag/cctv9',
3471 'info_dict': {
3472 'id': 'cctv9',
3473 'title': '#cctv9',
3474 },
3475 'playlist_mincount': 350,
201c1459 3476 }, {
3477 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3478 'only_matching': True,
9297939e 3479 }, {
da692b79 3480 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3481 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3482 'only_matching': True
fe03a6cd 3483 }, {
3484 'note': '/browse/ should redirect to /channel/',
3485 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3486 'only_matching': True
3487 }, {
3488 'note': 'VLPL, should redirect to playlist?list=PL...',
3489 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3490 'info_dict': {
3491 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3492 'uploader': 'NoCopyrightSounds',
3493 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3494 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3495 'title': 'NCS Releases',
3496 },
3497 'playlist_mincount': 166,
18db7548 3498 }, {
3499 'note': 'Topic, should redirect to playlist?list=UU...',
3500 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3501 'info_dict': {
3502 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3503 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3504 'title': 'Uploads from Royalty Free Music - Topic',
3505 'uploader': 'Royalty Free Music - Topic',
3506 },
3507 'expected_warnings': [
3508 'A channel/user page was given',
3509 'The URL does not have a videos tab',
3510 ],
3511 'playlist_mincount': 101,
3512 }, {
3513 'note': 'Topic without a UU playlist',
3514 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3515 'info_dict': {
3516 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3517 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3518 },
3519 'expected_warnings': [
3520 'A channel/user page was given',
3521 'The URL does not have a videos tab',
3522 'Falling back to channel URL',
3523 ],
3524 'playlist_mincount': 9,
abcdd12b 3525 }, {
3526 'note': 'Youtube music Album',
3527 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3528 'info_dict': {
3529 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3530 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3531 },
3532 'playlist_count': 50,
47193e02 3533 }, {
3534 'note': 'unlisted single video playlist',
3535 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3536 'info_dict': {
3537 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3538 'uploader': 'colethedj',
3539 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3540 'title': 'yt-dlp unlisted playlist test',
3541 'availability': 'unlisted'
3542 },
3543 'playlist_count': 1,
29f7c58a 3544 }]
3545
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle ``url``.

    Any URL that ``YoutubeIE`` accepts is rejected here; everything else is
    decided by the inherited ``suitable`` check.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 3550
3551 def _extract_channel_id(self, webpage):
3552 channel_id = self._html_search_meta(
3553 'channelId', webpage, 'channel id', default=None)
3554 if channel_id:
3555 return channel_id
3556 channel_url = self._html_search_meta(
3557 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3558 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3559 'twitter:app:url:googleplay'), webpage, 'channel url')
3560 return self._search_regex(
3561 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3562 channel_url, 'channel id')
15f6397c 3563
8bdd16b4 3564 @staticmethod
cd7c66cf 3565 def _extract_basic_item_renderer(item):
3566 # Modified from _extract_grid_item_renderer
201c1459 3567 known_basic_renderers = (
3568 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3569 )
3570 for key, renderer in item.items():
201c1459 3571 if not isinstance(renderer, dict):
cd7c66cf 3572 continue
201c1459 3573 elif key in known_basic_renderers:
3574 return renderer
3575 elif key.startswith('grid') and key.endswith('Renderer'):
3576 return renderer
8bdd16b4 3577
def _grid_entries(self, grid_renderer):
    """Yield one result for each recognised item of ``grid_renderer['items']``.

    An item's renderer is classified by the first ID it carries, checked in
    the order ``playlistId``, ``videoId``, ``channelId``. Items with none of
    these fall back to their navigation endpoint URL, which is yielded only
    when one of the YouTube extractors declares itself suitable for it.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        item_renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(item_renderer, dict):
            continue
        item_title = self._get_text(item_renderer.get('title'))

        # Playlist item
        playlist_id = item_renderer.get('playlistId')
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=item_title)
            continue

        # Video item
        if item_renderer.get('videoId'):
            yield self._extract_video(item_renderer)
            continue

        # Channel item
        channel_id = item_renderer.get('channelId')
        if channel_id:
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=item_title)
            continue

        # Anything else: try the generic endpoint URL of the renderer
        endpoint_path = try_get(
            item_renderer,
            lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str)
        endpoint_url = urljoin('https://www.youtube.com/', endpoint_path)
        if not endpoint_url:
            continue
        # First extractor (in this fixed order) that accepts the URL wins
        matching_ie = next(
            (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE)
             if ie.suitable(endpoint_url)),
            None)
        if matching_ie is not None:
            yield self.url_result(
                endpoint_url, ie=matching_ie.ie_key(),
                video_id=matching_ie._match_id(endpoint_url), video_title=item_title)
8bdd16b4 3617
3d3dddc9 3618 def _shelf_entries_from_content(self, shelf_renderer):
3619 content = shelf_renderer.get('content')
3620 if not isinstance(content, dict):
8bdd16b4 3621 return
cd7c66cf 3622 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3623 if renderer:
3624 # TODO: add support for nested playlists so each shelf is processed
3625 # as separate playlist
3626 # TODO: this includes only first N items
3627 for entry in self._grid_entries(renderer):
3628 yield entry
3629 renderer = content.get('horizontalListRenderer')
3630 if renderer:
3631 # TODO
3632 pass
8bdd16b4 3633
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield the entries of a shelf.

    If the shelf has an endpoint URL, a URL result pointing at it is yielded
    first. The entries embedded in the shelf content are yielded afterwards
    in every case, since a shelf may come without a URL.

    :param skip_channels: when true, a shelf whose URL contains
        ``/channels?`` yields nothing at all
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Links to other channels are detected from the URL itself; per the
        # original note, checking
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = self._get_text(shelf_renderer, lambda x: x['title'])
        yield self.url_result(shelf_url, video_title=shelf_title)
    # The shelf may not carry a URL, so also extract from its content
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
c5e8d7af 3650
8bdd16b4 3651 def _playlist_entries(self, video_list_renderer):
3652 for content in video_list_renderer['contents']:
3653 if not isinstance(content, dict):
3654 continue
3655 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3656 if not isinstance(renderer, dict):
3657 continue
3658 video_id = renderer.get('videoId')
3659 if not video_id:
3660 continue
3661 yield self._extract_video(renderer)
07aeced6 3662
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in ``content.videoRenderer``, if it has a ``videoId``."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3670
8bdd16b4 3671 def _video_entry(self, video_renderer):
3672 video_id = video_renderer.get('videoId')
3673 if video_id:
3674 return self._extract_video(video_renderer)
dacb3a86 3675
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a community post.

    In order: the attached video (if any), the attached playlist (if any),
    then every inline link in the post text that ``YoutubeIE`` accepts and
    that does not point at the already-yielded attached video.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # Attached video
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # Attached playlist
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # Video links inside the post text
    text_runs = try_get(post, lambda x: x['contentText']['runs'], list) or []
    for text_run in text_runs:
        if not isinstance(text_run, dict):
            continue
        link_url = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link_url or not YoutubeIE.suitable(link_url):
            continue
        linked_video_id = YoutubeIE._match_id(link_url)
        # Do not repeat the attached video
        if video_id == linked_video_id:
            continue
        yield self.url_result(link_url, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
dacb3a86 3711
8bdd16b4 3712 def _post_thread_continuation_entries(self, post_thread_continuation):
3713 contents = post_thread_continuation.get('contents')
3714 if not isinstance(contents, list):
3715 return
3716 for content in contents:
3717 renderer = content.get('backstagePostThreadRenderer')
3718 if not isinstance(renderer, dict):
3719 continue
3720 for entry in self._post_thread_entries(renderer):
3721 yield entry
07aeced6 3722
39ed931e 3723 r''' # unused
3724 def _rich_grid_entries(self, contents):
3725 for content in contents:
3726 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3727 if video_renderer:
3728 entry = self._video_entry(video_renderer)
3729 if entry:
3730 yield entry
3731 '''
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield the entries of a tab, then keep following continuations.

    The entries found directly in ``tab['content']`` are yielded first.
    After that, continuation pages are requested one at a time through
    ``self._extract_response`` until there is no continuation left, a
    response is empty, or a response contains nothing recognised here.

    :param tab: tab renderer dict; ``tab['content']`` and ``tab.get('title')`` are read
    :param item_id: used to build the ``'<item_id> page <n>'`` label of each request
    :param identity_token: forwarded to ``self.generate_api_headers``
    :param account_syncid: forwarded to ``self.generate_api_headers``
    :param ytcfg: forwarded to ``self.generate_api_headers`` and ``self._extract_response``
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        # Yields the entries below ``parent_renderer['contents']`` and stores
        # the continuation it finds in ``continuation_list[0]``.
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                # Not an item section: only rich items are handled here
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                # Renderer key -> callable returning an iterable of entries
                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    # Only the first known renderer of each item is processed
                    break

            # Fall back to a continuation attached to the item section itself
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        # Last resort: a continuation attached to the parent renderer
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list shared with extract_entries(), which writes the
    # continuation into it (Python 2 does not support nonlocal)
    continuation_list = [None]
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    visitor_data = None

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep the previous visitor data when the response carries none
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        # First response shape: a renderer keyed under 'continuationContents'
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Fresh slot so that a value written by extract_entries() takes
            # precedence over the renderer's own continuation below
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response shape: a list of continuation items. The type of
        # the first item selects the handler and the key under which the
        # whole list is wrapped before being passed to it.
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'gridChannelRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Nothing recognised in this response: stop paging
        break
9558dcec 3847
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the first tab whose ``selected`` flag is ``True``.

    Both ``tabRenderer`` and ``expandableTabRenderer`` tabs are considered.
    Raises ``ExtractorError`` when no tab is selected.
    """
    tab_renderers = (
        dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        for tab in tabs)
    selected = next(
        (tab_renderer for tab_renderer in tab_renderers
         if tab_renderer.get('selected') is True),
        None)
    if selected is None:
        raise ExtractorError('Unable to find selected tab')
    return selected
b82f815f 3856
@classmethod
def _extract_uploader(cls, data):
    """Return the uploader fields found in the playlist sidebar of ``data``.

    The result may contain ``uploader``, ``uploader_id`` and
    ``uploader_url``; fields whose value is ``None`` are left out, and an
    empty dict is returned when the sidebar names no video owner.
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner_run = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner_run:
        return {}
    fields = (
        ('uploader', owner_run.get('text')),
        ('uploader_id', try_get(
            owner_run, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)),
        ('uploader_url', urljoin(
            'https://www.youtube.com/',
            try_get(owner_run, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))),
    )
    return {name: value for name, value in fields if value is not None}
8bdd16b4 3871
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """
    Build the playlist result for the selected tab of a tabbed page.

    Metadata is read from the 'channelMetadataRenderer' when present,
    otherwise from the 'playlistMetadataRenderer'; uploader details fall
    back to the sidebar when no channel id was found.

    @param item_id: id used as the playlist id if none is found in `data`
    @param webpage: webpage the ytcfg and identity token are extracted from
    @param data:    response
    @param tabs:    list of tabs; the selected one provides the entries
    """
    channel_name = channel_url = channel_id = None
    playlist_id = title = description = None
    tags = []
    thumbnails_list = []

    selected_tab = self._extract_selected_tab(tabs)
    meta_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if not meta_renderer:
        meta_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
    else:
        channel_name = meta_renderer.get('title')
        channel_url = meta_renderer.get('channelUrl')
        channel_id = meta_renderer.get('externalId')

    if meta_renderer:
        title = meta_renderer.get('title')
        description = meta_renderer.get('description', '')
        playlist_id = channel_id
        tags = meta_renderer.get('keywords', '').split()
        # Prefer the avatar; otherwise use the playlist thumbnail from the sidebar
        thumbnails_list = try_get(meta_renderer, lambda x: x['avatar']['thumbnails'], list)
        if not thumbnails_list:
            thumbnails_list = try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list) or []

    thumbnails = []
    for thumb in thumbnails_list:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag = try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag or playlist_id
    # Append the tab's title/expandedText (when set) to the playlist title
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the (possibly just updated) uploader_* fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    ytcfg = self.extract_ytcfg(item_id, webpage)
    entries = self._entries(
        selected_tab, playlist_id,
        self._extract_identity_token(webpage, item_id),
        self._extract_account_syncid(ytcfg, data), ytcfg)
    return self.playlist_result(entries, **metadata)
73c4ac2c 3946
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """
    Yield the videos of a mix playlist, requesting further pages as needed.

    Each page is obtained from the 'next' endpoint, continuing from the
    last video of the previous page. Extraction stops when a page has no
    videos, has nothing after the last yielded video, repeats the very
    first video, or when no further playlist can be found in the response.

    @param playlist:    playlist renderer of the first page
    @param playlist_id: id of the mix playlist
    @param data:        response the account syncid is extracted from
    @param webpage:     webpage the ytcfg and identity token are extracted from
    """
    first_id = last_id = None
    ytcfg = self.extract_ytcfg(playlist_id, webpage)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last video already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item may lack a watch endpoint; fall back to an empty
        # dict so that the defaults below are used instead of crashing
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # No further page could be found in the response
            return
cd7c66cf 3982
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """
    Handle a playlist attached to a watch page.

    If the playlist's own endpoint resolves to a URL different from `url`,
    extraction is delegated to that URL; otherwise the playlist is
    extracted as a mix.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    canonical_url = urljoin(url, endpoint_path)

    if not canonical_url or canonical_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, webpage),
            playlist_id=playlist_id, playlist_title=title)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    return self.url_result(
        canonical_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 4000
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public

    The labels considered are the badges of the primary sidebar renderer
    plus, if present, the label of the selected privacy dropdown entry.

    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar,
        lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'],
        list) or []
    for entry in dropdown_entries:
        if not try_get(entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        selected_label = self._get_text(
            try_get(entry, lambda x: x['privacyDropdownItemRenderer']['label'], dict) or [])
        if selected_label:
            labels.add(selected_label.lower())
            break

    is_private = is_unlisted = None
    for label in labels:
        if label == 'unlisted':
            is_unlisted = True
        elif label == 'private':
            is_private = True
        elif label == 'public':
            is_private = is_unlisted = False
    return self._availability(is_private, False, False, False, is_unlisted)
4033
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first non-empty `info_renderer` entry of the playlist sidebar.

    @param data:          response
    @param info_renderer: key to look up in each sidebar item
    @param expected_type: type the entry must have to be accepted
    @returns: the matching renderer, or None if there is none
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(entry, lambda x: x[info_renderer], expected_type)
        for entry in sidebar_items)
    return next((found for found in candidates if found), None)
4042
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns None when the page has no primary sidebar renderer. Otherwise
    the browse id and params of the button are used for the request, with
    fixed defaults when the button (or one of its values) is missing.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_id = params = None
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for entry in menu_items:
        if not isinstance(entry, dict):
            continue
        nav_renderer = entry.get('menuNavigationItemRenderer')
        caption = try_get(
            nav_renderer, lambda x: x['text']['simpleText'], compat_str)
        if caption and caption.lower() == 'show unavailable videos':
            endpoint = try_get(
                nav_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id, params = endpoint.get('browseId'), endpoint.get('params')
            break

    ytcfg = self.extract_ytcfg(item_id, webpage)
    visitor_data = try_get(
        self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=visitor_data)
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4081
def _extract_webpage(self, url, item_id):
    """
    Download the webpage and its initial data, retrying on incomplete data.

    The data is considered complete when it has 'contents' or
    'currentVideoEndpoint'. Up to 'extractor_retries' (default 3) retries
    are made after the first attempt; at least one attempt is always made.

    @returns: tuple (webpage, data)
    @raises ExtractorError: if the data is still incomplete after the
                            last retry
    """
    retries = self.get_param('extractor_retries', 3)
    last_error = 'Incomplete yt initial data received'
    for count in itertools.count():
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if count:
            self.report_warning('%s. Retrying ...' % last_error)
        webpage = self._download_webpage(
            url, item_id,
            'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
        data = self.extract_yt_initial_data(item_id, webpage)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            return webpage, data
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        if count >= retries:
            raise ExtractorError(last_error)
4103
@staticmethod
def _smuggle_data(entries, data):
    """
    Lazily yield `entries`, smuggling `data` into each entry's 'url'.

    Entries are yielded untouched while `data` is empty.
    """
    for item in entries:
        if not data:
            yield item
            continue
        item['url'] = smuggle_url(item['url'], data)
        yield item
4110
def _real_extract(self, url):
    """
    Extract `url`, carrying the smuggled data over to the resulting entries.

    Music URLs are additionally flagged with 'is_music_url' in the
    smuggled data before the actual extraction runs.
    """
    url, smuggled = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled['is_music_url'] = True
    result = self.__real_extract(url, smuggled)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled)
    return result
4119
# Splits a URL into `pre` (the part matched by _VALID_URL), an optional
# `tab` ("/name", only attempted when _VALID_URL's `channel_type` group
# matched) and `post` (whatever follows)
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4121
def __real_extract(self, url, smuggled_data):
    """
    Extract a tab, playlist or video page.

    The URL is first normalised (host, tab name, music URLs, bare channel
    URLs), then the webpage is downloaded and the result is dispatched on
    what the initial data contains: tabs, a watch-page playlist, or a
    single video.

    @param url:           URL to extract (already unsmuggled)
    @param smuggled_data: dict; only 'is_music_url' is read here
    @raises ExtractorError: if the page cannot be recognized, or the
                            /live tab was requested on a channel that is
                            not live
    """
    item_id = self._match_id(url)
    # Always request the page from www.youtube.com
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Match groups of _url_re as a dict, with unmatched groups as ''
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                item_id = self._search_regex(
                    r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                    self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                    'playlist id')
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
        # is_channel is re-checked because the music handling above may have cleared it
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

    # Rebuild the URL from the (possibly rewritten) parts and re-parse it
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            # The selected tab is not the requested /videos tab
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            if alert_type == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        # Only switch to the playlist once it loaded without error alerts
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)
    # data may have been replaced above, so the tabs are looked up again
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    # Neither tabs nor a playlist: fall back to a single video, if any
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 4236
c5e8d7af 4237
class YoutubePlaylistIE(InfoExtractor):
    """Matches playlist ids and playlist URLs and hands them to YoutubeTabIE."""

    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE handles or that carry a video id (`v`)."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        if has_video_id:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Redirect to the canonical playlist URL, keeping the original query if any."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        query = parse_qs(url) or {'list': playlist_id}
        target = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            target = smuggle_url(target, {'is_music_url': True})
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4322
4323
class YoutubeYtBeIE(InfoExtractor):
    """Rewrites youtu.be links carrying a playlist id into watch URLs for YoutubeTabIE."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent watch URL (video + playlist) and delegate to YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4362
4363
class YoutubeYtUserIE(InfoExtractor):
    """Resolves the "ytuser:<name>" shorthand to the user's page."""

    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /user/ URL of the given name."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4377
b05654f0 4378
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolves the ":ytfav" shorthand to the "LL" playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed "LL" playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4396
4397
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor that pages through the 'search' endpoint."""

    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """
        Yield up to `n` video results for `query`.

        Pages are requested until `n` videos were yielded, a response has
        no usable contents, or no continuation is found.
        """
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS
        yielded = 0
        continuation = {}
        for page_num in itertools.count(1):
            request.update(continuation)
            response = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not response:
                return
            sections = try_get(
                response,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for section in sections:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [section]})

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for item in items or ():
                    video = item.get('videoRenderer') if isinstance(item, dict) else None
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    yielded += 1
                    if yielded == n:
                        return

            if not continuation:
                return

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        results = self._entries(query, n)
        return self.playlist_result(results, query, query)
75dff0ee 4465
c9ae7b95 4466
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE with its own search key and search params."""

    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # Sent as 'params' with every search request (see YoutubeSearchIE._entries)
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4472
c9ae7b95 4473
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Extracts the results of a youtube.com/results search URL."""

    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Use the explicit _VALID_URL of this class."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Search for the URL's 'search_query'/'q' value, passing 'sp' on as search params."""
        url_params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = url_params.get('search_query') or url_params.get('q')
        search_terms = terms[0]
        self._SEARCH_PARAMS = url_params.get('sp', ('',))[0]
        return self._get_n_results(search_terms, self._MAX_RESULTS)
3462ffa8 4500
4501
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        """Extractor name derived from the feed name."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /feed/ URL of this feed."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4518
4519
class YoutubeWatchLaterIE(InfoExtractor):
    """Resolves the ":ytwatchlater" shorthand to the "WL" playlist."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed "WL" playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4532
4533
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'recommended' feed."""

    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Matches the bare youtube.com home page as well as the ":ytrec" shorthand
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Overrides the True set on the feeds base class
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4549
1ed5b5c9 4550
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'subscriptions' feed."""

    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    # Accepts ":ytsub", ":ytsubs", ":ytsubscription" and ":ytsubscriptions"
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4562
1ed5b5c9 4563
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'history' feed."""

    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    # Accepts ":ythis" and ":ythistory"
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4572
4573
class YoutubeTruncatedURLIE(InfoExtractor):
    """
    Catches watch/attribution URLs that carry no video id and explains
    the likely cause (an unquoted "&" on the command line) to the user.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # A watch URL with at most one of the listed non-video parameters, or
    # an attribution link with only its first parameter, and nothing after
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError describing the problem."""
        # The examples name this project's own command (yt-dlp)
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4621
4622
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catches watch URLs whose video id has 1 to 10 characters."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)