]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[youtube] Improve extraction of livestream metadata
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
2d6659b9 5import base64
d92f5d5a 6import calendar
109dd3b2 7import copy
fe93e2c4 8import datetime
a5c56234 9import hashlib
0ca96d48 10import itertools
c5e8d7af 11import json
c4417ddb 12import os.path
d77ab8e2 13import random
c5e8d7af 14import re
8a784c74 15import time
e0df6211 16import traceback
c5e8d7af 17
b05654f0 18from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 19from ..compat import (
edf3e38e 20 compat_chr,
29f7c58a 21 compat_HTTPError,
c5e8d7af 22 compat_parse_qs,
545cc85d 23 compat_str,
7fd002c0 24 compat_urllib_parse_unquote_plus,
15707c7e 25 compat_urllib_parse_urlencode,
7c80519c 26 compat_urllib_parse_urlparse,
7c61bd36 27 compat_urlparse,
4bb4a188 28)
545cc85d 29from ..jsinterp import JSInterpreter
4bb4a188 30from ..utils import (
2d6659b9 31 bytes_to_intlist,
c5e8d7af 32 clean_html,
d92f5d5a 33 datetime_from_str,
11f9be09 34 dict_get,
358de58c 35 error_to_compat_str,
c5e8d7af 36 ExtractorError,
2d30521a 37 float_or_none,
11f9be09 38 format_field,
dd27fd17 39 int_or_none,
2d6659b9 40 intlist_to_bytes,
94278f72 41 mimetype2ext,
11f9be09 42 orderedSet,
6310acf5 43 parse_codecs,
49bd8c66 44 parse_count,
7c80519c 45 parse_duration,
7ea65411 46 parse_iso8601,
dca3ff4a 47 qualities,
3995d37d 48 remove_start,
cf7e015f 49 smuggle_url,
dbdaaa23 50 str_or_none,
c93d53f5 51 str_to_int,
7c365c21 52 traverse_obj,
556dbe7f 53 try_get,
c5e8d7af
PH
54 unescapeHTML,
55 unified_strdate,
cf7e015f 56 unsmuggle_url,
8bdd16b4 57 update_url_query,
21c340b8 58 url_or_none,
6e6bc8da 59 urlencode_postdata,
fe93e2c4 60 urljoin,
7c365c21 61 variadic,
c5e8d7af
PH
62)
63
5f6a1245 64
def parse_qs(url):
    """Return the parsed query-string parameters of *url*."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
67
68
de7f3446 69class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b
JMF
70 """Provide base functions for Youtube extractors"""
# Google account sign-in endpoints
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

_LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
_CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
# {0} is a str.format() placeholder for the "TL" query value
_TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

# Regex alternation of reserved path names
# NOTE(review): presumably used to keep these paths from being matched as channel/user names - confirm at the usage sites
_RESERVED_NAMES = (
    r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
    r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
    r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

# Machine name used for .netrc credential lookup
_NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False

# Regex for playlist IDs: a known prefix followed by at least 10 ID characters,
# or one of the fixed short IDs (RDMM, WL, LL, LM)
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 88
b2e8bc1b 89 def _login(self):
83317f69 90 """
91 Attempt to log in to YouTube.
92 True is returned if successful or skipped.
93 False is returned if login failed.
94
95 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
96 """
9d5d4d64 97
98 def warn(message):
99 self.report_warning(message)
100
101 # username+password login is broken
102 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
103 self.raise_login_required(
104 'Login details are needed to download this content', method='cookies')
68217024 105 username, password = self._get_login_info()
9d5d4d64 106 if username:
107 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
108 return
9d5d4d64 109
2d6659b9 110 # Everything below this is broken!
111 r'''
b2e8bc1b
JMF
112 # No authentication to be performed
113 if username is None:
a06916d9 114 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
69ea8ca4 115 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
a06916d9 116 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
545cc85d 117 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
83317f69 118 return True
b2e8bc1b 119
7cc3570e
PH
120 login_page = self._download_webpage(
121 self._LOGIN_URL, None,
69ea8ca4
PH
122 note='Downloading login page',
123 errnote='unable to fetch login page', fatal=False)
7cc3570e
PH
124 if login_page is False:
125 return
b2e8bc1b 126
1212e997 127 login_form = self._hidden_inputs(login_page)
c5e8d7af 128
e00eb564
S
129 def req(url, f_req, note, errnote):
130 data = login_form.copy()
131 data.update({
132 'pstMsg': 1,
133 'checkConnection': 'youtube',
134 'checkedDomains': 'youtube',
135 'hl': 'en',
136 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
3995d37d 137 'f.req': json.dumps(f_req),
e00eb564
S
138 'flowName': 'GlifWebSignIn',
139 'flowEntry': 'ServiceLogin',
baf67a60
S
140 # TODO: reverse actual botguard identifier generation algo
141 'bgRequest': '["identifier",""]',
041bc3ad 142 })
e00eb564
S
143 return self._download_json(
144 url, None, note=note, errnote=errnote,
145 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
146 fatal=False,
147 data=urlencode_postdata(data), headers={
148 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
149 'Google-Accounts-XSRF': 1,
150 })
151
3995d37d
S
152 lookup_req = [
153 username,
154 None, [], None, 'US', None, None, 2, False, True,
155 [
156 None, None,
157 [2, 1, None, 1,
158 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
159 None, [], 4],
160 1, [None, None, []], None, None, None, True
161 ],
162 username,
163 ]
164
e00eb564 165 lookup_results = req(
3995d37d 166 self._LOOKUP_URL, lookup_req,
e00eb564
S
167 'Looking up account info', 'Unable to look up account info')
168
169 if lookup_results is False:
170 return False
041bc3ad 171
3995d37d
S
172 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
173 if not user_hash:
174 warn('Unable to extract user hash')
175 return False
176
177 challenge_req = [
178 user_hash,
179 None, 1, None, [1, None, None, None, [password, None, True]],
180 [
181 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
182 1, [None, None, []], None, None, None, True
183 ]]
83317f69 184
3995d37d
S
185 challenge_results = req(
186 self._CHALLENGE_URL, challenge_req,
187 'Logging in', 'Unable to log in')
83317f69 188
3995d37d 189 if challenge_results is False:
e00eb564 190 return
83317f69 191
3995d37d
S
192 login_res = try_get(challenge_results, lambda x: x[0][5], list)
193 if login_res:
194 login_msg = try_get(login_res, lambda x: x[5], compat_str)
195 warn(
196 'Unable to login: %s' % 'Invalid password'
197 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
198 return False
199
200 res = try_get(challenge_results, lambda x: x[0][-1], list)
201 if not res:
202 warn('Unable to extract result entry')
203 return False
204
9a6628aa
S
205 login_challenge = try_get(res, lambda x: x[0][0], list)
206 if login_challenge:
207 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
208 if challenge_str == 'TWO_STEP_VERIFICATION':
3995d37d
S
209 # SEND_SUCCESS - TFA code has been successfully sent to phone
210 # QUOTA_EXCEEDED - reached the limit of TFA codes
9a6628aa 211 status = try_get(login_challenge, lambda x: x[5], compat_str)
3995d37d
S
212 if status == 'QUOTA_EXCEEDED':
213 warn('Exceeded the limit of TFA codes, try later')
214 return False
215
216 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
217 if not tl:
218 warn('Unable to extract TL')
219 return False
220
221 tfa_code = self._get_tfa_info('2-step verification code')
222
223 if not tfa_code:
224 warn(
225 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
226 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
227 return False
228
229 tfa_code = remove_start(tfa_code, 'G-')
230
231 tfa_req = [
232 user_hash, None, 2, None,
233 [
234 9, None, None, None, None, None, None, None,
235 [None, tfa_code, True, 2]
236 ]]
237
238 tfa_results = req(
239 self._TFA_URL.format(tl), tfa_req,
240 'Submitting TFA code', 'Unable to submit TFA code')
241
242 if tfa_results is False:
243 return False
244
245 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
246 if tfa_res:
247 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
248 warn(
249 'Unable to finish TFA: %s' % 'Invalid TFA code'
250 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
251 return False
252
253 check_cookie_url = try_get(
254 tfa_results, lambda x: x[0][-1][2], compat_str)
9a6628aa
S
255 else:
256 CHALLENGES = {
257 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
258 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
259 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
260 }
261 challenge = CHALLENGES.get(
262 challenge_str,
263 '%s returned error %s.' % (self.IE_NAME, challenge_str))
264 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
265 return False
3995d37d
S
266 else:
267 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
268
269 if not check_cookie_url:
270 warn('Unable to extract CheckCookie URL')
271 return False
e00eb564
S
272
273 check_cookie_results = self._download_webpage(
3995d37d
S
274 check_cookie_url, None, 'Checking cookie', fatal=False)
275
276 if check_cookie_results is False:
277 return False
e00eb564 278
3995d37d
S
279 if 'https://myaccount.google.com/' not in check_cookie_results:
280 warn('Unable to log in')
b2e8bc1b 281 return False
e00eb564 282
b2e8bc1b 283 return True
2d6659b9 284 '''
b2e8bc1b 285
def _initialize_consent(self):
    """
    Set a CONSENT cookie with a YES value on .youtube.com.

    Nothing is done when a __Secure-3PSID cookie is present or when the
    existing CONSENT cookie already contains YES. The numeric suffix is taken
    from a PENDING consent cookie when available, otherwise it is random.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if jar.get('__Secure-3PSID'):
        return

    consent_cookie = jar.get('CONSENT')
    pending_id = None
    if consent_cookie:
        if 'YES' in consent_cookie.value:
            return
        pending_id = self._search_regex(
            r'PENDING\+(\d+)', consent_cookie.value, 'consent', default=None)

    suffix = pending_id or random.randint(100, 999)
    self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % suffix)
8d81f3e3 300
def _real_initialize(self):
    """Initialize the consent cookie and, when a downloader is attached, attempt to log in."""
    self._initialize_consent()
    if self._downloader is not None:
        # The result of the login attempt does not change anything here
        self._login()
c5e8d7af 307
# Matches the ytInitialData assignment, either `window["ytInitialData"] = {...};`
# or `ytInitialData = {...};`, capturing the object literal (non-greedy)
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
# Matches `ytInitialPlayerResponse = {...};`, capturing the object literal (non-greedy)
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
# Text expected right after the object literal; extract_yt_initial_data appends it
# to _YT_INITIAL_DATA_RE for its first, stricter pattern
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 311
# Default ytcfg values keyed by INNERTUBE client name.
# _get_default_ytcfg() hands out deep copies of these entries, and the
# _extract_* helpers fall back to them when a value is missing from the given ytcfg.
_YT_DEFAULT_YTCFGS = {
    'WEB': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'WEB',
        'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'WEB_REMIX': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
        'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67
    },
    'WEB_EMBEDDED_PLAYER': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
        'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'ANDROID': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'ANDROID',
        'INNERTUBE_CLIENT_VERSION': '16.20',
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3
    },
    'ANDROID_EMBEDDED_PLAYER': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
        'INNERTUBE_CLIENT_VERSION': '16.20',
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55
    },
    'ANDROID_MUSIC': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
        'INNERTUBE_CLIENT_VERSION': '4.32',
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21
    },
    'IOS': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'IOS',
        'INNERTUBE_CLIENT_VERSION': '16.20',
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.20',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5

    },
    'IOS_MUSIC': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'IOS_MUSIC',
        'INNERTUBE_CLIENT_VERSION': '4.32',
        'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.32',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26
    },
    'IOS_MESSAGES_EXTENSION': {
        'INNERTUBE_API_VERSION': 'v1',
        'INNERTUBE_CLIENT_NAME': 'IOS_MESSAGES_EXTENSION',
        'INNERTUBE_CLIENT_VERSION': '16.20',
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.20',
                'hl': 'en',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66
    }
}
441
# API hostnames keyed by client name.
# _get_innertube_host() uses the 'WEB' entry for any client not listed here.
_YT_DEFAULT_INNERTUBE_HOSTS = {
    'DIRECT': 'youtubei.googleapis.com',
    'WEB': 'www.youtube.com',
    'WEB_REMIX': 'music.youtube.com',
    'ANDROID_MUSIC': 'music.youtube.com'
}
448
# Maps client identifiers to INNERTUBE client names.
# Clients starting with _ cannot be explicitly requested by the user
_YT_CLIENTS = {
    'web': 'WEB',
    'web_music': 'WEB_REMIX',
    '_web_embedded': 'WEB_EMBEDDED_PLAYER',
    '_web_agegate': 'TVHTML5',
    'android': 'ANDROID',
    'android_music': 'ANDROID_MUSIC',
    '_android_embedded': 'ANDROID_EMBEDDED_PLAYER',
    '_android_agegate': 'ANDROID',
    'ios': 'IOS',
    'ios_music': 'IOS_MUSIC',
    '_ios_embedded': 'IOS_MESSAGES_EXTENSION',
    '_ios_agegate': 'IOS'
}
464
109dd3b2 465 def _get_default_ytcfg(self, client='WEB'):
466 if client in self._YT_DEFAULT_YTCFGS:
467 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
468 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
469 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
470
def _get_innertube_host(self, client='WEB'):
    """Return the API hostname registered for *client*, or the WEB hostname if there is none."""
    lookup_order = (client, 'WEB')
    return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, lookup_order)
473
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
    """
    try_get *getter* on *ytcfg*, falling back to the default ytcfg of
    *default_client* when the result is falsy.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
478
def _extract_client_name(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_NAME from *ytcfg*, or from the defaults of *default_client*."""
    def client_name(cfg):
        return cfg['INNERTUBE_CLIENT_NAME']

    return self._ytcfg_get_safe(
        ytcfg, client_name, expected_type=compat_str, default_client=default_client)
481
@staticmethod
def _extract_session_index(*data):
    """Return the first SESSION_INDEX (as an int) found in the given ytcfgs, or None."""
    candidates = (
        int_or_none(try_get(cfg, lambda x: x['SESSION_INDEX']))
        for cfg in data)
    return next((index for index in candidates if index is not None), None)
314ee305 488
def _extract_client_version(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_VERSION from *ytcfg*, or from the defaults of *default_client*."""
    def client_version(cfg):
        return cfg['INNERTUBE_CLIENT_VERSION']

    return self._ytcfg_get_safe(
        ytcfg, client_version, expected_type=compat_str, default_client=default_client)
491
def _extract_api_key(self, ytcfg=None, default_client='WEB'):
    """Return INNERTUBE_API_KEY from *ytcfg*, or from the defaults of *default_client*."""
    def api_key(cfg):
        return cfg['INNERTUBE_API_KEY']

    return self._ytcfg_get_safe(
        ytcfg, api_key, expected_type=compat_str, default_client=default_client)
494
def _extract_context(self, ytcfg=None, default_client='WEB'):
    """
    Return the INNERTUBE_CONTEXT dict to send with API requests.

    The context found in *ytcfg* is returned as-is when present. Otherwise the
    default context of *default_client* is used; if a ytcfg was given, its
    client name, client version and visitor data are written into that copy.
    """
    def innertube_context(cfg):
        return try_get(cfg, lambda x: x['INNERTUBE_CONTEXT'], dict)

    own_context = innertube_context(ytcfg)
    if own_context:
        return own_context

    fallback = innertube_context(self._get_default_ytcfg(default_client))
    if ytcfg:
        # Recreate the client context (required)
        client = fallback['client']
        client['clientVersion'] = self._extract_client_version(ytcfg, default_client)
        client['clientName'] = self._extract_client_name(ytcfg, default_client)
        visitor = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor:
            client['visitorData'] = visitor
    return fallback
514
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """
    Build the 'SAPISIDHASH <time>_<sha1>' authorization value for *origin*.

    Returns None when neither a __Secure-3PAPISID nor a SAPISID cookie exists.
    """
    # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
    # See: https://github.com/yt-dlp/yt-dlp/issues/393
    cookies = self._get_cookies('https://www.youtube.com')
    auth_cookie = dict_get(cookies, ('__Secure-3PAPISID', 'SAPISID'))
    if auth_cookie is None:
        return None

    timestamp = round(time.time())
    # SAPISID cookie is required if not already present
    if not cookies.get('SAPISID'):
        self._set_cookie(
            '.youtube.com', 'SAPISID', auth_cookie.value,
            secure=True, expire_time=timestamp + 3600)

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    payload = f'{timestamp} {auth_cookie.value} {origin}'.encode('utf-8')
    digest = hashlib.sha1(payload).hexdigest()
    return f'SAPISIDHASH {timestamp}_{digest}'
a5c56234
M
532
533 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 534 note='Downloading API JSON', errnote='Unable to download API page',
109dd3b2 535 context=None, api_key=None, api_hostname=None, default_client='WEB'):
f4f751af 536
109dd3b2 537 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
8bdd16b4 538 data.update(query)
11f9be09 539 real_headers = self.generate_api_headers(default_client=default_client)
f4f751af 540 real_headers.update({'content-type': 'application/json'})
541 if headers:
542 real_headers.update(headers)
545cc85d 543 return self._download_json(
109dd3b2 544 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
a5c56234 545 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 546 data=json.dumps(data).encode('utf8'), headers=real_headers,
547 query={'key': api_key or self._extract_api_key()})
548
def extract_yt_initial_data(self, video_id, webpage):
    """
    Find the ytInitialData object in *webpage* and return it parsed as JSON.

    The pattern anchored by _YT_INITIAL_BOUNDARY_RE is tried first, then the
    bare _YT_INITIAL_DATA_RE.
    """
    patterns = (
        r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
        self._YT_INITIAL_DATA_RE,
    )
    raw_json = self._search_regex(patterns, webpage, 'yt initial data')
    return self._parse_json(raw_json, video_id)
0c148415 555
def _extract_identity_token(self, webpage, item_id):
    """
    Return the ID_TOKEN from *webpage*, or None if it cannot be found.

    The value from the page's ytcfg is preferred; a regex search over the raw
    page is the fallback.
    """
    if not webpage:
        return None

    ytcfg = self.extract_ytcfg(item_id, webpage)
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    return token or self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
567
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'])

    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
        if delegated:
            return delegated

        parts = (try_get(source, datasync_getters, compat_str) or '').split("||")
        if len(parts) < 2 or not parts[1]:
            continue
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        return parts[0]
    return None
a1c5d2ca 586
def extract_ytcfg(self, video_id, webpage):
    """Return the dict passed to ytcfg.set(...) in *webpage*, or {} if missing or unparsable."""
    if not webpage:
        return {}
    raw_cfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', default='{}')
    parsed = self._parse_json(raw_cfg, video_id, fatal=False)
    return parsed or {}
594
def generate_api_headers(
        self, ytcfg=None, identity_token=None, account_syncid=None,
        visitor_data=None, api_hostname=None, default_client='WEB', session_index=None):
    """
    Build the HTTP headers for an API request.

    Client name/version come from *ytcfg* or the defaults of *default_client*.
    Identity token, page id, auth user, visitor id and SAPISIDHASH
    authorization headers are only added when the corresponding value is
    available.
    """
    host = api_hostname or self._get_innertube_host(default_client)
    origin = 'https://' + host

    client_name_id = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name_id),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
    }

    if ytcfg and not visitor_data:
        visitor_data = try_get(
            self._extract_context(ytcfg, default_client),
            lambda x: x['client']['visitorData'], compat_str)

    if identity_token:
        headers['X-Youtube-Identity-Token'] = identity_token
    if account_syncid:
        headers['X-Goog-PageId'] = account_syncid

    if session_index is None and ytcfg:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    if visitor_data:
        headers['X-Goog-Visitor-Id'] = visitor_data

    authorization = self._generate_sapisidhash_header(origin)
    if authorization is not None:
        headers.update({'Authorization': authorization, 'X-Origin': origin})
    return headers
29f7c58a 623
2d6659b9 624 @staticmethod
625 def _build_api_continuation_query(continuation, ctp=None):
626 query = {
627 'continuation': continuation
628 }
629 # TODO: Inconsistency with clickTrackingParams.
630 # Currently we have a fixed ctp contained within context (from ytcfg)
631 # and a ctp in root query for continuation.
632 if ctp:
633 query['clickTracking'] = {'clickTrackingParams': ctp}
634 return query
635
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the nextContinuationData or
    reloadContinuationData of *renderer*; return None if there is no token.
    """
    continuation_data = try_get(
        renderer,
        (lambda x: x['continuations'][0]['nextContinuationData'],
         lambda x: x['continuation']['reloadContinuationData']),
        dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_data.get('clickTrackingParams'))
2d6659b9 648
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.

    Returns None when *continuation_ep* is not a dict or carries no
    continuationCommand token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if token:
        return cls._build_api_continuation_query(
            token, continuation_ep.get('clickTrackingParams'))
    return None
2d6659b9 658
@classmethod
def _extract_continuation(cls, renderer):
    """
    Return the continuation query for *renderer*, or None if it has none.

    The renderer's own continuation data is preferred; otherwise the
    continuationItemRenderer entries in its 'contents' and 'items' are searched.
    """
    direct = cls._extract_next_continuation_data(renderer)
    if direct:
        return direct

    endpoint_getters = (
        lambda x: x['continuationItemRenderer']['continuationEndpoint'],
        lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command'])

    children = [
        child
        for key in ('contents', 'items')
        for child in try_get(renderer, lambda x: x[key], list) or []]

    for child in children:
        if not isinstance(child, dict):
            continue
        endpoint = try_get(child, endpoint_getters, dict)
        found = cls._extract_continuation_ep_data(endpoint)
        if found:
            return found
    return None
679
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) for every alert in data['alerts'].

    Entries that are not dicts, that have no type, or whose text is empty
    are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # A non-dict value has no .get(); skip it like a malformed
            # outer entry instead of raising AttributeError
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert.get('text'))
            if message:
                yield alert_type, message
692
def _report_alerts(self, alerts, expected=True):
    """
    Report (alert_type, message) pairs.

    All alerts except the last error are reported as warnings, non-errors
    first. If there was any error, the last one is raised as an
    ExtractorError with the given *expected* flag.
    """
    errors, warnings = [], []
    for alert_type, alert_message in alerts:
        bucket = errors if alert_type.lower() == 'error' else warnings
        bucket.append((alert_type, alert_message))

    for alert_type, alert_message in warnings + errors[:-1]:
        self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))

    if not errors:
        return
    raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
706
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from *data* and pass them, with any extra arguments, to _report_alerts."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
709
def _extract_badges(self, renderer: dict):
    """Return the set of lowercased metadataBadgeRenderer labels in renderer['badges']."""
    labels = (
        try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
        for badge in try_get(renderer, lambda x: x['badges'], list) or [])
    return {label.lower() for label in labels if label}
717
@staticmethod
def _get_text(data, getter=None, max_runs=None):
    """
    Return the text of a text object, or None if no text is found.

    Each getter (a single one or several) is applied to *data* in turn; with
    no getter, *data* itself is inspected. 'simpleText' is preferred;
    otherwise the 'text' of the runs (or of the object itself if it is a
    list) is joined, limited to the first *max_runs* runs when given.
    """
    for get in variadic(getter):
        node = data if get is None else try_get(data, get)

        simple = try_get(node, lambda x: x['simpleText'], compat_str)
        if simple:
            return simple

        runs = try_get(node, lambda x: x['runs'], list) or []
        if isinstance(node, list) and not runs:
            runs = node

        limit = min(len(runs), max_runs or len(runs))
        joined = ''.join(
            try_get(run, lambda x: x['text'], compat_str) or ''
            for run in runs[:limit])
        if joined:
            return joined
    return None
47193e02 736
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='WEB'):
    """
    Call the API endpoint *ep* with retries and return the response.

    The request is retried up to 'extractor_retries' times (default 3) when
    it fails with HTTP 500, 503 or 404, or when the response has a value for
    none of the keys in *check_get_keys*. Alerts in the response are reported,
    and an error alert aborts the extraction.

    When *fatal* is false, failures are reported as warnings and None is
    returned instead of raising.
    """
    response = None
    last_error = None
    # Starts at -1 so that the first attempt is number 0 and retries count from 1
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            # Always fatal here so that failures surface as ExtractorError below
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg, default_client),
                api_key=self._extract_api_key(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client,
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                last_error = 'HTTP Error %s' % e.cause.code
                if count < retries:
                    continue
            # Not retryable, or out of retries
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            # Youtube may send alerts if there was an issue with the continuation page
            try:
                self._extract_and_report_alerts(response, expected=False)
            except ExtractorError as e:
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            # Done when no keys are required or at least one of them has a value
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response
792
9297939e 793 @staticmethod
794 def is_music_url(url):
795 return re.match(r'https?://music\.youtube\.com/', url) is not None
796
def _extract_video(self, renderer):
    """
    Build a 'url' result dict for YoutubeIE from a video renderer.

    The view count is the leading run of digits and commas of viewCountText;
    the uploader is taken from ownerText, falling back to shortBylineText.
    """
    video_id = renderer.get('videoId')

    views_text = self._get_text(renderer.get('viewCountText')) or ''
    leading_digits = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', views_text),
        'view count', default=None)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
        'title': self._get_text(renderer.get('title')),
        'description': self._get_text(renderer.get('descriptionSnippet')),
        'duration': parse_duration(self._get_text(renderer.get('lengthText'))),
        'view_count': str_to_int(leading_digits),
        'uploader': self._get_text(
            renderer, (lambda x: x['ownerText'], lambda x: x['shortBylineText'])),
    }
820
0c148415 821
360e1ca5 822class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 823 IE_DESC = 'YouTube.com'
# Regex fragments, one per third-party front-end hostname (each with an
# optional subdomain prefix).  They are OR-ed together with '|' and
# substituted into _VALID_URL below, so every entry must be a valid,
# self-contained regex alternative.
_INVIDIOUS_SITES = (
    # invidious-redirect websites
    r'(?:www\.)?redirect\.invidious\.io',
    r'(?:(?:www|dev)\.)?invidio\.us',
    # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
    r'(?:www\.)?invidious\.pussthecat\.org',
    r'(?:www\.)?invidious\.zee\.li',
    r'(?:www\.)?invidious\.ethibox\.fr',
    r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
    # youtube-dl invidious instances list
    r'(?:(?:www|no)\.)?invidiou\.sh',
    r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
    r'(?:www\.)?invidious\.kabi\.tk',
    r'(?:www\.)?invidious\.mastodon\.host',
    r'(?:www\.)?invidious\.zapashcanon\.fr',
    r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
    r'(?:www\.)?invidious\.tinfoil-hat\.net',
    r'(?:www\.)?invidious\.himiko\.cloud',
    r'(?:www\.)?invidious\.reallyancient\.tech',
    r'(?:www\.)?invidious\.tube',
    r'(?:www\.)?invidiou\.site',
    r'(?:www\.)?invidious\.site',
    r'(?:www\.)?invidious\.xyz',
    r'(?:www\.)?invidious\.nixnet\.xyz',
    r'(?:www\.)?invidious\.048596\.xyz',
    r'(?:www\.)?invidious\.drycat\.fr',
    r'(?:www\.)?inv\.skyn3t\.in',
    r'(?:www\.)?tube\.poal\.co',
    r'(?:www\.)?tube\.connect\.cafe',
    r'(?:www\.)?vid\.wxzm\.sx',
    r'(?:www\.)?vid\.mint\.lgbt',
    r'(?:www\.)?vid\.puffyan\.us',
    r'(?:www\.)?yewtu\.be',
    r'(?:www\.)?yt\.elukerio\.org',
    r'(?:www\.)?yt\.lelux\.fi',
    r'(?:www\.)?invidious\.ggc-project\.de',
    r'(?:www\.)?yt\.maisputain\.ovh',
    r'(?:www\.)?ytprivate\.com',
    r'(?:www\.)?invidious\.13ad\.de',
    r'(?:www\.)?invidious\.toot\.koeln',
    r'(?:www\.)?invidious\.fdn\.fr',
    r'(?:www\.)?watch\.nettohikari\.com',
    r'(?:www\.)?invidious\.namazso\.eu',
    r'(?:www\.)?invidious\.silkky\.cloud',
    r'(?:www\.)?invidious\.exonip\.de',
    r'(?:www\.)?invidious\.riverside\.rocks',
    r'(?:www\.)?invidious\.blamefran\.net',
    r'(?:www\.)?invidious\.moomoo\.de',
    r'(?:www\.)?ytb\.trom\.tf',
    r'(?:www\.)?yt\.cyberhost\.uk',
    r'(?:www\.)?kgg2m7yk5aybusll\.onion',
    r'(?:www\.)?qklhadlycap4cnod\.onion',
    r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
    r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
    r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
    r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
    r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
    r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
    r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
    r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
    r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
    r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
)
# Verbose-mode ((?x)) pattern for a single-video URL or a bare video ID.
# Capture group 1 wraps the entire scheme/host/path prefix and is optional,
# which is what lets a naked 11-character ID match; the conditional
# `(?(1).+)?` permits trailing text only when such a prefix was present.
# The 11-character ID itself is exposed as the named group `id`.
# The string is %-formatted, so it must not contain any literal '%' other
# than the two `%(invidious)s` placeholders.
_VALID_URL = r"""(?x)^
    (
        (?:https?://|//)                                    # http(s):// or protocol-independent URL
        (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
           (?:www\.)?deturl\.com/www\.youtube\.com|
           (?:www\.)?pwnyoutube\.com|
           (?:www\.)?hooktube\.com|
           (?:www\.)?yourepeat\.com|
           tube\.majestyc\.net|
           %(invidious)s|
           youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
        (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
        (?:                                                  # the various things that can precede the ID:
            (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
            |(?:                                             # or the v= param in all its forms
                (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
                v=
            )
        ))
        |(?:
           youtu\.be|                                        # just youtu.be/xxxx
           vid\.plus|                                        # or vid.plus/xxxx
           zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
           %(invidious)s
        )/
        |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
        )
    )?                                                       # all until now is optional -> you can pass the naked ID
    (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
    (?(1).+)?                                                # if we found the ID, everything can follow
    (?:\#|$)""" % {
    'invidious': '|'.join(_INVIDIOUS_SITES),
}
# Regexes for player JavaScript URLs; each one exposes the player
# identifier as the named group `id`.  The tuple is ordered from the most
# specific path layout to the loosest `vfl...` fallback.
# NOTE(review): presumably tried in order by the caller -- the consuming
# code is outside this excerpt, confirm before reordering.
_PLAYER_INFO_RE = (
    r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
    r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
    r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)
# Static, hard-coded metadata keyed by format identifier string (the itag,
# per the inline comments below, plus the special '_rtmp' key).  Each value
# is a dict of known properties for that format; fields that vary between
# videos (e.g. the height of itag 36 or 138) are deliberately left out.
# NOTE(review): how these defaults are merged with per-video data is decided
# by code outside this excerpt.
_formats = {
    '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
    '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
    '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
    '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
    '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
    '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},


    # 3D videos
    '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
    '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
    '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

    # Apple HTTP Live Streaming
    '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

    # DASH mp4 video
    '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
    '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

    # Dash mp4 audio
    '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
    '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
    '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
    '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
    '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

    # Dash webm
    '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
    '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
    '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

    # Dash webm audio
    '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
    '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

    # Dash webm audio with opus inside
    '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
    '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
    '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

    # RTMP (unnamed)
    '_rtmp': {'protocol': 'rtmp'},

    # av01 video only formats sometimes served with "unknown" codecs
    '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
}
# Subtitle format identifiers, as an ordered tuple.
# NOTE(review): presumably the set of formats requested for each subtitle
# track -- the consuming code is outside this excerpt.
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 1033
# Human-readable messages associated with age-restricted videos.
# NOTE(review): presumably compared against the reason text returned for an
# unplayable video -- the matching logic is outside this excerpt, so keep
# the wording (including the inconsistent trailing periods) exactly as is.
_AGE_GATE_REASONS = (
    'Sign in to confirm your age',
    'This video may be inappropriate for some users.',
    'Sorry, this content is age-restricted.')
1038
# Geo-bypass flag, switched off for this extractor.
# NOTE(review): the flag is interpreted by the base class, which is outside
# this excerpt -- confirm its exact effect there.
_GEO_BYPASS = False

# Short extractor name.
IE_NAME = 'youtube'
2eb88d95
PH
1042 _TESTS = [
1043 {
2d3d2997 1044 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
1045 'info_dict': {
1046 'id': 'BaW_jenozKc',
1047 'ext': 'mp4',
3867038a 1048 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
1049 'uploader': 'Philipp Hagemeister',
1050 'uploader_id': 'phihag',
ec85ded8 1051 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
1052 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1053 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 1054 'upload_date': '20121002',
3867038a 1055 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 1056 'categories': ['Science & Technology'],
3867038a 1057 'tags': ['youtube-dl'],
556dbe7f 1058 'duration': 10,
dbdaaa23 1059 'view_count': int,
3e7c1224
PH
1060 'like_count': int,
1061 'dislike_count': int,
7c80519c 1062 'start_time': 1,
297a564b 1063 'end_time': 9,
2eb88d95 1064 }
0e853ca4 1065 },
fccd3771 1066 {
4bc3a23e
PH
1067 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1068 'note': 'Embed-only video (#1746)',
1069 'info_dict': {
1070 'id': 'yZIXLfi8CZQ',
1071 'ext': 'mp4',
1072 'upload_date': '20120608',
1073 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1074 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1075 'uploader': 'SET India',
94bfcd23 1076 'uploader_id': 'setindia',
ec85ded8 1077 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1078 'age_limit': 18,
545cc85d 1079 },
1080 'skip': 'Private video',
fccd3771 1081 },
11b56058 1082 {
8bdd16b4 1083 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1084 'note': 'Use the first video ID in the URL',
1085 'info_dict': {
1086 'id': 'BaW_jenozKc',
1087 'ext': 'mp4',
3867038a 1088 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1089 'uploader': 'Philipp Hagemeister',
1090 'uploader_id': 'phihag',
ec85ded8 1091 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 1092 'upload_date': '20121002',
3867038a 1093 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 1094 'categories': ['Science & Technology'],
3867038a 1095 'tags': ['youtube-dl'],
556dbe7f 1096 'duration': 10,
dbdaaa23 1097 'view_count': int,
11b56058
PM
1098 'like_count': int,
1099 'dislike_count': int,
34a7de29
S
1100 },
1101 'params': {
1102 'skip_download': True,
1103 },
11b56058 1104 },
dd27fd17 1105 {
2d3d2997 1106 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1107 'note': '256k DASH audio (format 141) via DASH manifest',
1108 'info_dict': {
1109 'id': 'a9LDPn-MO4I',
1110 'ext': 'm4a',
1111 'upload_date': '20121002',
1112 'uploader_id': '8KVIDEO',
ec85ded8 1113 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1114 'description': '',
1115 'uploader': '8KVIDEO',
1116 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1117 },
4bc3a23e
PH
1118 'params': {
1119 'youtube_include_dash_manifest': True,
1120 'format': '141',
4919603f 1121 },
de3c7fe0 1122 'skip': 'format 141 not served anymore',
dd27fd17 1123 },
8bdd16b4 1124 # DASH manifest with encrypted signature
1125 {
1126 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1127 'info_dict': {
1128 'id': 'IB3lcPjvWLA',
1129 'ext': 'm4a',
1130 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1131 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1132 'duration': 244,
1133 'uploader': 'AfrojackVEVO',
1134 'uploader_id': 'AfrojackVEVO',
1135 'upload_date': '20131011',
cc2db878 1136 'abr': 129.495,
8bdd16b4 1137 },
1138 'params': {
1139 'youtube_include_dash_manifest': True,
1140 'format': '141/bestaudio[ext=m4a]',
1141 },
1142 },
dd2d55f1 1143 # Normal age-gate video (embed allowed)
c522adb1 1144 {
2d3d2997 1145 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1146 'info_dict': {
1147 'id': 'HtVdAasjOgU',
1148 'ext': 'mp4',
1149 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1150 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1151 'duration': 142,
c522adb1
JMF
1152 'uploader': 'The Witcher',
1153 'uploader_id': 'WitcherGame',
ec85ded8 1154 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1155 'upload_date': '20140605',
34952f09 1156 'age_limit': 18,
c522adb1
JMF
1157 },
1158 },
8bdd16b4 1159 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1160 # YouTube Red ad is not captured for creator
1161 {
1162 'url': '__2ABJjxzNo',
1163 'info_dict': {
1164 'id': '__2ABJjxzNo',
1165 'ext': 'mp4',
1166 'duration': 266,
1167 'upload_date': '20100430',
1168 'uploader_id': 'deadmau5',
1169 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1170 'creator': 'deadmau5',
1171 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1172 'uploader': 'deadmau5',
1173 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1174 'alt_title': 'Some Chords',
8bdd16b4 1175 },
1176 'expected_warnings': [
1177 'DASH manifest missing',
1178 ]
1179 },
067aa17e 1180 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1181 {
1182 'url': 'lqQg6PlCWgI',
1183 'info_dict': {
1184 'id': 'lqQg6PlCWgI',
1185 'ext': 'mp4',
556dbe7f 1186 'duration': 6085,
90227264 1187 'upload_date': '20150827',
cbe2bd91 1188 'uploader_id': 'olympic',
ec85ded8 1189 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1190 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
11f9be09 1191 'uploader': 'Olympics',
cbe2bd91
PH
1192 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1193 },
1194 'params': {
1195 'skip_download': 'requires avconv',
e52a40ab 1196 }
cbe2bd91 1197 },
6271f1ca
PH
1198 # Non-square pixels
1199 {
1200 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1201 'info_dict': {
1202 'id': '_b-2C3KPAM0',
1203 'ext': 'mp4',
1204 'stretched_ratio': 16 / 9.,
556dbe7f 1205 'duration': 85,
6271f1ca
PH
1206 'upload_date': '20110310',
1207 'uploader_id': 'AllenMeow',
ec85ded8 1208 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1209 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1210 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1211 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1212 },
06b491eb
S
1213 },
1214 # url_encoded_fmt_stream_map is empty string
1215 {
1216 'url': 'qEJwOuvDf7I',
1217 'info_dict': {
1218 'id': 'qEJwOuvDf7I',
f57b7835 1219 'ext': 'webm',
06b491eb
S
1220 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1221 'description': '',
1222 'upload_date': '20150404',
1223 'uploader_id': 'spbelect',
1224 'uploader': 'Наблюдатели Петербурга',
1225 },
1226 'params': {
1227 'skip_download': 'requires avconv',
e323cf3f
S
1228 },
1229 'skip': 'This live event has ended.',
06b491eb 1230 },
067aa17e 1231 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1232 {
1233 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1234 'info_dict': {
1235 'id': 'FIl7x6_3R5Y',
eb6793ba 1236 'ext': 'webm',
da77d856
S
1237 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1238 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1239 'duration': 220,
da77d856
S
1240 'upload_date': '20150625',
1241 'uploader_id': 'dorappi2000',
ec85ded8 1242 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1243 'uploader': 'dorappi2000',
eb6793ba 1244 'formats': 'mincount:31',
da77d856 1245 },
eb6793ba 1246 'skip': 'not actual anymore',
2ee8f5d8 1247 },
8a1a26ce
YCH
1248 # DASH manifest with segment_list
1249 {
1250 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1251 'md5': '8ce563a1d667b599d21064e982ab9e31',
1252 'info_dict': {
1253 'id': 'CsmdDsKjzN8',
1254 'ext': 'mp4',
17ee98e1 1255 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1256 'uploader': 'Airtek',
1257 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1258 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1259 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1260 },
1261 'params': {
1262 'youtube_include_dash_manifest': True,
1263 'format': '135', # bestvideo
be49068d
S
1264 },
1265 'skip': 'This live event has ended.',
2ee8f5d8 1266 },
cf7e015f
S
1267 {
1268 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1269 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1270 'info_dict': {
545cc85d 1271 'id': 'jvGDaLqkpTg',
1272 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1273 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1274 },
1275 'playlist': [{
1276 'info_dict': {
545cc85d 1277 'id': 'jvGDaLqkpTg',
cf7e015f 1278 'ext': 'mp4',
545cc85d 1279 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1280 'description': 'md5:e03b909557865076822aa169218d6a5d',
1281 'duration': 10643,
1282 'upload_date': '20161111',
1283 'uploader': 'Team PGP',
1284 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1285 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1286 },
1287 }, {
1288 'info_dict': {
545cc85d 1289 'id': '3AKt1R1aDnw',
cf7e015f 1290 'ext': 'mp4',
545cc85d 1291 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1292 'description': 'md5:e03b909557865076822aa169218d6a5d',
1293 'duration': 10991,
1294 'upload_date': '20161111',
1295 'uploader': 'Team PGP',
1296 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1297 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1298 },
1299 }, {
1300 'info_dict': {
545cc85d 1301 'id': 'RtAMM00gpVc',
cf7e015f 1302 'ext': 'mp4',
545cc85d 1303 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1304 'description': 'md5:e03b909557865076822aa169218d6a5d',
1305 'duration': 10995,
1306 'upload_date': '20161111',
1307 'uploader': 'Team PGP',
1308 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1309 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1310 },
1311 }, {
1312 'info_dict': {
545cc85d 1313 'id': '6N2fdlP3C5U',
cf7e015f 1314 'ext': 'mp4',
545cc85d 1315 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1316 'description': 'md5:e03b909557865076822aa169218d6a5d',
1317 'duration': 10990,
1318 'upload_date': '20161111',
1319 'uploader': 'Team PGP',
1320 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1321 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1322 },
1323 }],
1324 'params': {
1325 'skip_download': True,
1326 },
cbaed4bb 1327 },
f9f49d87 1328 {
067aa17e 1329 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1330 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1331 'info_dict': {
1332 'id': 'gVfLd0zydlo',
1333 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1334 },
1335 'playlist_count': 2,
be49068d 1336 'skip': 'Not multifeed anymore',
f9f49d87 1337 },
cbaed4bb 1338 {
2d3d2997 1339 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1340 'only_matching': True,
0e49d9a6 1341 },
6d4fc66b 1342 {
2d3d2997 1343 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1344 'only_matching': True,
1345 },
0e49d9a6 1346 {
067aa17e 1347 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1348 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1349 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1350 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1351 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1352 'info_dict': {
1353 'id': 'lsguqyKfVQg',
1354 'ext': 'mp4',
1355 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
11f9be09 1356 'alt_title': 'Dark Walk',
0e49d9a6 1357 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1358 'duration': 133,
0e49d9a6
LL
1359 'upload_date': '20151119',
1360 'uploader_id': 'IronSoulElf',
ec85ded8 1361 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1362 'uploader': 'IronSoulElf',
11f9be09 1363 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1364 'track': 'Dark Walk',
1365 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
92bc97d3 1366 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1367 },
1368 'params': {
1369 'skip_download': True,
1370 },
1371 },
61f92af1 1372 {
067aa17e 1373 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1374 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1375 'only_matching': True,
1376 },
313dfc45
LL
1377 {
1378 # Video with yt:stretch=17:0
1379 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1380 'info_dict': {
1381 'id': 'Q39EVAstoRM',
1382 'ext': 'mp4',
1383 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1384 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1385 'upload_date': '20151107',
1386 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1387 'uploader': 'CH GAMER DROID',
1388 },
1389 'params': {
1390 'skip_download': True,
1391 },
be49068d 1392 'skip': 'This video does not exist.',
313dfc45 1393 },
201c1459 1394 {
1395 # Video with incomplete 'yt:stretch=16:'
1396 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1397 'only_matching': True,
1398 },
7caf9830
S
1399 {
1400 # Video licensed under Creative Commons
1401 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1402 'info_dict': {
1403 'id': 'M4gD1WSo5mA',
1404 'ext': 'mp4',
1405 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1406 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1407 'duration': 721,
7caf9830
S
1408 'upload_date': '20150127',
1409 'uploader_id': 'BerkmanCenter',
ec85ded8 1410 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1411 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1412 'license': 'Creative Commons Attribution license (reuse allowed)',
1413 },
1414 'params': {
1415 'skip_download': True,
1416 },
1417 },
fd050249
S
1418 {
1419 # Channel-like uploader_url
1420 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1421 'info_dict': {
1422 'id': 'eQcmzGIKrzg',
1423 'ext': 'mp4',
1424 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1425 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1426 'duration': 4060,
fd050249 1427 'upload_date': '20151119',
eb6793ba 1428 'uploader': 'Bernie Sanders',
fd050249 1429 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1430 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1431 'license': 'Creative Commons Attribution license (reuse allowed)',
1432 },
1433 'params': {
1434 'skip_download': True,
1435 },
1436 },
040ac686
S
1437 {
1438 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1439 'only_matching': True,
7f29cf54
S
1440 },
1441 {
067aa17e 1442 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1443 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1444 'only_matching': True,
6496ccb4
S
1445 },
1446 {
1447 # Rental video preview
1448 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1449 'info_dict': {
1450 'id': 'uGpuVWrhIzE',
1451 'ext': 'mp4',
1452 'title': 'Piku - Trailer',
1453 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1454 'upload_date': '20150811',
1455 'uploader': 'FlixMatrix',
1456 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1457 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1458 'license': 'Standard YouTube License',
1459 },
1460 'params': {
1461 'skip_download': True,
1462 },
eb6793ba 1463 'skip': 'This video is not available.',
022a5d66 1464 },
12afdc2a
S
1465 {
1466 # YouTube Red video with episode data
1467 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1468 'info_dict': {
1469 'id': 'iqKdEhx-dD4',
1470 'ext': 'mp4',
1471 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1472 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1473 'duration': 2085,
12afdc2a
S
1474 'upload_date': '20170118',
1475 'uploader': 'Vsauce',
1476 'uploader_id': 'Vsauce',
1477 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1478 'series': 'Mind Field',
1479 'season_number': 1,
1480 'episode_number': 1,
1481 },
1482 'params': {
1483 'skip_download': True,
1484 },
1485 'expected_warnings': [
1486 'Skipping DASH manifest',
1487 ],
1488 },
c7121fa7
S
1489 {
1490 # The following content has been identified by the YouTube community
1491 # as inappropriate or offensive to some audiences.
1492 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1493 'info_dict': {
1494 'id': '6SJNVb0GnPI',
1495 'ext': 'mp4',
1496 'title': 'Race Differences in Intelligence',
1497 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1498 'duration': 965,
1499 'upload_date': '20140124',
1500 'uploader': 'New Century Foundation',
1501 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1502 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1503 },
1504 'params': {
1505 'skip_download': True,
1506 },
545cc85d 1507 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1508 },
022a5d66
S
1509 {
1510 # itag 212
1511 'url': '1t24XAntNCY',
1512 'only_matching': True,
fd5c4aab
S
1513 },
1514 {
1515 # geo restricted to JP
1516 'url': 'sJL6WA-aGkQ',
1517 'only_matching': True,
1518 },
cd5a74a2
S
1519 {
1520 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1521 'only_matching': True,
1522 },
bc2ca1bb 1523 {
1524 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1525 'only_matching': True,
1526 },
1527 {
1528 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1529 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1530 'only_matching': True,
1531 },
825cd268
RA
1532 {
1533 # DRM protected
1534 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1535 'only_matching': True,
4fe54c12
S
1536 },
1537 {
1538 # Video with unsupported adaptive stream type formats
1539 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1540 'info_dict': {
1541 'id': 'Z4Vy8R84T1U',
1542 'ext': 'mp4',
1543 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1544 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1545 'duration': 433,
1546 'upload_date': '20130923',
1547 'uploader': 'Amelia Putri Harwita',
1548 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1549 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1550 'formats': 'maxcount:10',
1551 },
1552 'params': {
1553 'skip_download': True,
1554 'youtube_include_dash_manifest': False,
1555 },
5429d6a9 1556 'skip': 'not actual anymore',
5caabd3c 1557 },
1558 {
822b9d9c 1559 # Youtube Music Auto-generated description
5caabd3c 1560 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1561 'info_dict': {
1562 'id': 'MgNrAu2pzNs',
1563 'ext': 'mp4',
1564 'title': 'Voyeur Girl',
1565 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1566 'upload_date': '20190312',
5429d6a9
S
1567 'uploader': 'Stephen - Topic',
1568 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1569 'artist': 'Stephen',
1570 'track': 'Voyeur Girl',
1571 'album': 'it\'s too much love to know my dear',
1572 'release_date': '20190313',
1573 'release_year': 2019,
1574 },
1575 'params': {
1576 'skip_download': True,
1577 },
1578 },
66b48727
RA
1579 {
1580 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1581 'only_matching': True,
1582 },
011e75e6
S
1583 {
1584 # invalid -> valid video id redirection
1585 'url': 'DJztXj2GPfl',
1586 'info_dict': {
1587 'id': 'DJztXj2GPfk',
1588 'ext': 'mp4',
1589 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1590 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1591 'upload_date': '20090125',
1592 'uploader': 'Prochorowka',
1593 'uploader_id': 'Prochorowka',
1594 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1595 'artist': 'Panjabi MC',
1596 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1597 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1598 },
1599 'params': {
1600 'skip_download': True,
1601 },
545cc85d 1602 'skip': 'Video unavailable',
ea74e00b
DP
1603 },
1604 {
1605 # empty description results in an empty string
1606 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1607 'info_dict': {
1608 'id': 'x41yOUIvK2k',
1609 'ext': 'mp4',
1610 'title': 'IMG 3456',
1611 'description': '',
1612 'upload_date': '20170613',
1613 'uploader_id': 'ElevageOrVert',
1614 'uploader': 'ElevageOrVert',
1615 },
1616 'params': {
1617 'skip_download': True,
1618 },
1619 },
a0566bbf 1620 {
29f7c58a 1621 # with '};' inside yt initial data (see [1])
1622 # see [2] for an example with '};' inside ytInitialPlayerResponse
1623 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1624 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1625 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1626 'info_dict': {
1627 'id': 'CHqg6qOn4no',
1628 'ext': 'mp4',
1629 'title': 'Part 77 Sort a list of simple types in c#',
1630 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1631 'upload_date': '20130831',
1632 'uploader_id': 'kudvenkat',
1633 'uploader': 'kudvenkat',
1634 },
1635 'params': {
1636 'skip_download': True,
1637 },
1638 },
29f7c58a 1639 {
1640 # another example of '};' in ytInitialData
1641 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1642 'only_matching': True,
1643 },
1644 {
1645 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1646 'only_matching': True,
1647 },
545cc85d 1648 {
cc2db878 1649 # https://github.com/ytdl-org/youtube-dl/pull/28094
1650 'url': 'OtqTfy26tG0',
1651 'info_dict': {
1652 'id': 'OtqTfy26tG0',
1653 'ext': 'mp4',
1654 'title': 'Burn Out',
1655 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1656 'upload_date': '20141120',
1657 'uploader': 'The Cinematic Orchestra - Topic',
1658 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1659 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1660 'artist': 'The Cinematic Orchestra',
1661 'track': 'Burn Out',
1662 'album': 'Every Day',
1663 'release_data': None,
1664 'release_year': None,
1665 },
1666 'params': {
1667 'skip_download': True,
1668 },
545cc85d 1669 },
bc2ca1bb 1670 {
1671 # controversial video, only works with bpctr when authenticated with cookies
1672 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1673 'only_matching': True,
1674 },
a1a7907b 1675 {
1676 # controversial video, requires bpctr/contentCheckOk
1677 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1678 'info_dict': {
1679 'id': 'SZJvDhaSDnc',
1680 'ext': 'mp4',
1681 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1682 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1683 'uploader': 'CBS This Morning',
11f9be09 1684 'uploader_id': 'CBSThisMorning',
a1a7907b 1685 'upload_date': '20140716',
1686 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1687 }
1688 },
f7ad7160 1689 {
1690 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1691 'url': 'cBvYw8_A0vQ',
1692 'info_dict': {
1693 'id': 'cBvYw8_A0vQ',
1694 'ext': 'mp4',
1695 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1696 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1697 'upload_date': '20201120',
1698 'uploader': 'Walk around Japan',
1699 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1700 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1701 },
1702 'params': {
1703 'skip_download': True,
1704 },
0fb983f6 1705 }, {
1706 # Has multiple audio streams
1707 'url': 'WaOKSUlf4TM',
1708 'only_matching': True
9297939e 1709 }, {
1710 # Requires Premium: has format 141 when requested using YTM url
1711 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1712 'only_matching': True
1713 }, {
120916da 1714 # multiple subtitles with same lang_code
1715 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1716 'only_matching': True,
109dd3b2 1717 }, {
1718 # Force use android client fallback
1719 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1720 'info_dict': {
1721 'id': 'YOelRv7fMxY',
11f9be09 1722 'title': 'DIGGING A SECRET TUNNEL Part 1',
109dd3b2 1723 'ext': '3gp',
1724 'upload_date': '20210624',
1725 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1726 'uploader': 'colinfurze',
11f9be09 1727 'uploader_id': 'colinfurze',
109dd3b2 1728 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
11f9be09 1729 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
109dd3b2 1730 },
1731 'params': {
1732 'format': '17', # 3gp format available on android
1733 'extractor_args': {'youtube': {'player_client': ['android']}},
1734 },
120916da 1735 },
109dd3b2 1736 {
1737 # Skip download of additional client configs (remix client config in this case)
1738 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1739 'only_matching': True,
1740 'params': {
1741 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1742 },
1743 }
2eb88d95
PH
1744 ]
1745
@classmethod
def suitable(cls, url):
    """Return False for URLs with a non-empty ``list`` query parameter,
    otherwise defer to the parent class' ``suitable`` check."""
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_param = parse_qs(url).get('list', [None])[0]
    if list_param:
        return False
    return super(YoutubeIE, cls).suitable(url)
1755
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor, then create the empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Signature functions, keyed by (player_url, signature cache id)
    self._player_cache = {}
    # Downloaded player JS, keyed by player id
    self._code_cache = {}
e0df6211 1760
def _extract_player_url(self, ytcfg=None, webpage=None):
    """
    Return an absolute player JS URL, or None if none can be found.

    ytcfg['PLAYER_JS_URL'] is preferred; otherwise the webpage is searched
    for a "PLAYER_JS_URL"/"jsUrl" JSON entry.
    """
    url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
    if not url and webpage:
        url = self._search_regex(
            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
            webpage, 'player URL', fatal=False)
    if not url:
        return None
    # Protocol-relative URL: just prepend the scheme
    if url.startswith('//'):
        return 'https:' + url
    # Already absolute
    if re.match(r'https?://', url):
        return url
    # Anything else is resolved against the YouTube origin
    return compat_urlparse.urljoin('https://www.youtube.com', url)
1775
60064c53
PH
def _signature_cache_id(self, example_sig):
    """ Return a string representation of a signature """
    # The id is the dot-joined lengths of the dot-separated signature parts
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1779
e40c758c
S
1780 @classmethod
1781 def _extract_player_info(cls, player_url):
1782 for player_re in cls._PLAYER_INFO_RE:
1783 id_m = re.search(player_re, player_url)
1784 if id_m:
1785 break
1786 else:
c081b35c 1787 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 1788 return id_m.group('id')
e40c758c 1789
109dd3b2 1790 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1791 player_id = self._extract_player_info(player_url)
1792 if player_id not in self._code_cache:
1793 self._code_cache[player_id] = self._download_webpage(
1794 player_url, video_id, fatal=fatal,
1795 note='Downloading player ' + player_id,
1796 errnote='Download of %s failed' % player_url)
1797 return player_id in self._code_cache
1798
e40c758c 1799 def _extract_signature_function(self, video_id, player_url, example_sig):
545cc85d 1800 player_id = self._extract_player_info(player_url)
e0df6211 1801
c4417ddb 1802 # Read from filesystem cache
545cc85d 1803 func_id = 'js_%s_%s' % (
1804 player_id, self._signature_cache_id(example_sig))
c4417ddb 1805 assert os.path.basename(func_id) == func_id
a0e07d31 1806
69ea8ca4 1807 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
a0e07d31 1808 if cache_spec is not None:
78caa52a 1809 return lambda s: ''.join(s[i] for i in cache_spec)
83799698 1810
109dd3b2 1811 if self._load_player(video_id, player_url):
1812 code = self._code_cache[player_id]
1813 res = self._parse_sig_js(code)
e0df6211 1814
109dd3b2 1815 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1816 cache_res = res(test_string)
1817 cache_spec = [ord(c) for c in cache_res]
83799698 1818
109dd3b2 1819 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1820 return res
83799698 1821
def _print_sig_code(self, func, example_sig):
    """
    Print (via to_screen) a Python snippet equivalent to the signature
    function func for signatures shaped like example_sig.

    func is run over a probe string whose characters are their own
    indices; the resulting index list is rendered as a sum of s[...]
    index and slice expressions.
    """
    def gen_sig_code(idxs):
        # Yield 's[i]' / slice expression strings covering idxs in order.
        # Runs of indices that change by a constant +1 or -1 are collapsed
        # into a single slice expression.
        def _genslice(start, end, step):
            # Render the inclusive run start..end as a slice expression
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # step is None while no run is in progress
        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                # A run is in progress: extend it, or flush it at prev
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # Begin a new run at prev
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Emit whatever is still pending for the final index
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    # Tuple of the lengths of the dot-separated parts of example_sig
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1860
e0df6211
PH
def _parse_sig_js(self, jscode):
    """
    Locate the signature function in the player JS and return a callable
    that applies it to a signature string.

    The function name is taken from the ``sig`` group of the first
    pattern below that matches jscode; the function itself is then
    extracted with JSInterpreter.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function is called with its arguments as a list
    return lambda s: initial_function([s])
1884
545cc85d 1885 def _decrypt_signature(self, s, video_id, player_url):
257a2501 1886 """Turn the encrypted s field into a working signature"""
6b37f0be 1887
c8bf86d5 1888 if player_url is None:
69ea8ca4 1889 raise ExtractorError('Cannot decrypt signature without player_url')
920de7a2 1890
c8bf86d5 1891 try:
62af3a0e 1892 player_id = (player_url, self._signature_cache_id(s))
c8bf86d5
PH
1893 if player_id not in self._player_cache:
1894 func = self._extract_signature_function(
60064c53 1895 video_id, player_url, s
c8bf86d5
PH
1896 )
1897 self._player_cache[player_id] = func
1898 func = self._player_cache[player_id]
a06916d9 1899 if self.get_param('youtube_print_sig_code'):
60064c53 1900 self._print_sig_code(func, s)
c8bf86d5
PH
1901 return func(s)
1902 except Exception as e:
1903 tb = traceback.format_exc()
1904 raise ExtractorError(
78caa52a 1905 'Signature extraction failed: ' + tb, cause=e)
e0df6211 1906
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    ytcfg['STS'] is used when present and truthy; otherwise the value is
    searched for in the player JS. Without a player_url this raises
    ExtractorError if fatal, else warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    if self._load_player(video_id, player_url, fatal=fatal):
        code = self._code_cache[self._extract_player_info(player_url)]
        sts = int_or_none(self._search_regex(
            r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
            'JS player signature timestamp', group='sts', fatal=fatal))
    return sts
1931
def _mark_watched(self, video_id, player_responses):
    """
    Request the playback tracking URL found in player_responses, with
    ``ver`` and a random ``cpn`` added to its query. Warns and returns
    when no such URL is available.
    """
    playback_url = traverse_obj(
        player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
        expected_type=url_or_none, get_all=False)
    if not playback_url:
        self.report_warning('Unable to mark watched')
        return

    url_parts = compat_urlparse.urlparse(playback_url)
    params = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)]

    params['ver'] = ['2']
    params['cpn'] = [''.join(cpn_chars)]

    new_query = compat_urllib_parse_urlencode(params, True)
    playback_url = compat_urlparse.urlunparse(url_parts._replace(query=new_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1957
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return the YouTube embed URLs / video ids found in webpage, in order of discovery."""
    entries = []

    # Embedded YouTube player
    for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage):
        entries.append(unescapeHTML(mobj.group('url')))

    # lazyYT YouTube embed
    for lazy_id in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        entries.append(unescapeHTML(lazy_id))

    # Wordpress "YouTube Video Importer" plugin
    for groups in re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage):
        # findall returns one tuple per match; the video id is the last group
        entries.append(groups[-1])

    return entries
1989
@staticmethod
def _extract_url(webpage):
    """Return the first entry from YoutubeIE._extract_urls(webpage), or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1994
97665381
PH
@classmethod
def extract_id(cls, url):
    """
    Return the second capture group of cls._VALID_URL matched against url.

    Raises ExtractorError when url does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
2002
def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chapteredPlayerBarRenderer entries found in data."""
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )

    def start_of(chapter):
        # timeRangeStartMillis is scaled down by 1000
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters(
        traverse_obj(data, chapters_path, expected_type=list),
        chapter_time=start_of,
        chapter_title=title_of,
        duration=duration)
2017
def _extract_chapters_from_engagement_panel(self, data, duration):
    """
    Build chapters from the macroMarkersListRenderer engagement panels in
    data. The first panel that yields a non-empty chapter list wins;
    returns [] when none does.
    """
    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter.get('timeDescription')))

    def chapter_title(chapter):
        return self._get_text(chapter.get('title'))

    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    for contents in content_list:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
2033
2034 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
84213ea8 2035 chapters = []
7c365c21 2036 last_chapter = {'start_time': 0}
2037 for idx, chapter in enumerate(chapter_list or []):
2038 title = chapter_title(chapter)
84213ea8
S
2039 start_time = chapter_time(chapter)
2040 if start_time is None:
2041 continue
7c365c21 2042 last_chapter['end_time'] = start_time
2043 if start_time < last_chapter['start_time']:
2044 if idx == 1:
2045 chapters.pop()
2046 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2047 else:
2048 self.report_warning(f'Invalid start time for chapter "{title}"')
2049 continue
2050 last_chapter = {'start_time': start_time, 'title': title}
2051 chapters.append(last_chapter)
2052 last_chapter['end_time'] = duration
84213ea8
S
2053 return chapters
2054
545cc85d 2055 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2056 return self._parse_json(self._search_regex(
2057 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2058 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 2059
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns the result of datetime_from_str, or None when the text has
    fewer than three space-separated words or cannot be parsed.
    """
    words = time_text.split(' ')
    if len(words) < 3:
        return None
    try:
        return datetime_from_str('now-%s%s' % (words[0], words[1]), precision='auto')
    except ValueError:
        return None
d92f5d5a 2072
a1c5d2ca
M
def _extract_comment(self, comment_renderer, parent=None):
    """
    Convert a commentRenderer dict into a comment info dict.

    Returns None when the renderer has no commentId. 'timestamp' is None
    when the published time text cannot be parsed.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer.get('contentText'))

    # note: timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer.get('publishedTimeText')) or ''
    time_text_dt = self.parse_time_text(time_text)
    # parse_time_text returns None for unparseable text, so timestamp
    # needs a default or the return dict below raises UnboundLocalError
    timestamp = None
    if isinstance(time_text_dt, datetime.datetime):
        timestamp = calendar.timegm(time_text_dt.timetuple())
    author = self._get_text(comment_renderer.get('authorText'))
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                   lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_favorited = 'creatorHeart' in (try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
2110
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, video_id, parent=None, comment_counts=None):
    """
    Generator over the comments reachable from root_continuation_data.

    Yields comment dicts (as returned by _extract_comment) and, when the
    comments header is parsed, a single int with the estimated total.
    Reply threads are fetched by recursing with parent set to the id of
    the parent comment. comment_counts is a shared mutable list:
    [comments so far, estimated total, current thread number].
    """

    def extract_header(contents):
        # Returns (estimated total comments, continuation for the chosen sort order)
        _total_comments = 0
        _continuation = None
        for content in contents:
            comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
            expected_comment_count = parse_count(self._get_text(
                comments_header_renderer, (lambda x: x['countText'], lambda x: x['commentsCount']), max_runs=1))

            if expected_comment_count:
                comment_counts[1] = expected_comment_count
                self.to_screen('Downloading ~%d comments' % expected_comment_count)
                _total_comments = comment_counts[1]
            sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
            comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = sort_menu_item.get('title')
            if isinstance(sort_text, compat_str):
                sort_text = sort_text.lower()
            else:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text)
            break
        return _total_comments, _continuation

    def extract_thread(contents):
        # Yields each comment in contents, followed by its replies (if any)
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # dict is the expected type here; it must not be passed as a
            # second getter, which would return a copy of the source dict
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    video_id, parent=comment.get('id'), comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    # YouTube comments have a max depth of 2
    max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
    if max_depth == 1 and parent:
        return
    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    continuation = self._extract_continuation(root_continuation_data)
    if continuation and len(continuation['continuation']) < 27:
        self.write_debug('Detected old API continuation token. Generating new API compatible token.')
        continuation_token = self._generate_comment_continuation(video_id)
        continuation = self._build_api_continuation_query(continuation_token, None)

    visitor_data = None
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    comment_counts[2], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
        if not response:
            break
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

        continuation = None
        if isinstance(continuation_contents, list):
            for continuation_section in continuation_contents:
                if not isinstance(continuation_section, dict):
                    continue
                continuation_items = try_get(
                    continuation_section,
                    (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                     lambda x: x['appendContinuationItemsAction']['continuationItems']),
                    list) or []
                if is_first_continuation:
                    total_comments, continuation = extract_header(continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break
                    continue
                # Counting from 1 so that count == 0 really means "nothing yielded"
                count = 0
                for count, entry in enumerate(extract_thread(continuation_items), 1):
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break

        # Deprecated response structure
        elif isinstance(continuation_contents, dict):
            known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
            for key, continuation_renderer in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                if not isinstance(continuation_renderer, dict):
                    continue
                if is_first_continuation:
                    header_continuation_items = [continuation_renderer.get('header') or {}]
                    total_comments, continuation = extract_header(header_continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break

                # Sometimes YouTube provides a continuation without any comments
                # In most cases we end up just downloading these with very little comments to come.
                # Counting from 1 so that count == 0 really means "nothing yielded"
                count = 0
                for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {}), 1):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                if count == 0:
                    if not parent:
                        self.report_warning('No comments received - assuming end of comments')
                    continuation = None
                break
a1c5d2ca 2281
2d6659b9 2282 @staticmethod
2283 def _generate_comment_continuation(video_id):
2284 """
2285 Generates initial comment section continuation token from given video id
2286 """
2287 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2288 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2289 new_continuation_intlist = list(itertools.chain.from_iterable(
2290 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2291 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2292
def _extract_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""
    known_entry_comment_renderers = ('itemSectionRenderer',)

    def _real_comment_extract(contents):
        if not isinstance(contents, list):
            return
        for entry in contents:
            for key, renderer in entry.items():
                if key in known_entry_comment_renderers:
                    yield from self._comment_entries(
                        renderer, video_id=video_id, ytcfg=ytcfg,
                        identity_token=self._extract_identity_token(webpage, item_id=video_id),
                        account_syncid=self._extract_account_syncid(ytcfg))
                    # Only the first known renderer of each entry is used
                    break

    collected = []
    estimated_total = 0
    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')

    try:
        for item in _real_comment_extract(contents):
            if len(collected) >= max_comments:
                break
            if isinstance(item, int):
                # Ints from the generator are the estimated total, not comments
                estimated_total = item
            else:
                collected.append(item)
    except KeyboardInterrupt:
        # Keep whatever was downloaded before the interruption
        self.to_screen('Interrupted by user')
    self.to_screen('Downloaded %d/%d comments' % (len(collected), estimated_total))
    return {
        'comments': collected,
        'comment_count': len(collected),
    }
2326
@staticmethod
def _generate_player_context(sts=None):
    """
    Build the playback context part of a player request

    @param sts  Signature timestamp; left out of the context when None
    @returns    dict with the 'playbackContext' and 'contentCheckOk' keys
    """
    playback_context = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback_context['signatureTimestamp'] = sts
    player_context = {
        'playbackContext': {'contentPlaybackContext': playback_context},
        'contentCheckOk': True,
    }
    return player_context
2340
@staticmethod
def _get_video_info_params(video_id, client='TVHTML5'):
    """
    Build the query parameters for a get_video_info request

    @param video_id  The video id
    @param client    One of 'ANDROID', 'TVHTML5' or 'IOS'
    @returns         dict of query parameters, including the client name
                     ('c') and client version ('cver')
    """
    client_versions = (
        ('ANDROID', '16.20'),
        ('TVHTML5', '6.20180913'),
        ('IOS', '16.20'),
    )
    GVI_CLIENTS = {
        name: {'c': name, 'cver': version}
        for name, version in client_versions}

    params = dict((
        ('video_id', video_id),
        ('eurl', 'https://youtube.googleapis.com/v/' + video_id),
        ('html5', '1'),
    ))
    params.update(GVI_CLIENTS.get(client))
    return params
4e6767b5 2364
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
    """
    Request the player response of a video through the 'player' endpoint

    @param client  Key into `_YT_CLIENTS` naming the client to request as
    @returns       The response, or None if nothing usable was returned
                   (the request is made non-fatally)
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)

    api_client = self._YT_CLIENTS[client]
    headers = self.generate_api_headers(
        player_ytcfg, identity_token, syncid,
        default_client=api_client, session_index=session_index)

    yt_query = dict({'videoId': video_id}, **self._generate_player_context(sts))
    # Show e.g. "_web_embedded" as "web embedded" in the progress note
    readable_client = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=False,
        default_client=api_client,
        note='Downloading %s player API JSON' % readable_client)
    return response or None
2382
def _extract_age_gated_player_response(self, client, video_id, ytcfg, identity_token, player_url, initial_pr):
    """
    Try to get a player response for an age-gated video

    First the get_video_info page is queried with the `_{client}_agegate`
    entry of `_YT_CLIENTS`; if that gives nothing, the `_{client}_embedded`
    client is used instead.

    @returns  The player response, or None if the client has no age-gate
              entry or the embedded player reports an age-gate reason too
    """
    gvi_client = self._YT_CLIENTS.get(f'_{client}_agegate')
    if not gvi_client:
        return

    video_info_webpage = self._download_webpage(
        self.http_scheme() + '//www.youtube.com/get_video_info', video_id,
        'Refetching age-gated %s info webpage' % gvi_client.lower(),
        'unable to download video info webpage', fatal=False,
        query=self._get_video_info_params(video_id, client=gvi_client))
    player_response_json = traverse_obj(
        compat_parse_qs(video_info_webpage), ('player_response', 0), expected_type=str)
    pr = self._parse_json(player_response_json or '{}', video_id)
    if pr:
        return pr

    self.report_warning('Falling back to embedded-only age-gate workaround')
    embed_webpage = None
    wants_configs = 'configs' not in self._configuration_arg('player_skip')
    if client == 'web' and wants_configs:
        embed_webpage = self._download_webpage(
            'https://www.youtube.com/embed/%s?html5=1' % video_id,
            video_id=video_id, note=f'Downloading age-gated {client} embed config')

    ytcfg_age = self.extract_ytcfg(video_id, embed_webpage) or {}
    # If we extracted the embed webpage, it'll tell us if we can view the video
    embedded_pr_json = traverse_obj(
        ytcfg_age, ('PLAYER_VARS', 'embedded_player_response'), expected_type=str)
    embedded_pr = self._parse_json(embedded_pr_json or '{}', video_id=video_id)
    embedded_ps_reason = traverse_obj(
        embedded_pr, ('playabilityStatus', 'reason'), expected_type=str) or ''
    if embedded_ps_reason in self._AGE_GATE_REASONS:
        return

    embedded_player_ytcfg = ytcfg_age if client == 'web' else {}
    return self._extract_player_response(
        f'_{client}_embedded', video_id,
        ytcfg_age or ytcfg, embedded_player_ytcfg,
        identity_token, player_url, initial_pr)
545cc85d 2417
def _get_requested_clients(self, url, smuggled_data):
    """
    Work out which player clients to request player responses from

    Clients come from the `player_client` extractor argument and default to
    android and web. For music URLs the `_music` variant of every requested
    client is added as well.

    @param url            The URL being extracted
    @param smuggled_data  Data smuggled into the URL; `is_music_url` is read
    @returns              The client names, duplicates removed, order kept
    """
    requested_clients = [
        client for client in self._configuration_arg('player_client')
        # Names starting with "_" (such as the `_{client}_agegate` and
        # `_{client}_embedded` entries looked up elsewhere in this class) are
        # internal and must not be selectable by the user. The former check,
        # `client[:0] != '_'`, compared an empty string and so was always true.
        if not client.startswith('_') and client in self._YT_CLIENTS]
    if not requested_clients:
        requested_clients = ['android', 'web']

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        requested_clients.extend(
            f'{client}_music' for client in requested_clients if not client.endswith('_music'))

    return orderedSet(requested_clients)
cf7e015f 2429
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
    """
    Generator of the player responses of all the given clients

    Once any response carries one of the `_AGE_GATE_REASONS`, normal requests
    are skipped for that and all following clients and only the age-gate
    workaround is used.

    @param clients  Names of the clients to query, in order
    @param webpage  The watch page, if it was downloaded; its initial player
                    response is reused for the web client
    """
    initial_pr = None
    if webpage:
        initial_pr = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    age_gated = False
    for client in clients:
        is_web = client == 'web'
        player_ytcfg = master_ytcfg if is_web else {}
        pr = None
        if not age_gated:
            if is_web and initial_pr:
                pr = initial_pr
            else:
                if client == 'web_music' and 'configs' not in self._configuration_arg('player_skip'):
                    ytm_webpage = self._download_webpage(
                        'https://music.youtube.com',
                        video_id, fatal=False, note='Downloading remix client config')
                    player_ytcfg = self.extract_ytcfg(video_id, ytm_webpage) or {}
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    identity_token, player_url, initial_pr)
        if pr:
            yield pr

        if not age_gated:
            age_gated = traverse_obj(pr, ('playabilityStatus', 'reason')) in self._AGE_GATE_REASONS
        if age_gated:
            age_gated_pr = self._extract_age_gated_player_response(
                client, video_id, player_ytcfg or master_ytcfg,
                identity_token, player_url, initial_pr)
            if age_gated_pr:
                yield age_gated_pr

    # Android player_response does not have microFormats which are needed for
    # extraction of some data. So we return the initial_pr with formats
    # stripped out even if not requested by the user
    # See: https://github.com/yt-dlp/yt-dlp/issues/501
    if initial_pr and 'web' not in clients:
        initial_pr['streamingData'] = None
        yield initial_pr
2467
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
    """
    Generator of the format dicts described by the given streaming data

    Formats listed directly under `formats`/`adaptiveFormats` are yielded
    first, followed by those found in the HLS and DASH manifests. A manifest
    format whose itag was already seen is skipped.

    @param streaming_data  List of `streamingData` dicts of the player responses
    @param player_url      URL of the player, needed to decrypt signatures;
                           ciphered formats are skipped without it
    @param is_live         Whether the video is live; DASH manifests are not
                           fetched for live videos
    """
    itags, stream_ids = [], []
    itag_qualities = {}
    q = qualities([
        # "tiny" is the smallest video-only format. But some audio-only formats
        # were also labeled "tiny". It is not clear if such formats still exist
        'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        # The same itag can appear once per audio track
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        if itag and quality:
            itag_qualities[itag] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragments that would subsequently be requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(encrypted_sig, video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        if itag:
            itags.append(itag)
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': ', '.join(filter(None, (
                audio_track.get('displayName'), fmt.get('qualityLabel') or quality))),
            'fps': int_or_none(fmt.get('fps')),
            'height': int_or_none(fmt.get('height')),
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            # Coerced like the other numeric fields
            'width': int_or_none(fmt.get('width')),
            'language': audio_track.get('id', '').split('.')[0],
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        # The 3gp format in android client has a quality of "small",
        # but is actually worse than all other formats.
        # 'ext' is only set when the mime type could be parsed, hence .get()
        if dct.get('ext') == '3gp':
            dct['quality'] = q('tiny')
            dct['preference'] = -10
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        yield dct

    skip_manifests = self._configuration_arg('skip')
    get_dash = not is_live and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
    get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

    for sd in streaming_data:
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(
                    hls_manifest_url, video_id, 'mp4', fatal=False):
                itag = self._search_regex(
                    r'/itag/(\d+)', f['url'], 'itag', default=None)
                if itag in itags:
                    continue
                if itag:
                    f['format_id'] = itag
                    itags.append(itag)
                yield f

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(
                    dash_manifest_url, video_id, fatal=False):
                itag = f['format_id']
                if itag in itags:
                    continue
                if itag:
                    itags.append(itag)
                if itag in itag_qualities:
                    f['quality'] = q(itag_qualities[itag])
                filesize = int_or_none(self._search_regex(
                    r'/clen/(\d+)', f.get('fragment_base_url')
                    or f['url'], 'file size', default=None))
                if filesize:
                    f['filesize'] = filesize
                yield f
2593
def _real_extract(self, url):
    """
    Extract the information of a single video

    Downloads the watch page and the player responses of the requested
    clients, then builds the info dict (formats, thumbnails, metadata,
    subtitles, chapters, availability, ...) out of them.

    @param url  The (possibly smuggled) video URL
    @returns    The info dict of the video; a url result when only a trailer
                is available; or a playlist result for multifeed videos
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id
    webpage = self._download_webpage(
        webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
    player_url = self._extract_player_url(master_ytcfg, webpage)
    identity_token = self._extract_identity_token(webpage, video_id)

    player_responses = list(self._extract_player_responses(
        self._get_requested_clients(url, smuggled_data),
        video_id, webpage, master_ytcfg, player_url, identity_token))

    # First match of `keys` over all the items of `obj`
    get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(
        player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict, default=[])
    video_title = (
        get_first(video_details, 'title')
        or self._get_text(microformats, (..., 'title'))
        or search_meta(['og:title', 'twitter:title', 'title']))
    video_description = get_first(video_details, 'shortDescription')

    if not smuggled_data.get('force_singlefeed', False):
        if not self.get_param('noplaylist'):
            multifeed_metadata_list = get_first(
                player_responses,
                ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
                expected_type=str)
            if multifeed_metadata_list:
                entries = []
                feed_ids = []
                for feed in multifeed_metadata_list.split(','):
                    # Unquote should take place before split on comma (,) since textual
                    # fields may contain comma as well (see
                    # https://github.com/ytdl-org/youtube-dl/issues/8536)
                    feed_data = compat_parse_qs(
                        compat_urllib_parse_unquote_plus(feed))

                    def feed_entry(name):
                        return try_get(
                            feed_data, lambda x: x[name][0], compat_str)

                    feed_id = feed_entry('id')
                    if not feed_id:
                        continue
                    feed_title = feed_entry('title')
                    title = video_title
                    if feed_title:
                        title += ' (%s)' % feed_title
                    entries.append({
                        '_type': 'url_transparent',
                        'ie_key': 'Youtube',
                        'url': smuggle_url(
                            '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                            {'force_singlefeed': True}),
                        'title': title,
                    })
                    feed_ids.append(feed_id)
                self.to_screen(
                    'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                    % (', '.join(feed_ids), video_id))
                return self.playlist_result(
                    entries, video_id, video_title, video_description)
        else:
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
    formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.raise_no_formats(
                'This video is DRM protected.', expected=True)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # There may be a subreason without any reason; appending to None
            # would raise TypeError, so use the subreason on its own then
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    for f in formats:
        # TODO: detect if throttled
        if '&n=' in f['url']:  # possibly throttled
            f['source_preference'] = -10

    self._sort_formats(formats)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break

    thumbnails = []
    thumbnail_dicts = traverse_obj(
        (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
        expected_type=dict, default=[])
    for thumbnail in thumbnail_dicts:
        thumbnail_url = thumbnail.get('url')
        if not thumbnail_url:
            continue
        # Sometimes youtube gives a wrong thumbnail URL. See:
        # https://github.com/yt-dlp/yt-dlp/issues/233
        # https://github.com/ytdl-org/youtube-dl/issues/28023
        if 'maxresdefault' in thumbnail_url:
            thumbnail_url = thumbnail_url.split('?')[0]
        thumbnails.append({
            'url': thumbnail_url,
            'height': int_or_none(thumbnail.get('height')),
            'width': int_or_none(thumbnail.get('width')),
        })
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    # The best resolution thumbnails sometimes do not appear in the webpage
    # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
    guaranteed_thumbnail_names = [
        'hqdefault', 'hq1', 'hq2', 'hq3', '0',
        'mqdefault', 'mq1', 'mq2', 'mq3',
        'default', '1', '2', '3'
    ]
    thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
    n_thumbnail_names = len(thumbnail_names)

    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
        '_test_url': name in hq_thumbnail_names,
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Names earlier in the list are preferred, and webp over jpg
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId'))
    duration = int_or_none(
        get_first(video_details, 'lengthSeconds')
        or get_first(microformats, 'lengthSeconds')
        or parse_duration(search_meta('duration'))) or None
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    if is_live is None:
        if is_upcoming or live_content is False:
            is_live = False
    if is_upcoming is None and (live_content or is_live):
        is_upcoming = False
    live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_endtime and live_starttime:
        duration = live_endtime - live_starttime

    info = {
        'id': video_id,
        'title': self._live_title(video_title) if is_live else video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        'description': video_description,
        'upload_date': unified_strdate(
            get_first(microformats, 'uploadDate')
            or search_meta('uploadDate')),
        'uploader': get_first(video_details, 'author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'is_live': is_live,
        'was_live': (False if is_live or is_upcoming or live_content is False
                     else None if is_live is None or is_upcoming is None
                     else live_content),
        'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
        'release_timestamp': live_starttime,
    }

    pctr = get_first(player_responses, ('captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    subtitles = {}
    if pctr:
        def process_language(container, base_url, lang_code, sub_name, query):
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': update_url_query(base_url, query),
                    'name': sub_name,
                })

        for caption_track in (pctr.get('captionTracks') or []):
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            if caption_track.get('kind') != 'asr':
                lang_code = (
                    remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
                    or caption_track.get('languageCode'))
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code,
                    try_get(caption_track, lambda x: x['name']['simpleText']),
                    {})
                continue
            # Only tracks of kind "asr" get here; they are offered in every
            # translation language
            automatic_captions = {}
            for translation_language in (pctr.get('translationLanguages') or []):
                translation_language_code = translation_language.get('languageCode')
                if not translation_language_code:
                    continue
                process_language(
                    automatic_captions, base_url, translation_language_code,
                    self._get_text(translation_language.get('languageName'), max_runs=1),
                    {'tlang': translation_language_code})
            info['automatic_captions'] = automatic_captions
    info['subtitles'] = subtitles

    # Start/end time given in the fragment or the query of the URL
    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        for k, values in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(values[0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # strip() belongs on the matched text, not on the group name
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_DATA_RE, video_id,
            'yt initial data')
    if not initial_data:
        headers = self.generate_api_headers(
            master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
            session_index=self._extract_session_index(master_ytcfg))

        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
            note='Downloading initial data API JSON')

    try:
        # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
        info['subtitles']['live_chat'] = [{
            'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]
    except (KeyError, IndexError, TypeError):
        pass

    # The comments post-extractor below reads `contents` lazily, so it must
    # be bound even when no initial data could be obtained
    contents = []
    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or None)

        contents = try_get(
            initial_data,
            lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
            list) or []
        for content in contents:
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = self._get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    like_count, dislike_count = sbr_tooltip.split(' / ')
                    info.update({
                        'like_count': str_to_int(like_count),
                        'dislike_count': str_to_int(dislike_count),
                    })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                info['channel'] = self._get_text(try_get(
                    vsir,
                    lambda x: x['owner']['videoOwnerRenderer']['title'],
                    dict))
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                # A divider line among the rows is taken to mean that more
                # than one song is listed; no music metadata is set then
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = self._get_text(mrr['title'])
                    mrr_contents_text = self._get_text(mrr['contents'][0])
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }
    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = get_first(video_details, 'isPrivate', expected_type=bool)
    is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    # get xsrf for annotations or comments
    get_annotations = self.get_param('writeannotations', False)
    get_comments = self.get_param('getcomments', False)
    if get_annotations or get_comments:
        xsrf_token = None
        if master_ytcfg:
            xsrf_token = try_get(master_ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
        if not xsrf_token:
            # The webpage is downloaded non-fatally and so may be missing
            xsrf_token = self._search_regex(
                r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
                webpage or '', 'xsrf token', group='xsrf_token', fatal=False)

    # annotations
    if get_annotations:
        invideo_url = get_first(
            player_responses,
            ('annotations', 0, 'playerAnnotationsUrlsRenderer', 'invideoUrl'),
            expected_type=str)
        if xsrf_token and invideo_url:
            xsrf_field_name = None
            if master_ytcfg:
                xsrf_field_name = try_get(master_ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
            if not xsrf_field_name:
                xsrf_field_name = self._search_regex(
                    r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
                    webpage or '', 'xsrf field name',
                    group='xsrf_field_name', default='session_token')
            info['annotations'] = self._download_webpage(
                self._proto_relative_url(invideo_url),
                video_id, note='Downloading annotations',
                errnote='Unable to download video annotations', fatal=False,
                data=urlencode_postdata({xsrf_field_name: xsrf_token}))

    if get_comments:
        info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
c5e8d7af 3103
5f6a1245 3104
8bdd16b4 3105class YoutubeTabIE(YoutubeBaseInfoExtractor):
3106 IE_DESC = 'YouTube.com tab'
70d5c17b 3107 _VALID_URL = r'''(?x)
3108 https?://
3109 (?:\w+\.)?
3110 (?:
3111 youtube(?:kids)?\.com|
3112 invidio\.us
3113 )/
3114 (?:
fe03a6cd 3115 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 3116 (?P<not_channel>
9ba5705a 3117 feed/|hashtag/|
70d5c17b 3118 (?:playlist|watch)\?.*?\blist=
3119 )|
29f7c58a 3120 (?!(?:%s)\b) # Direct URLs
70d5c17b 3121 )
3122 (?P<id>[^/?\#&]+)
3123 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 3124 IE_NAME = 'youtube:tab'
3125
81127aa5 3126 _TESTS = [{
da692b79 3127 'note': 'playlists, multipage',
8bdd16b4 3128 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3129 'playlist_mincount': 94,
3130 'info_dict': {
3131 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3132 'title': 'Игорь Клейнер - Playlists',
3133 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3134 'uploader': 'Игорь Клейнер',
3135 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 3136 },
3137 }, {
da692b79 3138 'note': 'playlists, multipage, different order',
8bdd16b4 3139 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3140 'playlist_mincount': 94,
3141 'info_dict': {
3142 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3143 'title': 'Игорь Клейнер - Playlists',
3144 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3145 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3146 'uploader': 'Игорь Клейнер',
8bdd16b4 3147 },
201c1459 3148 }, {
da692b79 3149 'note': 'playlists, series',
201c1459 3150 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3151 'playlist_mincount': 5,
3152 'info_dict': {
3153 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3154 'title': '3Blue1Brown - Playlists',
3155 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 3156 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3157 'uploader': '3Blue1Brown',
201c1459 3158 },
8bdd16b4 3159 }, {
da692b79 3160 'note': 'playlists, singlepage',
8bdd16b4 3161 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3162 'playlist_mincount': 4,
3163 'info_dict': {
3164 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3165 'title': 'ThirstForScience - Playlists',
3166 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 3167 'uploader': 'ThirstForScience',
3168 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 3169 }
3170 }, {
3171 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3172 'only_matching': True,
3173 }, {
da692b79 3174 'note': 'basic, single video playlist',
0e30a7b9 3175 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3176 'info_dict': {
0e30a7b9 3177 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3178 'uploader': 'Sergey M.',
3179 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3180 'title': 'youtube-dl public playlist',
81127aa5 3181 },
0e30a7b9 3182 'playlist_count': 1,
9291475f 3183 }, {
da692b79 3184 'note': 'empty playlist',
0e30a7b9 3185 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3186 'info_dict': {
0e30a7b9 3187 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3188 'uploader': 'Sergey M.',
3189 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3190 'title': 'youtube-dl empty playlist',
9291475f
PH
3191 },
3192 'playlist_count': 0,
3193 }, {
da692b79 3194 'note': 'Home tab',
8bdd16b4 3195 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3196 'info_dict': {
8bdd16b4 3197 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3198 'title': 'lex will - Home',
3199 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3200 'uploader': 'lex will',
3201 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3202 },
8bdd16b4 3203 'playlist_mincount': 2,
9291475f 3204 }, {
da692b79 3205 'note': 'Videos tab',
8bdd16b4 3206 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3207 'info_dict': {
8bdd16b4 3208 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3209 'title': 'lex will - Videos',
3210 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3211 'uploader': 'lex will',
3212 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3213 },
8bdd16b4 3214 'playlist_mincount': 975,
9291475f 3215 }, {
da692b79 3216 'note': 'Videos tab, sorted by popular',
8bdd16b4 3217 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3218 'info_dict': {
8bdd16b4 3219 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3220 'title': 'lex will - Videos',
3221 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3222 'uploader': 'lex will',
3223 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3224 },
8bdd16b4 3225 'playlist_mincount': 199,
9291475f 3226 }, {
da692b79 3227 'note': 'Playlists tab',
8bdd16b4 3228 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3229 'info_dict': {
8bdd16b4 3230 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3231 'title': 'lex will - Playlists',
3232 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3233 'uploader': 'lex will',
3234 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3235 },
8bdd16b4 3236 'playlist_mincount': 17,
ac7553d0 3237 }, {
da692b79 3238 'note': 'Community tab',
8bdd16b4 3239 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3240 'info_dict': {
8bdd16b4 3241 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3242 'title': 'lex will - Community',
3243 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3244 'uploader': 'lex will',
3245 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3246 },
3247 'playlist_mincount': 18,
87dadd45 3248 }, {
da692b79 3249 'note': 'Channels tab',
8bdd16b4 3250 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3251 'info_dict': {
8bdd16b4 3252 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3253 'title': 'lex will - Channels',
3254 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3255 'uploader': 'lex will',
3256 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3257 },
deaec5af 3258 'playlist_mincount': 12,
cd684175 3259 }, {
3260 'note': 'Search tab',
3261 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3262 'playlist_mincount': 40,
3263 'info_dict': {
3264 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3265 'title': '3Blue1Brown - Search - linear algebra',
3266 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3267 'uploader': '3Blue1Brown',
3268 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3269 },
6b08cdf6 3270 }, {
a0566bbf 3271 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3272 'only_matching': True,
3273 }, {
a0566bbf 3274 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3275 'only_matching': True,
3276 }, {
a0566bbf 3277 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3278 'only_matching': True,
3279 }, {
3280 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3281 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3282 'info_dict': {
3283 'title': '29C3: Not my department',
3284 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3285 'uploader': 'Christiaan008',
3286 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3287 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3288 },
3289 'playlist_count': 96,
3290 }, {
3291 'note': 'Large playlist',
3292 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3293 'info_dict': {
8bdd16b4 3294 'title': 'Uploads from Cauchemar',
3295 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3296 'uploader': 'Cauchemar',
3297 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3298 },
8bdd16b4 3299 'playlist_mincount': 1123,
3300 }, {
da692b79 3301 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3302 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3303 'only_matching': True,
4b7df0d3
JMF
3304 }, {
3305 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3306 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3307 'info_dict': {
acf757f4
PH
3308 'title': 'Uploads from Interstellar Movie',
3309 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3310 'uploader': 'Interstellar Movie',
8bdd16b4 3311 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3312 },
481cc733 3313 'playlist_mincount': 21,
358de58c 3314 }, {
3315 'note': 'Playlist with "show unavailable videos" button',
3316 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3317 'info_dict': {
3318 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3319 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3320 'uploader': 'Phim Siêu Nhân Nhật Bản',
3321 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3322 },
da692b79 3323 'playlist_mincount': 200,
5d342002 3324 }, {
da692b79 3325 'note': 'Playlist with unavailable videos in page 7',
5d342002 3326 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3327 'info_dict': {
3328 'title': 'Uploads from BlankTV',
3329 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3330 'uploader': 'BlankTV',
3331 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3332 },
da692b79 3333 'playlist_mincount': 1000,
8bdd16b4 3334 }, {
da692b79 3335 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3336 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3337 'info_dict': {
3338 'title': 'Data Analysis with Dr Mike Pound',
3339 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3340 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3341 'uploader': 'Computerphile',
deaec5af 3342 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3343 },
3344 'playlist_mincount': 11,
3345 }, {
a0566bbf 3346 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3347 'only_matching': True,
dacb3a86 3348 }, {
da692b79 3349 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3350 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3351 'info_dict': {
3352 'id': 'FqZTN594JQw',
3353 'ext': 'webm',
3354 'title': "Smiley's People 01 detective, Adventure Series, Action",
3355 'uploader': 'STREEM',
3356 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3357 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3358 'upload_date': '20150526',
3359 'license': 'Standard YouTube License',
3360 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3361 'categories': ['People & Blogs'],
3362 'tags': list,
dbdaaa23 3363 'view_count': int,
dacb3a86
S
3364 'like_count': int,
3365 'dislike_count': int,
3366 },
3367 'params': {
3368 'skip_download': True,
3369 },
13a75688 3370 'skip': 'This video is not available.',
dacb3a86 3371 'add_ie': [YoutubeIE.ie_key()],
481cc733 3372 }, {
8bdd16b4 3373 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3374 'only_matching': True,
66b48727 3375 }, {
8bdd16b4 3376 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3377 'only_matching': True,
a0566bbf 3378 }, {
3379 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3380 'info_dict': {
11f9be09 3381 'id': 'FMtPN8yp5LU', # This will keep changing
a0566bbf 3382 'ext': 'mp4',
deaec5af 3383 'title': compat_str,
a0566bbf 3384 'uploader': 'Sky News',
3385 'uploader_id': 'skynews',
3386 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3387 'upload_date': r're:\d{8}',
3388 'description': compat_str,
a0566bbf 3389 'categories': ['News & Politics'],
3390 'tags': list,
3391 'like_count': int,
3392 'dislike_count': int,
3393 },
3394 'params': {
3395 'skip_download': True,
3396 },
da692b79 3397 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3398 }, {
3399 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3400 'info_dict': {
3401 'id': 'a48o2S1cPoo',
3402 'ext': 'mp4',
3403 'title': 'The Young Turks - Live Main Show',
3404 'uploader': 'The Young Turks',
3405 'uploader_id': 'TheYoungTurks',
3406 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3407 'upload_date': '20150715',
3408 'license': 'Standard YouTube License',
3409 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3410 'categories': ['News & Politics'],
3411 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3412 'like_count': int,
3413 'dislike_count': int,
3414 },
3415 'params': {
3416 'skip_download': True,
3417 },
3418 'only_matching': True,
3419 }, {
3420 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3421 'only_matching': True,
3422 }, {
3423 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3424 'only_matching': True,
09f1580e 3425 }, {
3426 'note': 'A channel that is not live. Should raise error',
3427 'url': 'https://www.youtube.com/user/numberphile/live',
3428 'only_matching': True,
3d3dddc9 3429 }, {
3430 'url': 'https://www.youtube.com/feed/trending',
3431 'only_matching': True,
3432 }, {
3d3dddc9 3433 'url': 'https://www.youtube.com/feed/library',
3434 'only_matching': True,
3435 }, {
3d3dddc9 3436 'url': 'https://www.youtube.com/feed/history',
3437 'only_matching': True,
3438 }, {
3d3dddc9 3439 'url': 'https://www.youtube.com/feed/subscriptions',
3440 'only_matching': True,
3441 }, {
3d3dddc9 3442 'url': 'https://www.youtube.com/feed/watch_later',
3443 'only_matching': True,
3444 }, {
da692b79 3445 'note': 'Recommended - redirects to home page',
3d3dddc9 3446 'url': 'https://www.youtube.com/feed/recommended',
3447 'only_matching': True,
29f7c58a 3448 }, {
da692b79 3449 'note': 'inline playlist with not always working continuations',
29f7c58a 3450 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3451 'only_matching': True,
3452 }, {
3453 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3454 'only_matching': True,
3455 }, {
3456 'url': 'https://www.youtube.com/course',
3457 'only_matching': True,
3458 }, {
3459 'url': 'https://www.youtube.com/zsecurity',
3460 'only_matching': True,
3461 }, {
3462 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3463 'only_matching': True,
3464 }, {
3465 'url': 'https://www.youtube.com/TheYoungTurks/live',
3466 'only_matching': True,
39ed931e 3467 }, {
3468 'url': 'https://www.youtube.com/hashtag/cctv9',
3469 'info_dict': {
3470 'id': 'cctv9',
3471 'title': '#cctv9',
3472 },
3473 'playlist_mincount': 350,
201c1459 3474 }, {
3475 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3476 'only_matching': True,
9297939e 3477 }, {
da692b79 3478 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3479 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3480 'only_matching': True
fe03a6cd 3481 }, {
3482 'note': '/browse/ should redirect to /channel/',
3483 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3484 'only_matching': True
3485 }, {
3486 'note': 'VLPL, should redirect to playlist?list=PL...',
3487 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3488 'info_dict': {
3489 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3490 'uploader': 'NoCopyrightSounds',
3491 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3492 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3493 'title': 'NCS Releases',
3494 },
3495 'playlist_mincount': 166,
18db7548 3496 }, {
3497 'note': 'Topic, should redirect to playlist?list=UU...',
3498 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3499 'info_dict': {
3500 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3501 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3502 'title': 'Uploads from Royalty Free Music - Topic',
3503 'uploader': 'Royalty Free Music - Topic',
3504 },
3505 'expected_warnings': [
3506 'A channel/user page was given',
3507 'The URL does not have a videos tab',
3508 ],
3509 'playlist_mincount': 101,
3510 }, {
3511 'note': 'Topic without a UU playlist',
3512 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3513 'info_dict': {
3514 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3515 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3516 },
3517 'expected_warnings': [
3518 'A channel/user page was given',
3519 'The URL does not have a videos tab',
3520 'Falling back to channel URL',
3521 ],
3522 'playlist_mincount': 9,
abcdd12b 3523 }, {
3524 'note': 'Youtube music Album',
3525 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3526 'info_dict': {
3527 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3528 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3529 },
3530 'playlist_count': 50,
47193e02 3531 }, {
3532 'note': 'unlisted single video playlist',
3533 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3534 'info_dict': {
3535 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3536 'uploader': 'colethedj',
3537 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3538 'title': 'yt-dlp unlisted playlist test',
3539 'availability': 'unlisted'
3540 },
3541 'playlist_count': 1,
29f7c58a 3542 }]
3543
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle ``url``.

    Any URL that YoutubeIE accepts is left to it; every other URL is
    decided by the inherited ``suitable`` check.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 3548
def _extract_channel_id(self, webpage):
    """Return the channel id found in ``webpage``.

    The 'channelId' meta tag is tried first. When it is absent, the
    channel URL is read from one of several URL-carrying meta tags and
    the id is taken from its ``/channel/<id>`` path component.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to be inside the group: with `([^/?#&])+` the
    # group is re-captured for every character and ends up holding only
    # the last character of the id.
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
15f6397c 3561
@staticmethod
def _extract_basic_item_renderer(item):
    """Return the first usable renderer dict found in ``item``.

    A value qualifies when it is a dict stored under one of the basic
    renderer keys, or under any key of the form ``grid...Renderer``.
    Returns None when nothing qualifies.
    """
    # Modified from _extract_grid_item_renderer
    basic_keys = ('playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer')
    for name, candidate in item.items():
        if not isinstance(candidate, dict):
            continue
        if name in basic_keys or (name.startswith('grid') and name.endswith('Renderer')):
            return candidate
    return None
8bdd16b4 3575
def _grid_entries(self, grid_renderer):
    """Yield one result per usable item in ``grid_renderer['items']``.

    Each item is classified by the first id it carries, checked in the
    order playlist, video, channel. Items with none of those ids fall
    back to their navigation endpoint URL, which is handed to the first
    of YoutubeTabIE, YoutubePlaylistIE, YoutubeIE that accepts it.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        basic = self._extract_basic_item_renderer(grid_item)
        if not isinstance(basic, dict):
            continue
        label = self._get_text(basic.get('title'))
        list_id = basic.get('playlistId')
        clip_id = basic.get('videoId')
        owner_id = basic.get('channelId')

        if list_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % list_id,
                ie=YoutubeTabIE.ie_key(), video_id=list_id,
                video_title=label)
        elif clip_id:
            # video
            yield self._extract_video(basic)
        elif owner_id:
            # channel
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % owner_id,
                ie=YoutubeTabIE.ie_key(), video_title=label)
        else:
            # generic endpoint URL support
            endpoint_url = urljoin('https://www.youtube.com/', try_get(
                basic, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str))
            if not endpoint_url:
                continue
            # Lazily pick the first extractor that accepts the URL
            handler = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(endpoint_url)),
                None)
            if handler is not None:
                yield self.url_result(
                    endpoint_url, ie=handler.ie_key(),
                    video_id=handler._match_id(endpoint_url), video_title=label)
8bdd16b4 3615
def _shelf_entries_from_content(self, shelf_renderer):
    """Yield the entries embedded directly in a shelf's ``content``.

    Only ``gridRenderer`` and ``expandedShelfContentsRenderer`` content
    is extracted, via ``_grid_entries``. Nothing is yielded when
    ``content`` is not a dict.
    """
    shelf_content = shelf_renderer.get('content')
    if isinstance(shelf_content, dict):
        grid = shelf_content.get('gridRenderer') or shelf_content.get('expandedShelfContentsRenderer')
        if grid:
            # TODO: add support for nested playlists so each shelf is processed
            # as separate playlist
            # TODO: this includes only first N items
            for item in self._grid_entries(grid):
                yield item
        # TODO: 'horizontalListRenderer' content is not extracted yet
8bdd16b4 3631
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf: its own URL, then its inline content.

    When ``skip_channels`` is true and the shelf URL contains
    '/channels?', nothing at all is yielded for the shelf.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Links to other channels are skipped by URL inspection; checking
        # endpoint.commandMetadata.webCommandMetadata.webPageType ==
        # WEB_PAGE_TYPE_CHANNEL will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(
            shelf_url,
            video_title=self._get_text(shelf_renderer, lambda x: x['title']))
    # The shelf may carry no URL, so its content is always extracted as well
    for item in self._shelf_entries_from_content(shelf_renderer):
        yield item
c5e8d7af 3648
def _playlist_entries(self, video_list_renderer):
    """Yield an extracted video for each playlist row that has a videoId.

    Rows are read from ``video_list_renderer['contents']``. A row
    contributes only if it is a dict holding a ``playlistVideoRenderer``
    or ``playlistPanelVideoRenderer`` dict with a truthy ``videoId``.
    """
    for row in video_list_renderer['contents']:
        if isinstance(row, dict):
            video = row.get('playlistVideoRenderer') or row.get('playlistPanelVideoRenderer')
            if isinstance(video, dict) and video.get('videoId'):
                yield self._extract_video(video)
07aeced6 3660
def _rich_entries(self, rich_grid_renderer):
    """Yield the video at ``content.videoRenderer``, if it has a videoId."""
    video = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video.get('videoId'):
        yield self._extract_video(video)
3668
def _video_entry(self, video_renderer):
    """Return the extracted video for ``video_renderer``, or None without a videoId."""
    if not video_renderer.get('videoId'):
        return None
    return self._extract_video(video_renderer)
dacb3a86 3673
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos and playlists referenced by one community post.

    In order: the attached video (if any), the attached playlist (if
    any), then every YouTube video link found in the post text, except
    a link to the already-attached video.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        attached_entry = self._extract_video(attached_video)
        if attached_entry:
            yield attached_entry

    # playlist attachment
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not (link and YoutubeIE.suitable(link)):
            continue
        linked_id = YoutubeIE._match_id(link)
        if video_id == linked_id:
            # Already yielded as the post's attachment
            continue
        yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)
dacb3a86 3709
def _post_thread_continuation_entries(self, post_thread_continuation):
    """Yield entries for every post thread in a continuation payload.

    ``post_thread_continuation['contents']`` must be a list, otherwise
    nothing is yielded. Each dict element that holds a
    ``backstagePostThreadRenderer`` dict is passed to
    ``_post_thread_entries``; all other elements are skipped.
    """
    contents = post_thread_continuation.get('contents')
    if not isinstance(contents, list):
        return
    for content in contents:
        # Skip malformed elements instead of failing on `.get`, as the
        # sibling entries methods do
        if not isinstance(content, dict):
            continue
        renderer = content.get('backstagePostThreadRenderer')
        if not isinstance(renderer, dict):
            continue
        for entry in self._post_thread_entries(renderer):
            yield entry
07aeced6 3720
39ed931e 3721 r''' # unused
3722 def _rich_grid_entries(self, contents):
3723 for content in contents:
3724 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3725 if video_renderer:
3726 entry = self._video_entry(video_renderer)
3727 if entry:
3728 yield entry
3729 '''
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield the entries of ``tab``, then follow continuations page by page.

    The first page comes from ``tab['content']`` (a sectionListRenderer
    or richGridRenderer). Further pages are requested with
    ``_extract_response`` for as long as a continuation is found and a
    response with a recognised renderer comes back.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        # Walks parent_renderer['contents'], yields the entries found and
        # stores the continuation it comes across in continuation_list[0].
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                # No item section: the only other shape handled here is a rich item
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                # Renderer key -> callable returning an iterable of entries
                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                # Only the first known renderer of each section item is used
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            # Fall back to a continuation on the item section itself
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        # Last resort: a continuation on the parent renderer
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list used as a mutable cell that extract_entries can write to
    continuation_list = [None]  # Python 2 does not support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    visitor_data = None

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep the previous visitor data when the response carries none
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        # First response shape: entries under 'continuationContents'
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Fresh cell for this page; extract_entries resolves the name
            # in this scope when it runs, so it writes into the new list
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response shape: a flat list of continuation items.
        # Renderer key of the first item -> (handler, key the handler reads the items from)
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'gridChannelRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            # Wrap the whole item list under the key the handler expects
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Neither response shape was recognised: stop paging
        break
9558dcec 3845
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the first tab whose 'selected' is True.

    Each tab's renderer is read from 'tabRenderer' or
    'expandableTabRenderer'. Raises ExtractorError when no tab is
    selected.
    """
    tab_renderers = (
        dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        for tab in tabs)
    # Lazy search: stops at the first selected tab, like an early return
    selected = next(
        (candidate for candidate in tab_renderers if candidate.get('selected') is True),
        None)
    if selected is None:
        raise ExtractorError('Unable to find selected tab')
    return selected
b82f815f 3854
@classmethod
def _extract_uploader(cls, data):
    """Return uploader metadata read from the playlist sidebar.

    The result may contain 'uploader', 'uploader_id' and 'uploader_url';
    keys whose value is None are left out. An empty dict is returned
    when no video owner entry is found.
    """
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}

    def browse_field(name):
        # String field of the owner's browse endpoint, or None
        return try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint'][name], compat_str)

    fields = (
        ('uploader', owner.get('text')),
        ('uploader_id', browse_field('browseId')),
        ('uploader_url', urljoin('https://www.youtube.com/', browse_field('canonicalBaseUrl'))),
    )
    return {key: value for key, value in fields if value is not None}
8bdd16b4 3869
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """
    Build the playlist result for a page that is organised in tabs.

    Metadata is read from the 'channelMetadataRenderer' of `data` or, when
    that is absent, from its 'playlistMetadataRenderer'. The entries are
    produced by self._entries() for the selected tab.

    @param item_id: fallback playlist id; also passed to the ytcfg and identity token helpers
    @param webpage: webpage passed to the ytcfg and identity token helpers
    @param data:    response holding the metadata
    @param tabs:    tabs handed to _extract_selected_tab()
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    playlist_id = title = description = None
    tags, raw_thumbnails = [], []

    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if not renderer:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
    else:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        # Stays None for playlist metadata; the item id is used further down in that case
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        raw_thumbnails = try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
        if not raw_thumbnails:
            # No avatar: fall back to the thumbnail of the primary sidebar renderer
            raw_thumbnails = try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list) or []

    thumbnails = []
    for thumb in raw_thumbnails:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag = try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag or playlist_id
    title = (
        title
        + format_field(selected_tab, 'title', ' - %s')
        + format_field(selected_tab, 'expandedText', ' - %s'))

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel fields mirror whatever uploader values were settled on above
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    ytcfg = self.extract_ytcfg(item_id, webpage)
    identity_token = self._extract_identity_token(webpage, item_id)
    account_syncid = self._extract_account_syncid(ytcfg, data)
    entries = self._entries(
        selected_tab, playlist_id, identity_token, account_syncid, ytcfg)
    return self.playlist_result(entries, **metadata)
73c4ac2c 3944
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """
    Yield the videos of a mix playlist, requesting further pages from the 'next' endpoint.

    Extraction ends when a page has no videos, when a page has nothing after
    the last video already seen, when the first video shows up again, or
    when a response carries no playlist.

    @param playlist:    playlist renderer of the first page
    @param playlist_id: id sent as 'playlistId' with every request
    @param data:        response passed to _extract_account_syncid()
    @param webpage:     webpage passed to extract_ytcfg() and _extract_identity_token()
    """
    first_id = last_id = None
    ytcfg = self.extract_ytcfg(playlist_id, webpage)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video of the previous page (index 0 on the first page)
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # try_get() gives None when the last item has no watch endpoint;
        # fall back to an empty dict so the defaults below are used instead of crashing
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # Without a playlist renderer there is nothing left to iterate
            return
cd7c66cf 3980
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """
    Build the result for a playlist renderer.

    When the playlist's endpoint resolves to a URL different from `url`, a
    url_result for YoutubeTabIE is returned. Otherwise the entries are
    produced by _extract_mix_playlist().
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_url)
    if not playlist_url or playlist_url == url:
        mix_entries = self._extract_mix_playlist(playlist, playlist_id, data, webpage)
        return self.playlist_result(
            mix_entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3998
def _extract_availability(self, data):
    """
    Work out the availability of a playlist/tab from its badges.
    Note: Privacy flags stay None (unknown) unless a matching label is found
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for entry in dropdown_entries:
        if not try_get(entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool):
            continue
        selected_label = self._get_text(
            try_get(entry, lambda x: x['privacyDropdownItemRenderer']['label'], dict) or [])
        if not selected_label:
            continue
        # The first selected entry that has a label is treated like a badge
        labels.add(selected_label.lower())
        break

    is_private = is_unlisted = None
    for label in labels:
        if label == 'public':
            is_unlisted = is_private = False
        elif label == 'private':
            is_private = True
        elif label == 'unlisted':
            is_unlisted = True
    return self._availability(is_private, False, False, False, is_unlisted)
4031
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` value among the playlist sidebar items.

    @param data:          response containing sidebar -> playlistSidebarRenderer -> items
    @param info_renderer: key looked up in every sidebar item
    @param expected_type: type passed to try_get() for the looked-up value
    @returns              the matching value, or None when there is none
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        for sidebar_item in sidebar_items)
    return next((found for found in candidates if found), None)
4040
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Request the playlist again in the variant that lists unavailable videos.

    Nothing is requested (None is returned) when `data` has no primary
    sidebar info renderer. The browse id and params are taken from the
    'show unavailable videos' menu item if there is one; otherwise the
    defaults 'VL<item_id>' and 'wgYCCAA=' are sent.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_endpoint = {}
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_items:
        if not isinstance(menu_item, dict):
            continue
        nav_item = menu_item.get('menuNavigationItemRenderer')
        button_text = try_get(
            nav_item, lambda x: x['text']['simpleText'], compat_str)
        if button_text and button_text.lower() == 'show unavailable videos':
            browse_endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            break

    ytcfg = self.extract_ytcfg(item_id, webpage)
    account_syncid = self._extract_account_syncid(ytcfg, data)
    identity_token = self._extract_identity_token(webpage, item_id=item_id)
    visitor_data = try_get(
        self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=account_syncid,
        identity_token=identity_token, visitor_data=visitor_data)
    query = {
        'params': browse_endpoint.get('params') or 'wgYCCAA=',
        'browseId': browse_endpoint.get('browseId') or 'VL%s' % item_id
    }
    # fatal=False: a failed request is not raised here
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 4079
def _extract_webpage(self, url, item_id):
    """
    Download `url` and extract its initial data, retrying while the data is incomplete.

    The data counts as complete once it has a truthy 'contents' or
    'currentVideoEndpoint' value. The download is repeated at most
    'extractor_retries' (default 3) additional times.

    @returns (webpage, data)
    Raises ExtractorError when the data is still incomplete after the last attempt.
    """
    retries = self.get_param('extractor_retries', 3)
    count = -1
    last_error = 'Incomplete yt initial data received'
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if count:
            self.report_warning('%s. Retrying ...' % last_error)
        webpage = self._download_webpage(
            url, item_id,
            'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
        data = self.extract_yt_initial_data(item_id, webpage)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        if count >= retries:
            raise ExtractorError(last_error)
    return webpage, data
4101
@staticmethod
def _smuggle_data(entries, data):
    """
    Yield each entry of `entries`, first smuggling `data` into its 'url' when `data` is truthy.
    """
    for item in entries:
        if data:
            item.update(url=smuggle_url(item['url'], data))
        yield item
4108
def _real_extract(self, url):
    """
    Unsmuggle `url`, extract it, and re-smuggle the data into every resulting entry.

    'is_music_url' is added to the smuggled data when is_music_url() reports
    the URL as a music URL.
    """
    bare_url, smuggled = unsmuggle_url(url, {})
    if self.is_music_url(bare_url):
        smuggled['is_music_url'] = True
    result = self.__real_extract(bare_url, smuggled)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled)
    return result
4117
# Splits a URL into 'pre' (the part matched by _VALID_URL), an optional 'tab'
# (a '/word' segment, only attempted when the 'channel_type' group took part
# in the match) and 'post' (everything that follows).
# NOTE(review): 'channel_type' is presumably a named group of _VALID_URL - confirm
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4119
def __real_extract(self, url, smuggled_data):
    """
    Resolve a channel/tab/playlist/watch URL to a result dict.

    Depending on the downloaded data this returns the result of
    _extract_from_tabs(), the result of _extract_from_playlist(), or a
    url_result for a single video (YoutubeIE).
    Raises ExtractorError when the page cannot be recognized, or when a
    '/live' URL still shows a tab page.

    @param url:           URL to extract
    @param smuggled_data: dict; only its 'is_music_url' key is read here
    """
    item_id = self._match_id(url)
    # Force the www.youtube.com host, whatever host the URL was given with
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Match groups of _url_re as a dict, with unmatched groups turned into ''
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    # NOTE(review): 'not_channel' is presumably a named group of _VALID_URL - confirm
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                item_id = self._search_regex(
                    r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                    self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                    'playlist id')
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from the (possibly rewritten) parts and match it again
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            if alert_type == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        # Only switch over to the playlist once it loaded without an error alert
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)
    # Look the tabs up again, since data may have been replaced above
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    # Neither tabs nor a playlist: fall back to a single video, if one is known
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 4234
c5e8d7af 4235
class YoutubePlaylistIE(InfoExtractor):
    """Accepts playlist ids and playlist URLs and hands them to YoutubeTabIE."""
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """
        Tell whether this extractor should handle `url`.

        URLs that YoutubeTabIE accepts, and URLs with a non-empty 'v' query
        parameter, are rejected before the regular _VALID_URL check.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        if has_video_id:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Redirect to the equivalent /playlist URL, handled by YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query if there is one; a bare id has none
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4320
4321
class YoutubeYtBeIE(InfoExtractor):
    """Turns youtu.be short links that carry a playlist id into watch URLs for YoutubeTabIE."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the matching watch URL (video + playlist) and delegate it to YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4360
4361
class YoutubeYtUserIE(InfoExtractor):
    """Maps the "ytuser:<name>" keyword to the user's page URL."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the user's page URL to YoutubeTabIE."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4375
b05654f0 4376
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Maps the ":ytfav" keyword family to the 'LL' playlist URL."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the 'LL' playlist URL to YoutubeTabIE."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4394
4395
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor that pages through the 'search' endpoint."""
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """
        Yield up to `n` video results for `query`.

        Pages are requested until `n` videos were yielded, a response is
        empty or has no result list, or no continuation is found.
        """
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS
        yielded = 0
        continuation = {}
        for page_num in itertools.count(1):
            request.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                return
            # First getter: initial page layout; second getter: continuation responses
            sections = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for section in sections:
                continuation = continuation or self._extract_continuation({'contents': [section]})

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for item in items or []:
                    video = item.get('videoRenderer') if isinstance(item, dict) else None
                    if not (isinstance(video, dict) and video.get('videoId')):
                        continue

                    yield self._extract_video(video)
                    yielded += 1
                    if yielded == n:
                        return

            if not continuation:
                return

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query, query)
75dff0ee 4463
c9ae7b95 4464
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Variant of YoutubeSearchIE that only overrides the name, the search
    # keyword and the search params; it adds no logic of its own.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # NOTE(review): presumably the 'params' value that selects newest-first ordering - confirm
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4470
c9ae7b95 4471
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Handles search result page URLs by running the equivalent search."""
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return _VALID_URL as it is."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run the search described by the 'search_query'/'q' and 'sp' parameters of `url`."""
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = params.get('search_query') or params.get('q')
        query = terms[0]
        # Stored on the instance, where _entries() reads it; empty string when there is no 'sp'
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4498
4499
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed extractors.
    A subclass has to provide _FEED_NAME; it is used for both the
    extractor name and the feed URL.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        """Extractor name derived from _FEED_NAME."""
        feed_name = self._FEED_NAME
        return 'youtube:%s' % feed_name

    def _real_extract(self, url):
        """Delegate the feed URL for _FEED_NAME to YoutubeTabIE."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4516
4517
class YoutubeWatchLaterIE(InfoExtractor):
    """Maps the ":ytwatchlater" keyword to the 'WL' playlist URL."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the 'WL' playlist URL to YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4530
4531
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the 'recommended' feed; all behaviour comes from
    # YoutubeFeedsInfoExtractor, only the attributes below differ.
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Matches the bare youtube.com home URL as well as the
    # ":ytrec" / ":ytrecommended" keywords
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Overrides the True set by YoutubeFeedsInfoExtractor
    # NOTE(review): IE_DESC above still says "requires authentication" - confirm which is intended
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4547
1ed5b5c9 4548
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the 'subscriptions' feed; all behaviour comes from
    # YoutubeFeedsInfoExtractor.
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    # Accepts ":ytsub", ":ytsubs", ":ytsubscription" and ":ytsubscriptions"
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4560
1ed5b5c9 4561
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the 'history' feed; all behaviour comes from
    # YoutubeFeedsInfoExtractor.
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    # Accepts ":ythis" and ":ythistory"
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4570
4571
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution_link URLs that end right after a single
    # non-video query parameter (or after "watch?") and reports them as an
    # error instead of extracting anything.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Verbose regex: the trailing "$" requires the URL to end after the one
    # optional watch parameter or after the attribution_link "a" parameter
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Always raises: expected=True marks this as a user-facing error
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4619
4620
class YoutubeTruncatedIDIE(InfoExtractor):
    """Reports watch URLs whose video id has only 1 to 10 characters."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id and the URL."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)