]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[youtube:comments] Fix `is_favorited` (#491)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
2d6659b9 5import base64
d92f5d5a 6import calendar
109dd3b2 7import copy
a5c56234 8import hashlib
0ca96d48 9import itertools
c5e8d7af 10import json
c4417ddb 11import os.path
d77ab8e2 12import random
c5e8d7af 13import re
8a784c74 14import time
e0df6211 15import traceback
c5e8d7af 16
b05654f0 17from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 18from ..compat import (
edf3e38e 19 compat_chr,
29f7c58a 20 compat_HTTPError,
c5e8d7af 21 compat_parse_qs,
545cc85d 22 compat_str,
7fd002c0 23 compat_urllib_parse_unquote_plus,
15707c7e 24 compat_urllib_parse_urlencode,
7c80519c 25 compat_urllib_parse_urlparse,
7c61bd36 26 compat_urlparse,
4bb4a188 27)
545cc85d 28from ..jsinterp import JSInterpreter
4bb4a188 29from ..utils import (
c224251a 30 bool_or_none,
2d6659b9 31 bytes_to_intlist,
c5e8d7af 32 clean_html,
26fe8ffe 33 dict_get,
d92f5d5a 34 datetime_from_str,
358de58c 35 error_to_compat_str,
c5e8d7af 36 ExtractorError,
b60419c5 37 format_field,
2d30521a 38 float_or_none,
dd27fd17 39 int_or_none,
2d6659b9 40 intlist_to_bytes,
94278f72 41 mimetype2ext,
6310acf5 42 parse_codecs,
7c80519c 43 parse_duration,
dca3ff4a 44 qualities,
3995d37d 45 remove_start,
cf7e015f 46 smuggle_url,
dbdaaa23 47 str_or_none,
c93d53f5 48 str_to_int,
556dbe7f 49 try_get,
c5e8d7af
PH
50 unescapeHTML,
51 unified_strdate,
cf7e015f 52 unsmuggle_url,
8bdd16b4 53 update_url_query,
21c340b8 54 url_or_none,
6e6bc8da 55 urlencode_postdata,
d92f5d5a 56 urljoin
c5e8d7af
PH
57)
58
5f6a1245 59
def parse_qs(url):
    """Return the query string of *url* as parsed by compat_urlparse.parse_qs."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
62
63
de7f3446 64class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b
JMF
65 """Provide base functions for Youtube extractors"""
66 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
9303ce3e 67 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
e00eb564
S
68
69 _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
3995d37d
S
70 _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
71 _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
e00eb564 72
3462ffa8 73 _RESERVED_NAMES = (
bea74222 74 r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
46953e7e 75 r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
cd7c66cf 76 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
3462ffa8 77
b2e8bc1b
JMF
78 _NETRC_MACHINE = 'youtube'
79 # If True it will raise an error if no login info is provided
80 _LOGIN_REQUIRED = False
81
70d5c17b 82 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 83
b2e8bc1b 84 def _login(self):
83317f69 85 """
86 Attempt to log in to YouTube.
87 True is returned if successful or skipped.
88 False is returned if login failed.
89
90 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
91 """
9d5d4d64 92
93 def warn(message):
94 self.report_warning(message)
95
96 # username+password login is broken
97 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
98 self.raise_login_required(
99 'Login details are needed to download this content', method='cookies')
68217024 100 username, password = self._get_login_info()
9d5d4d64 101 if username:
102 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
103 return
9d5d4d64 104
2d6659b9 105 # Everything below this is broken!
106 r'''
b2e8bc1b
JMF
107 # No authentication to be performed
108 if username is None:
a06916d9 109 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
69ea8ca4 110 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
a06916d9 111 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
545cc85d 112 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
83317f69 113 return True
b2e8bc1b 114
7cc3570e
PH
115 login_page = self._download_webpage(
116 self._LOGIN_URL, None,
69ea8ca4
PH
117 note='Downloading login page',
118 errnote='unable to fetch login page', fatal=False)
7cc3570e
PH
119 if login_page is False:
120 return
b2e8bc1b 121
1212e997 122 login_form = self._hidden_inputs(login_page)
c5e8d7af 123
e00eb564
S
124 def req(url, f_req, note, errnote):
125 data = login_form.copy()
126 data.update({
127 'pstMsg': 1,
128 'checkConnection': 'youtube',
129 'checkedDomains': 'youtube',
130 'hl': 'en',
131 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
3995d37d 132 'f.req': json.dumps(f_req),
e00eb564
S
133 'flowName': 'GlifWebSignIn',
134 'flowEntry': 'ServiceLogin',
baf67a60
S
135 # TODO: reverse actual botguard identifier generation algo
136 'bgRequest': '["identifier",""]',
041bc3ad 137 })
e00eb564
S
138 return self._download_json(
139 url, None, note=note, errnote=errnote,
140 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
141 fatal=False,
142 data=urlencode_postdata(data), headers={
143 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
144 'Google-Accounts-XSRF': 1,
145 })
146
3995d37d
S
147 lookup_req = [
148 username,
149 None, [], None, 'US', None, None, 2, False, True,
150 [
151 None, None,
152 [2, 1, None, 1,
153 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
154 None, [], 4],
155 1, [None, None, []], None, None, None, True
156 ],
157 username,
158 ]
159
e00eb564 160 lookup_results = req(
3995d37d 161 self._LOOKUP_URL, lookup_req,
e00eb564
S
162 'Looking up account info', 'Unable to look up account info')
163
164 if lookup_results is False:
165 return False
041bc3ad 166
3995d37d
S
167 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
168 if not user_hash:
169 warn('Unable to extract user hash')
170 return False
171
172 challenge_req = [
173 user_hash,
174 None, 1, None, [1, None, None, None, [password, None, True]],
175 [
176 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
177 1, [None, None, []], None, None, None, True
178 ]]
83317f69 179
3995d37d
S
180 challenge_results = req(
181 self._CHALLENGE_URL, challenge_req,
182 'Logging in', 'Unable to log in')
83317f69 183
3995d37d 184 if challenge_results is False:
e00eb564 185 return
83317f69 186
3995d37d
S
187 login_res = try_get(challenge_results, lambda x: x[0][5], list)
188 if login_res:
189 login_msg = try_get(login_res, lambda x: x[5], compat_str)
190 warn(
191 'Unable to login: %s' % 'Invalid password'
192 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
193 return False
194
195 res = try_get(challenge_results, lambda x: x[0][-1], list)
196 if not res:
197 warn('Unable to extract result entry')
198 return False
199
9a6628aa
S
200 login_challenge = try_get(res, lambda x: x[0][0], list)
201 if login_challenge:
202 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
203 if challenge_str == 'TWO_STEP_VERIFICATION':
3995d37d
S
204 # SEND_SUCCESS - TFA code has been successfully sent to phone
205 # QUOTA_EXCEEDED - reached the limit of TFA codes
9a6628aa 206 status = try_get(login_challenge, lambda x: x[5], compat_str)
3995d37d
S
207 if status == 'QUOTA_EXCEEDED':
208 warn('Exceeded the limit of TFA codes, try later')
209 return False
210
211 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
212 if not tl:
213 warn('Unable to extract TL')
214 return False
215
216 tfa_code = self._get_tfa_info('2-step verification code')
217
218 if not tfa_code:
219 warn(
220 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
221 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
222 return False
223
224 tfa_code = remove_start(tfa_code, 'G-')
225
226 tfa_req = [
227 user_hash, None, 2, None,
228 [
229 9, None, None, None, None, None, None, None,
230 [None, tfa_code, True, 2]
231 ]]
232
233 tfa_results = req(
234 self._TFA_URL.format(tl), tfa_req,
235 'Submitting TFA code', 'Unable to submit TFA code')
236
237 if tfa_results is False:
238 return False
239
240 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
241 if tfa_res:
242 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
243 warn(
244 'Unable to finish TFA: %s' % 'Invalid TFA code'
245 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
246 return False
247
248 check_cookie_url = try_get(
249 tfa_results, lambda x: x[0][-1][2], compat_str)
9a6628aa
S
250 else:
251 CHALLENGES = {
252 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
253 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
254 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
255 }
256 challenge = CHALLENGES.get(
257 challenge_str,
258 '%s returned error %s.' % (self.IE_NAME, challenge_str))
259 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
260 return False
3995d37d
S
261 else:
262 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
263
264 if not check_cookie_url:
265 warn('Unable to extract CheckCookie URL')
266 return False
e00eb564
S
267
268 check_cookie_results = self._download_webpage(
3995d37d
S
269 check_cookie_url, None, 'Checking cookie', fatal=False)
270
271 if check_cookie_results is False:
272 return False
e00eb564 273
3995d37d
S
274 if 'https://myaccount.google.com/' not in check_cookie_results:
275 warn('Unable to log in')
b2e8bc1b 276 return False
e00eb564 277
b2e8bc1b 278 return True
2d6659b9 279 '''
b2e8bc1b 280
cce889b9 281 def _initialize_consent(self):
282 cookies = self._get_cookies('https://www.youtube.com/')
283 if cookies.get('__Secure-3PSID'):
284 return
285 consent_id = None
286 consent = cookies.get('CONSENT')
287 if consent:
288 if 'YES' in consent.value:
289 return
290 consent_id = self._search_regex(
291 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
292 if not consent_id:
293 consent_id = random.randint(100, 999)
294 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 295
b2e8bc1b 296 def _real_initialize(self):
cce889b9 297 self._initialize_consent()
b2e8bc1b
JMF
298 if self._downloader is None:
299 return
b2e8bc1b
JMF
300 if not self._login():
301 return
c5e8d7af 302
a0566bbf 303 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
29f7c58a 304 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
305 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 306
109dd3b2 307 _YT_DEFAULT_YTCFGS = {
308 'WEB': {
309 'INNERTUBE_API_VERSION': 'v1',
310 'INNERTUBE_CLIENT_NAME': 'WEB',
311 'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
312 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
313 'INNERTUBE_CONTEXT': {
314 'client': {
315 'clientName': 'WEB',
316 'clientVersion': '2.20210622.10.00',
317 'hl': 'en',
318 }
319 },
320 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
321 },
322 'WEB_REMIX': {
323 'INNERTUBE_API_VERSION': 'v1',
324 'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
325 'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
326 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
327 'INNERTUBE_CONTEXT': {
328 'client': {
329 'clientName': 'WEB_REMIX',
330 'clientVersion': '1.20210621.00.00',
331 'hl': 'en',
332 }
333 },
334 'INNERTUBE_CONTEXT_CLIENT_NAME': 67
335 },
336 'WEB_EMBEDDED_PLAYER': {
337 'INNERTUBE_API_VERSION': 'v1',
338 'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
339 'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
340 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
341 'INNERTUBE_CONTEXT': {
342 'client': {
343 'clientName': 'WEB_EMBEDDED_PLAYER',
344 'clientVersion': '1.20210620.0.1',
345 'hl': 'en',
346 }
347 },
348 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
349 },
350 'ANDROID': {
351 'INNERTUBE_API_VERSION': 'v1',
352 'INNERTUBE_CLIENT_NAME': 'ANDROID',
353 'INNERTUBE_CLIENT_VERSION': '16.20',
354 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
355 'INNERTUBE_CONTEXT': {
356 'client': {
357 'clientName': 'ANDROID',
358 'clientVersion': '16.20',
359 'hl': 'en',
360 }
361 },
362 'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID'
363 },
364 'ANDROID_EMBEDDED_PLAYER': {
365 'INNERTUBE_API_VERSION': 'v1',
366 'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
367 'INNERTUBE_CLIENT_VERSION': '16.20',
368 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
369 'INNERTUBE_CONTEXT': {
370 'client': {
371 'clientName': 'ANDROID_EMBEDDED_PLAYER',
372 'clientVersion': '16.20',
373 'hl': 'en',
374 }
375 },
376 'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER'
377 },
378 'ANDROID_MUSIC': {
379 'INNERTUBE_API_VERSION': 'v1',
380 'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
381 'INNERTUBE_CLIENT_VERSION': '4.32',
382 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
383 'INNERTUBE_CONTEXT': {
384 'client': {
385 'clientName': 'ANDROID_MUSIC',
386 'clientVersion': '4.32',
387 'hl': 'en',
388 }
389 },
390 'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID_MUSIC'
391 }
392 }
393
394 _YT_DEFAULT_INNERTUBE_HOSTS = {
395 'DIRECT': 'youtubei.googleapis.com',
396 'WEB': 'www.youtube.com',
397 'WEB_REMIX': 'music.youtube.com',
398 'ANDROID_MUSIC': 'music.youtube.com'
399 }
400
401 def _get_default_ytcfg(self, client='WEB'):
402 if client in self._YT_DEFAULT_YTCFGS:
403 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
404 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
405 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
406
def _get_innertube_host(self, client='WEB'):
    """Look up the Innertube hostname for *client*, with the WEB entry as the fallback key."""
    candidate_keys = (client, 'WEB')
    return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, candidate_keys)
409
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
    """try_get() on *ytcfg* with a fallback to the default ytcfg of *default_client*.

    The fallback lookup is made whenever the lookup on *ytcfg* gives a
    falsy result.
    """
    value = try_get(ytcfg, getter, expected_type)
    if value:
        return value
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
414
def _extract_client_name(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_NAME from *ytcfg*, or from the defaults of *default_client*."""
    return self._ytcfg_get_safe(
        ytcfg, lambda cfg: cfg['INNERTUBE_CLIENT_NAME'],
        expected_type=compat_str, default_client=default_client)
417
def _extract_client_version(self, ytcfg, default_client='WEB'):
    """Return INNERTUBE_CLIENT_VERSION from *ytcfg*, or from the defaults of *default_client*."""
    return self._ytcfg_get_safe(
        ytcfg, lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
        expected_type=compat_str, default_client=default_client)
420
def _extract_api_key(self, ytcfg=None, default_client='WEB'):
    """Return INNERTUBE_API_KEY from *ytcfg*, or from the defaults of *default_client*."""
    return self._ytcfg_get_safe(
        ytcfg, lambda cfg: cfg['INNERTUBE_API_KEY'],
        expected_type=compat_str, default_client=default_client)
423
def _extract_context(self, ytcfg=None, default_client='WEB'):
    """Return the Innertube context dict for API requests.

    The INNERTUBE_CONTEXT of *ytcfg* is returned as-is when it is a
    non-empty dict. Otherwise the default context of *default_client* is
    used; if a *ytcfg* was given, its client name and version (and its
    VISITOR_DATA, when present) are written into that default context.
    """
    def context_of(cfg):
        return try_get(cfg, lambda x: x['INNERTUBE_CONTEXT'], dict)

    own_context = context_of(ytcfg)
    if own_context:
        return own_context

    fallback = context_of(self._get_default_ytcfg(default_client))
    if ytcfg:
        # Recreate the client context (required)
        client = fallback['client']
        client['clientVersion'] = self._extract_client_version(ytcfg, default_client)
        client['clientName'] = self._extract_client_name(ytcfg, default_client)
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            client['visitorData'] = visitor_data
    return fallback
443
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """Build the SAPISIDHASH authorization value for *origin*.

    Returns None when neither a __Secure-3PAPISID nor a SAPISID cookie is
    found for youtube.com. When the SAPISID cookie is missing, it is set
    from the cookie that was found.
    """
    cookies = self._get_cookies('https://www.youtube.com')
    # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
    # See: https://github.com/yt-dlp/yt-dlp/issues/393
    auth_cookie = dict_get(cookies, ('__Secure-3PAPISID', 'SAPISID'))
    if auth_cookie is None:
        return None
    timestamp = round(time.time())
    if not cookies.get('SAPISID'):
        # SAPISID cookie is required if not already present
        self._set_cookie(
            '.youtube.com', 'SAPISID', auth_cookie.value, secure=True, expire_time=timestamp + 3600)
    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    payload = f'{timestamp} {auth_cookie.value} {origin}'.encode('utf-8')
    digest = hashlib.sha1(payload).hexdigest()
    return f'SAPISIDHASH {timestamp}_{digest}'
a5c56234
M
461
462 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 463 note='Downloading API JSON', errnote='Unable to download API page',
109dd3b2 464 context=None, api_key=None, api_hostname=None, default_client='WEB'):
f4f751af 465
109dd3b2 466 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
8bdd16b4 467 data.update(query)
109dd3b2 468 real_headers = self._generate_api_headers(client=default_client)
f4f751af 469 real_headers.update({'content-type': 'application/json'})
470 if headers:
471 real_headers.update(headers)
545cc85d 472 return self._download_json(
109dd3b2 473 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
a5c56234 474 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 475 data=json.dumps(data).encode('utf8'), headers=real_headers,
476 query={'key': api_key or self._extract_api_key()})
477
8bdd16b4 478 def _extract_yt_initial_data(self, video_id, webpage):
479 return self._parse_json(
480 self._search_regex(
29f7c58a 481 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
a0566bbf 482 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
8bdd16b4 483 video_id)
0c148415 484
a1c5d2ca
M
def _extract_identity_token(self, webpage, item_id):
    """Return the ID_TOKEN found in the ytcfg of *webpage*, else the result of a regex search on *webpage*."""
    ytcfg = self._extract_ytcfg(item_id, webpage)
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if ytcfg else None
    if token:
        return token
    # No usable token in ytcfg: look for it in the raw page (None if absent)
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
494
@staticmethod
def _extract_account_syncid(data):
    """
    Extract syncId required to download private playlists of secondary channels
    @param data Either response or ytcfg
    @returns    The channel syncid from the datasync id, else the
                DELEGATED_SESSION_ID entry of data, else None
    """
    datasync_id = try_get(
        data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
               lambda x: x['DATASYNC_ID']), compat_str) or ''
    sync_ids = datasync_id.split("||")
    if len(sync_ids) >= 2 and sync_ids[1]:
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        return sync_ids[0]
    # ytcfg includes channel_syncid if on secondary channel.
    # try_get (instead of data.get) keeps this safe when data is None or not a dict
    return try_get(data, lambda x: x['DELEGATED_SESSION_ID'])
a1c5d2ca 510
29f7c58a 511 def _extract_ytcfg(self, video_id, webpage):
8c54a305 512 if not webpage:
513 return {}
29f7c58a 514 return self._parse_json(
515 self._search_regex(
516 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
f4f751af 517 default='{}'), video_id, fatal=False) or {}
518
def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None,
                          visitor_data=None, api_hostname=None, client='WEB'):
    """Build the header dict for an Innertube API request.

    The client name/version headers and Origin are always present. The
    identity token, page id/auth user, visitor id and SAPISIDHASH
    authorization headers are added only when the respective value is
    available. Without an explicit *visitor_data*, the value is looked up
    in the context extracted from *ytcfg*.
    """
    host = api_hostname or self._get_innertube_host(client)
    origin = 'https://' + host
    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=client)
    headers = {
        'X-YouTube-Client-Name': compat_str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, client),
        'Origin': origin
    }
    if ytcfg and not visitor_data:
        visitor_data = try_get(
            self._extract_context(ytcfg, client), lambda x: x['client']['visitorData'], compat_str)
    if identity_token:
        headers['X-Youtube-Identity-Token'] = identity_token
    if account_syncid:
        headers.update({
            'X-Goog-PageId': account_syncid,
            'X-Goog-AuthUser': 0,
        })
    if visitor_data:
        headers['X-Goog-Visitor-Id'] = visitor_data
    authorization = self._generate_sapisidhash_header(origin)
    if authorization is not None:
        headers.update({
            'Authorization': authorization,
            'X-Origin': origin,
        })
    return headers
29f7c58a 543
2d6659b9 544 @staticmethod
545 def _build_api_continuation_query(continuation, ctp=None):
546 query = {
547 'continuation': continuation
548 }
549 # TODO: Inconsistency with clickTrackingParams.
550 # Currently we have a fixed ctp contained within context (from ytcfg)
551 # and a ctp in root query for continuation.
552 if ctp:
553 query['clickTracking'] = {'clickTrackingParams': ctp}
554 return query
555
@classmethod
def _continuation_query_ajax_to_api(cls, continuation_query):
    """Convert a query from _build_continuation_query into the form of _build_api_continuation_query."""
    token = dict_get(continuation_query, ('continuation', 'ctoken'))
    ctp = continuation_query.get('itct')
    return cls._build_api_continuation_query(token, ctp)
560
561 @staticmethod
562 def _build_continuation_query(continuation, ctp=None):
563 query = {
564 'ctoken': continuation,
565 'continuation': continuation,
566 }
567 if ctp:
568 query['itct'] = ctp
569 return query
570
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from the next/reload continuation data of *renderer*.

    Returns None when the data or its continuation token is missing.
    """
    data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = data.get('continuation') if data else None
    if not token:
        return None
    return cls._build_continuation_query(token, data.get('clickTrackingParams'))
583
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    Returns None when *continuation_ep* is not a dict or holds no
    continuationCommand token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
    if not token:
        return None
    return cls._build_continuation_query(token, continuation_ep.get('clickTrackingParams'))
593
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query of *renderer*, or None if there is none.

    The next/reload continuation data is tried first. After that, the
    entries of the 'contents' and 'items' lists are scanned for a
    continuationItemRenderer with a usable endpoint or button command.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query
    entries = [
        entry
        for key in ('contents', 'items')
        for entry in try_get(renderer, lambda x: x[key], list) or []]
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        endpoint = try_get(
            entry, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                    lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
            dict)
        query = cls._extract_continuation_ep_data(endpoint)
        if query:
            return query
    return None
612
@staticmethod
def _extract_alerts(data):
    """
    Yield an (alert_type, message) pair for each alert found in data
    @param data Response whose 'alerts' entry is read when it is a list

    Alerts without a string type or without any text are skipped. The
    message is the simpleText of the alert followed by the text of its
    runs, and every alert is yielded at most once.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # try_get also covers alerts that are not dicts
            alert_type = try_get(alert, lambda x: x['type'], compat_str)
            if not alert_type:
                continue
            message = try_get(alert, lambda x: x['text']['simpleText'], compat_str) or ''
            runs = try_get(alert, lambda x: x['text']['runs'], list) or []
            # A run without a text string contributes nothing instead of raising
            message += ''.join(
                try_get(run, lambda x: x['text'], compat_str) or '' for run in runs)
            if message:
                yield alert_type, message
629
630 def _report_alerts(self, alerts, expected=True):
631 errors = []
632 warnings = []
633 for alert_type, alert_message in alerts:
634 if alert_type.lower() == 'error':
635 errors.append([alert_type, alert_message])
636 else:
637 warnings.append([alert_type, alert_message])
638
639 for alert_type, alert_message in (warnings + errors[:-1]):
640 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
641 if errors:
642 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
643
644 def _extract_and_report_alerts(self, data, *args, **kwargs):
645 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
646
647 def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
648 ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
649 default_client='WEB'):
650 response = None
651 last_error = None
652 count = -1
653 retries = self.get_param('extractor_retries', 3)
654 if check_get_keys is None:
655 check_get_keys = []
656 while count < retries:
657 count += 1
658 if last_error:
659 self.report_warning('%s. Retrying ...' % last_error)
660 try:
661 response = self._call_api(
662 ep=ep, fatal=True, headers=headers,
663 video_id=item_id, query=query,
664 context=self._extract_context(ytcfg, default_client),
665 api_key=self._extract_api_key(ytcfg, default_client),
666 api_hostname=api_hostname, default_client=default_client,
667 note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
668 except ExtractorError as e:
669 if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
670 # Downloading page may result in intermittent 5xx HTTP error
671 # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
672 last_error = 'HTTP Error %s' % e.cause.code
673 if count < retries:
674 continue
675 if fatal:
676 raise
677 else:
678 self.report_warning(error_to_compat_str(e))
679 return
680
681 else:
682 # Youtube may send alerts if there was an issue with the continuation page
683 try:
684 self._extract_and_report_alerts(response, expected=False)
685 except ExtractorError as e:
686 if fatal:
687 raise
688 self.report_warning(error_to_compat_str(e))
689 return
690 if not check_get_keys or dict_get(response, check_get_keys):
691 break
692 # Youtube sometimes sends incomplete data
693 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
694 last_error = 'Incomplete data received'
695 if count >= retries:
696 if fatal:
697 raise ExtractorError(last_error)
698 else:
699 self.report_warning(last_error)
700 return
701 return response
702
9297939e 703 @staticmethod
704 def is_music_url(url):
705 return re.match(r'https?://music\.youtube\.com/', url) is not None
706
def _extract_video(self, renderer):
    """Convert a video renderer dict into a 'url' result handled by YoutubeIE.

    Fields that cannot be read from *renderer* end up as None.
    """
    def text_of(*getters):
        # String found by the first getter that succeeds on the renderer
        return try_get(renderer, getters, compat_str)

    video_id = renderer.get('videoId')
    # The view count is the leading run of digits and commas, whitespace removed
    view_count_text = text_of(lambda x: x['viewCountText']['simpleText']) or ''
    view_count = str_to_int(self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
        'view count', default=None))
    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
        'title': text_of(
            lambda x: x['title']['runs'][0]['text'],
            lambda x: x['title']['simpleText']),
        'description': text_of(lambda x: x['descriptionSnippet']['runs'][0]['text']),
        'duration': parse_duration(text_of(lambda x: x['lengthText']['simpleText'])),
        'view_count': view_count,
        'uploader': text_of(
            lambda x: x['ownerText']['runs'][0]['text'],
            lambda x: x['shortBylineText']['runs'][0]['text']),
    }
738
0c148415 739
360e1ca5 740class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 741 IE_DESC = 'YouTube.com'
bc2ca1bb 742 _INVIDIOUS_SITES = (
743 # invidious-redirect websites
744 r'(?:www\.)?redirect\.invidious\.io',
745 r'(?:(?:www|dev)\.)?invidio\.us',
746 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
747 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 748 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 749 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 750 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
bc2ca1bb 751 # youtube-dl invidious instances list
752 r'(?:(?:www|no)\.)?invidiou\.sh',
753 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
754 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 755 r'(?:www\.)?invidious\.mastodon\.host',
756 r'(?:www\.)?invidious\.zapashcanon\.fr',
ed807c18 757 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
201c1459 758 r'(?:www\.)?invidious\.tinfoil-hat\.net',
759 r'(?:www\.)?invidious\.himiko\.cloud',
760 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 761 r'(?:www\.)?invidious\.tube',
762 r'(?:www\.)?invidiou\.site',
763 r'(?:www\.)?invidious\.site',
764 r'(?:www\.)?invidious\.xyz',
765 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 766 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 767 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 768 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 769 r'(?:www\.)?tube\.poal\.co',
770 r'(?:www\.)?tube\.connect\.cafe',
771 r'(?:www\.)?vid\.wxzm\.sx',
772 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 773 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 774 r'(?:www\.)?yewtu\.be',
775 r'(?:www\.)?yt\.elukerio\.org',
776 r'(?:www\.)?yt\.lelux\.fi',
777 r'(?:www\.)?invidious\.ggc-project\.de',
778 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 779 r'(?:www\.)?ytprivate\.com',
780 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 781 r'(?:www\.)?invidious\.toot\.koeln',
782 r'(?:www\.)?invidious\.fdn\.fr',
783 r'(?:www\.)?watch\.nettohikari\.com',
ed807c18 784 r'(?:www\.)?invidious\.namazso\.eu',
785 r'(?:www\.)?invidious\.silkky\.cloud',
786 r'(?:www\.)?invidious\.exonip\.de',
787 r'(?:www\.)?invidious\.riverside\.rocks',
788 r'(?:www\.)?invidious\.blamefran\.net',
789 r'(?:www\.)?invidious\.moomoo\.de',
790 r'(?:www\.)?ytb\.trom\.tf',
791 r'(?:www\.)?yt\.cyberhost\.uk',
bc2ca1bb 792 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
793 r'(?:www\.)?qklhadlycap4cnod\.onion',
794 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
795 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
796 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
797 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
798 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
799 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
ed807c18 800 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
801 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
802 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
803 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
bc2ca1bb 804 )
# Verbose-mode ((?x)) regex recognising a reference to a single video.
#
# * Group 1 is the optional URL prefix (scheme, one of the accepted hostnames
#   and whatever precedes the ID); when it is absent a bare ID is accepted.
# * Named group ``id`` is the 11-character video ID.
# * The conditional ``(?(1).+)?`` only allows trailing text when a URL prefix
#   (group 1) was matched.
#
# The pattern is a %-format template: ``%(invidious)s`` is replaced by the
# alternation of every entry in ``_INVIDIOUS_SITES``. Because of that, a
# literal percent sign added anywhere in the pattern (including its inline
# comments) would have to be written as ``%%``.
_VALID_URL = r"""(?x)^
                 (
                     (?:https?://|//)                                    # http(s):// or protocol-independent URL
                     (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
                        (?:www\.)?deturl\.com/www\.youtube\.com|
                        (?:www\.)?pwnyoutube\.com|
                        (?:www\.)?hooktube\.com|
                        (?:www\.)?yourepeat\.com|
                        tube\.majestyc\.net|
                        %(invidious)s|
                        youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
                     (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                     (?:                                                  # the various things that can precede the ID:
                         (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
                         |(?:                                             # or the v= param in all its forms
                             (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                             (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                             (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
                             v=
                         )
                     ))
                     |(?:
                        youtu\.be|                                        # just youtu.be/xxxx
                        vid\.plus|                                        # or vid.plus/xxxx
                        zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
                        %(invidious)s
                     )/
                     |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                     )
                 )?                                                       # all until now is optional -> you can pass the naked ID
                 (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
                 (?(1).+)?                                                # if we found the ID, everything can follow
                 (?:\#|$)""" % {
    'invidious': '|'.join(_INVIDIOUS_SITES),
}
# Regexes that pull an identifier (named group ``id``) out of a player
# script URL/path. Three shapes are covered:
#   1. ``/s/player/<id>/player...``
#   2. ``/<id>/player_ias.vflset[/<locale>]/base.js`` and the
#      ``player-plasma-ias-(phone|tablet)-<locale>.vflset/base.js`` variant
#   3. any ``...vfl<id>....js`` name
# NOTE(review): presumably tried in order until one matches -- confirm
# against the code that consumes this tuple.
_PLAYER_INFO_RE = (
    r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
    r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
    r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)
# Static metadata table keyed by itag (as a string). Each value is a partial
# format description; depending on the entry it may carry ``ext``, ``width``,
# ``height``, ``fps``, ``acodec``, ``vcodec``, ``abr``, ``container``,
# ``format_note``, ``preference`` or ``protocol``.
# 3D entries carry ``preference`` -20 and HLS entries -10.
# NOTE(review): presumably used to fill in fields the server does not report
# for a given itag -- confirm against the code that reads this table.
_formats = {
    '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
    '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
    '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
    '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
    '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
    '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

    # 3D videos
    '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
    '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
    '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

    # Apple HTTP Live Streaming
    '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

    # DASH mp4 video
    '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
    '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

    # DASH mp4 audio
    '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
    '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
    '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
    '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
    '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

    # DASH webm
    '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
    '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
    '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

    # DASH webm audio
    '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
    '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

    # DASH webm audio with opus inside
    '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
    '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
    '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

    # RTMP (unnamed)
    '_rtmp': {'protocol': 'rtmp'},

    # av01 video only formats sometimes served with "unknown" codecs
    '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
}
# Subtitle format identifiers.
# NOTE(review): presumably the formats requested for each caption track --
# confirm against the subtitle extraction code.
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

# Message texts associated with age-gated videos.
# NOTE(review): presumably compared against the playability-status reason
# returned by the server -- confirm against the caller.
_AGE_GATE_REASONS = (
    'Sign in to confirm your age',
    'This video may be inappropriate for some users.',
    'Sorry, this content is age-restricted.')

# Geo-bypass flag, switched off for this extractor.
_GEO_BYPASS = False

# Name of this extractor.
IE_NAME = 'youtube'
2eb88d95
PH
960 _TESTS = [
961 {
2d3d2997 962 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
963 'info_dict': {
964 'id': 'BaW_jenozKc',
965 'ext': 'mp4',
3867038a 966 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
967 'uploader': 'Philipp Hagemeister',
968 'uploader_id': 'phihag',
ec85ded8 969 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
970 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
971 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 972 'upload_date': '20121002',
3867038a 973 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 974 'categories': ['Science & Technology'],
3867038a 975 'tags': ['youtube-dl'],
556dbe7f 976 'duration': 10,
dbdaaa23 977 'view_count': int,
3e7c1224
PH
978 'like_count': int,
979 'dislike_count': int,
7c80519c 980 'start_time': 1,
297a564b 981 'end_time': 9,
2eb88d95 982 }
0e853ca4 983 },
fccd3771 984 {
4bc3a23e
PH
985 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
986 'note': 'Embed-only video (#1746)',
987 'info_dict': {
988 'id': 'yZIXLfi8CZQ',
989 'ext': 'mp4',
990 'upload_date': '20120608',
991 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
992 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
993 'uploader': 'SET India',
94bfcd23 994 'uploader_id': 'setindia',
ec85ded8 995 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 996 'age_limit': 18,
545cc85d 997 },
998 'skip': 'Private video',
fccd3771 999 },
11b56058 1000 {
8bdd16b4 1001 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1002 'note': 'Use the first video ID in the URL',
1003 'info_dict': {
1004 'id': 'BaW_jenozKc',
1005 'ext': 'mp4',
3867038a 1006 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1007 'uploader': 'Philipp Hagemeister',
1008 'uploader_id': 'phihag',
ec85ded8 1009 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 1010 'upload_date': '20121002',
3867038a 1011 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 1012 'categories': ['Science & Technology'],
3867038a 1013 'tags': ['youtube-dl'],
556dbe7f 1014 'duration': 10,
dbdaaa23 1015 'view_count': int,
11b56058
PM
1016 'like_count': int,
1017 'dislike_count': int,
34a7de29
S
1018 },
1019 'params': {
1020 'skip_download': True,
1021 },
11b56058 1022 },
dd27fd17 1023 {
2d3d2997 1024 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1025 'note': '256k DASH audio (format 141) via DASH manifest',
1026 'info_dict': {
1027 'id': 'a9LDPn-MO4I',
1028 'ext': 'm4a',
1029 'upload_date': '20121002',
1030 'uploader_id': '8KVIDEO',
ec85ded8 1031 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1032 'description': '',
1033 'uploader': '8KVIDEO',
1034 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1035 },
4bc3a23e
PH
1036 'params': {
1037 'youtube_include_dash_manifest': True,
1038 'format': '141',
4919603f 1039 },
de3c7fe0 1040 'skip': 'format 141 not served anymore',
dd27fd17 1041 },
8bdd16b4 1042 # DASH manifest with encrypted signature
1043 {
1044 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1045 'info_dict': {
1046 'id': 'IB3lcPjvWLA',
1047 'ext': 'm4a',
1048 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1049 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1050 'duration': 244,
1051 'uploader': 'AfrojackVEVO',
1052 'uploader_id': 'AfrojackVEVO',
1053 'upload_date': '20131011',
cc2db878 1054 'abr': 129.495,
8bdd16b4 1055 },
1056 'params': {
1057 'youtube_include_dash_manifest': True,
1058 'format': '141/bestaudio[ext=m4a]',
1059 },
1060 },
aa79ac0c
PH
1061 # Controversy video
1062 {
1063 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
1064 'info_dict': {
1065 'id': 'T4XJQO3qol8',
1066 'ext': 'mp4',
556dbe7f 1067 'duration': 219,
aa79ac0c 1068 'upload_date': '20100909',
4fe54c12 1069 'uploader': 'Amazing Atheist',
aa79ac0c 1070 'uploader_id': 'TheAmazingAtheist',
ec85ded8 1071 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 1072 'title': 'Burning Everyone\'s Koran',
545cc85d 1073 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 1074 }
c522adb1 1075 },
dd2d55f1 1076 # Normal age-gate video (embed allowed)
c522adb1 1077 {
2d3d2997 1078 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1079 'info_dict': {
1080 'id': 'HtVdAasjOgU',
1081 'ext': 'mp4',
1082 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1083 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1084 'duration': 142,
c522adb1
JMF
1085 'uploader': 'The Witcher',
1086 'uploader_id': 'WitcherGame',
ec85ded8 1087 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1088 'upload_date': '20140605',
34952f09 1089 'age_limit': 18,
c522adb1
JMF
1090 },
1091 },
8bdd16b4 1092 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1093 # YouTube Red ad is not captured for creator
1094 {
1095 'url': '__2ABJjxzNo',
1096 'info_dict': {
1097 'id': '__2ABJjxzNo',
1098 'ext': 'mp4',
1099 'duration': 266,
1100 'upload_date': '20100430',
1101 'uploader_id': 'deadmau5',
1102 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1103 'creator': 'deadmau5',
1104 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1105 'uploader': 'deadmau5',
1106 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1107 'alt_title': 'Some Chords',
8bdd16b4 1108 },
1109 'expected_warnings': [
1110 'DASH manifest missing',
1111 ]
1112 },
067aa17e 1113 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1114 {
1115 'url': 'lqQg6PlCWgI',
1116 'info_dict': {
1117 'id': 'lqQg6PlCWgI',
1118 'ext': 'mp4',
556dbe7f 1119 'duration': 6085,
90227264 1120 'upload_date': '20150827',
cbe2bd91 1121 'uploader_id': 'olympic',
ec85ded8 1122 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1123 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 1124 'uploader': 'Olympic',
cbe2bd91
PH
1125 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1126 },
1127 'params': {
1128 'skip_download': 'requires avconv',
e52a40ab 1129 }
cbe2bd91 1130 },
6271f1ca
PH
1131 # Non-square pixels
1132 {
1133 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1134 'info_dict': {
1135 'id': '_b-2C3KPAM0',
1136 'ext': 'mp4',
1137 'stretched_ratio': 16 / 9.,
556dbe7f 1138 'duration': 85,
6271f1ca
PH
1139 'upload_date': '20110310',
1140 'uploader_id': 'AllenMeow',
ec85ded8 1141 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1142 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1143 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1144 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1145 },
06b491eb
S
1146 },
1147 # url_encoded_fmt_stream_map is empty string
1148 {
1149 'url': 'qEJwOuvDf7I',
1150 'info_dict': {
1151 'id': 'qEJwOuvDf7I',
f57b7835 1152 'ext': 'webm',
06b491eb
S
1153 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1154 'description': '',
1155 'upload_date': '20150404',
1156 'uploader_id': 'spbelect',
1157 'uploader': 'Наблюдатели Петербурга',
1158 },
1159 'params': {
1160 'skip_download': 'requires avconv',
e323cf3f
S
1161 },
1162 'skip': 'This live event has ended.',
06b491eb 1163 },
067aa17e 1164 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1165 {
1166 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1167 'info_dict': {
1168 'id': 'FIl7x6_3R5Y',
eb6793ba 1169 'ext': 'webm',
da77d856
S
1170 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1171 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1172 'duration': 220,
da77d856
S
1173 'upload_date': '20150625',
1174 'uploader_id': 'dorappi2000',
ec85ded8 1175 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1176 'uploader': 'dorappi2000',
eb6793ba 1177 'formats': 'mincount:31',
da77d856 1178 },
eb6793ba 1179 'skip': 'not actual anymore',
2ee8f5d8 1180 },
8a1a26ce
YCH
1181 # DASH manifest with segment_list
1182 {
1183 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1184 'md5': '8ce563a1d667b599d21064e982ab9e31',
1185 'info_dict': {
1186 'id': 'CsmdDsKjzN8',
1187 'ext': 'mp4',
17ee98e1 1188 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1189 'uploader': 'Airtek',
1190 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1191 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1192 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1193 },
1194 'params': {
1195 'youtube_include_dash_manifest': True,
1196 'format': '135', # bestvideo
be49068d
S
1197 },
1198 'skip': 'This live event has ended.',
2ee8f5d8 1199 },
cf7e015f
S
1200 {
1201 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1202 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1203 'info_dict': {
545cc85d 1204 'id': 'jvGDaLqkpTg',
1205 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1206 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1207 },
1208 'playlist': [{
1209 'info_dict': {
545cc85d 1210 'id': 'jvGDaLqkpTg',
cf7e015f 1211 'ext': 'mp4',
545cc85d 1212 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1213 'description': 'md5:e03b909557865076822aa169218d6a5d',
1214 'duration': 10643,
1215 'upload_date': '20161111',
1216 'uploader': 'Team PGP',
1217 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1218 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1219 },
1220 }, {
1221 'info_dict': {
545cc85d 1222 'id': '3AKt1R1aDnw',
cf7e015f 1223 'ext': 'mp4',
545cc85d 1224 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1225 'description': 'md5:e03b909557865076822aa169218d6a5d',
1226 'duration': 10991,
1227 'upload_date': '20161111',
1228 'uploader': 'Team PGP',
1229 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1230 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1231 },
1232 }, {
1233 'info_dict': {
545cc85d 1234 'id': 'RtAMM00gpVc',
cf7e015f 1235 'ext': 'mp4',
545cc85d 1236 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1237 'description': 'md5:e03b909557865076822aa169218d6a5d',
1238 'duration': 10995,
1239 'upload_date': '20161111',
1240 'uploader': 'Team PGP',
1241 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1242 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1243 },
1244 }, {
1245 'info_dict': {
545cc85d 1246 'id': '6N2fdlP3C5U',
cf7e015f 1247 'ext': 'mp4',
545cc85d 1248 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1249 'description': 'md5:e03b909557865076822aa169218d6a5d',
1250 'duration': 10990,
1251 'upload_date': '20161111',
1252 'uploader': 'Team PGP',
1253 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1254 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1255 },
1256 }],
1257 'params': {
1258 'skip_download': True,
1259 },
cbaed4bb 1260 },
f9f49d87 1261 {
067aa17e 1262 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1263 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1264 'info_dict': {
1265 'id': 'gVfLd0zydlo',
1266 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1267 },
1268 'playlist_count': 2,
be49068d 1269 'skip': 'Not multifeed anymore',
f9f49d87 1270 },
cbaed4bb 1271 {
2d3d2997 1272 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1273 'only_matching': True,
0e49d9a6 1274 },
6d4fc66b 1275 {
2d3d2997 1276 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1277 'only_matching': True,
1278 },
0e49d9a6 1279 {
067aa17e 1280 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1281 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1282 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1283 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1284 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1285 'info_dict': {
1286 'id': 'lsguqyKfVQg',
1287 'ext': 'mp4',
1288 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 1289 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 1290 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1291 'duration': 133,
0e49d9a6
LL
1292 'upload_date': '20151119',
1293 'uploader_id': 'IronSoulElf',
ec85ded8 1294 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1295 'uploader': 'IronSoulElf',
eb6793ba
S
1296 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1297 'track': 'Dark Walk - Position Music',
1298 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 1299 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1300 },
1301 'params': {
1302 'skip_download': True,
1303 },
1304 },
61f92af1 1305 {
067aa17e 1306 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1307 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1308 'only_matching': True,
1309 },
313dfc45
LL
1310 {
1311 # Video with yt:stretch=17:0
1312 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1313 'info_dict': {
1314 'id': 'Q39EVAstoRM',
1315 'ext': 'mp4',
1316 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1317 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1318 'upload_date': '20151107',
1319 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1320 'uploader': 'CH GAMER DROID',
1321 },
1322 'params': {
1323 'skip_download': True,
1324 },
be49068d 1325 'skip': 'This video does not exist.',
313dfc45 1326 },
201c1459 1327 {
1328 # Video with incomplete 'yt:stretch=16:'
1329 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1330 'only_matching': True,
1331 },
7caf9830
S
1332 {
1333 # Video licensed under Creative Commons
1334 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1335 'info_dict': {
1336 'id': 'M4gD1WSo5mA',
1337 'ext': 'mp4',
1338 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1339 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1340 'duration': 721,
7caf9830
S
1341 'upload_date': '20150127',
1342 'uploader_id': 'BerkmanCenter',
ec85ded8 1343 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1344 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1345 'license': 'Creative Commons Attribution license (reuse allowed)',
1346 },
1347 'params': {
1348 'skip_download': True,
1349 },
1350 },
fd050249
S
1351 {
1352 # Channel-like uploader_url
1353 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1354 'info_dict': {
1355 'id': 'eQcmzGIKrzg',
1356 'ext': 'mp4',
1357 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1358 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1359 'duration': 4060,
fd050249 1360 'upload_date': '20151119',
eb6793ba 1361 'uploader': 'Bernie Sanders',
fd050249 1362 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1363 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1364 'license': 'Creative Commons Attribution license (reuse allowed)',
1365 },
1366 'params': {
1367 'skip_download': True,
1368 },
1369 },
040ac686
S
1370 {
1371 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1372 'only_matching': True,
7f29cf54
S
1373 },
1374 {
067aa17e 1375 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1376 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1377 'only_matching': True,
6496ccb4
S
1378 },
1379 {
1380 # Rental video preview
1381 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1382 'info_dict': {
1383 'id': 'uGpuVWrhIzE',
1384 'ext': 'mp4',
1385 'title': 'Piku - Trailer',
1386 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1387 'upload_date': '20150811',
1388 'uploader': 'FlixMatrix',
1389 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1390 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1391 'license': 'Standard YouTube License',
1392 },
1393 'params': {
1394 'skip_download': True,
1395 },
eb6793ba 1396 'skip': 'This video is not available.',
022a5d66 1397 },
12afdc2a
S
1398 {
1399 # YouTube Red video with episode data
1400 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1401 'info_dict': {
1402 'id': 'iqKdEhx-dD4',
1403 'ext': 'mp4',
1404 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1405 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1406 'duration': 2085,
12afdc2a
S
1407 'upload_date': '20170118',
1408 'uploader': 'Vsauce',
1409 'uploader_id': 'Vsauce',
1410 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1411 'series': 'Mind Field',
1412 'season_number': 1,
1413 'episode_number': 1,
1414 },
1415 'params': {
1416 'skip_download': True,
1417 },
1418 'expected_warnings': [
1419 'Skipping DASH manifest',
1420 ],
1421 },
c7121fa7
S
1422 {
1423 # The following content has been identified by the YouTube community
1424 # as inappropriate or offensive to some audiences.
1425 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1426 'info_dict': {
1427 'id': '6SJNVb0GnPI',
1428 'ext': 'mp4',
1429 'title': 'Race Differences in Intelligence',
1430 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1431 'duration': 965,
1432 'upload_date': '20140124',
1433 'uploader': 'New Century Foundation',
1434 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1435 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1436 },
1437 'params': {
1438 'skip_download': True,
1439 },
545cc85d 1440 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1441 },
022a5d66
S
1442 {
1443 # itag 212
1444 'url': '1t24XAntNCY',
1445 'only_matching': True,
fd5c4aab
S
1446 },
1447 {
1448 # geo restricted to JP
1449 'url': 'sJL6WA-aGkQ',
1450 'only_matching': True,
1451 },
cd5a74a2
S
1452 {
1453 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1454 'only_matching': True,
1455 },
bc2ca1bb 1456 {
1457 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1458 'only_matching': True,
1459 },
1460 {
1461 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1462 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1463 'only_matching': True,
1464 },
825cd268
RA
1465 {
1466 # DRM protected
1467 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1468 'only_matching': True,
4fe54c12
S
1469 },
1470 {
1471 # Video with unsupported adaptive stream type formats
1472 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1473 'info_dict': {
1474 'id': 'Z4Vy8R84T1U',
1475 'ext': 'mp4',
1476 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1477 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1478 'duration': 433,
1479 'upload_date': '20130923',
1480 'uploader': 'Amelia Putri Harwita',
1481 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1482 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1483 'formats': 'maxcount:10',
1484 },
1485 'params': {
1486 'skip_download': True,
1487 'youtube_include_dash_manifest': False,
1488 },
5429d6a9 1489 'skip': 'not actual anymore',
5caabd3c 1490 },
1491 {
822b9d9c 1492 # Youtube Music Auto-generated description
5caabd3c 1493 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1494 'info_dict': {
1495 'id': 'MgNrAu2pzNs',
1496 'ext': 'mp4',
1497 'title': 'Voyeur Girl',
1498 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1499 'upload_date': '20190312',
5429d6a9
S
1500 'uploader': 'Stephen - Topic',
1501 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1502 'artist': 'Stephen',
1503 'track': 'Voyeur Girl',
1504 'album': 'it\'s too much love to know my dear',
1505 'release_date': '20190313',
1506 'release_year': 2019,
1507 },
1508 'params': {
1509 'skip_download': True,
1510 },
1511 },
66b48727
RA
1512 {
1513 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1514 'only_matching': True,
1515 },
011e75e6
S
1516 {
1517 # invalid -> valid video id redirection
1518 'url': 'DJztXj2GPfl',
1519 'info_dict': {
1520 'id': 'DJztXj2GPfk',
1521 'ext': 'mp4',
1522 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1523 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1524 'upload_date': '20090125',
1525 'uploader': 'Prochorowka',
1526 'uploader_id': 'Prochorowka',
1527 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1528 'artist': 'Panjabi MC',
1529 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1530 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1531 },
1532 'params': {
1533 'skip_download': True,
1534 },
545cc85d 1535 'skip': 'Video unavailable',
ea74e00b
DP
1536 },
1537 {
1538 # empty description results in an empty string
1539 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1540 'info_dict': {
1541 'id': 'x41yOUIvK2k',
1542 'ext': 'mp4',
1543 'title': 'IMG 3456',
1544 'description': '',
1545 'upload_date': '20170613',
1546 'uploader_id': 'ElevageOrVert',
1547 'uploader': 'ElevageOrVert',
1548 },
1549 'params': {
1550 'skip_download': True,
1551 },
1552 },
a0566bbf 1553 {
29f7c58a 1554 # with '};' inside yt initial data (see [1])
1555 # see [2] for an example with '};' inside ytInitialPlayerResponse
1556 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1557 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1558 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1559 'info_dict': {
1560 'id': 'CHqg6qOn4no',
1561 'ext': 'mp4',
1562 'title': 'Part 77 Sort a list of simple types in c#',
1563 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1564 'upload_date': '20130831',
1565 'uploader_id': 'kudvenkat',
1566 'uploader': 'kudvenkat',
1567 },
1568 'params': {
1569 'skip_download': True,
1570 },
1571 },
29f7c58a 1572 {
1573 # another example of '};' in ytInitialData
1574 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1575 'only_matching': True,
1576 },
1577 {
1578 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1579 'only_matching': True,
1580 },
545cc85d 1581 {
cc2db878 1582 # https://github.com/ytdl-org/youtube-dl/pull/28094
1583 'url': 'OtqTfy26tG0',
1584 'info_dict': {
1585 'id': 'OtqTfy26tG0',
1586 'ext': 'mp4',
1587 'title': 'Burn Out',
1588 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1589 'upload_date': '20141120',
1590 'uploader': 'The Cinematic Orchestra - Topic',
1591 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1592 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1593 'artist': 'The Cinematic Orchestra',
1594 'track': 'Burn Out',
1595 'album': 'Every Day',
1596 'release_data': None,
1597 'release_year': None,
1598 },
1599 'params': {
1600 'skip_download': True,
1601 },
545cc85d 1602 },
bc2ca1bb 1603 {
1604 # controversial video, only works with bpctr when authenticated with cookies
1605 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1606 'only_matching': True,
1607 },
f7ad7160 1608 {
1609 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1610 'url': 'cBvYw8_A0vQ',
1611 'info_dict': {
1612 'id': 'cBvYw8_A0vQ',
1613 'ext': 'mp4',
1614 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1615 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1616 'upload_date': '20201120',
1617 'uploader': 'Walk around Japan',
1618 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1619 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1620 },
1621 'params': {
1622 'skip_download': True,
1623 },
0fb983f6 1624 }, {
1625 # Has multiple audio streams
1626 'url': 'WaOKSUlf4TM',
1627 'only_matching': True
9297939e 1628 }, {
1629 # Requires Premium: has format 141 when requested using YTM url
1630 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1631 'only_matching': True
1632 }, {
120916da 1633 # multiple subtitles with same lang_code
1634 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1635 'only_matching': True,
109dd3b2 1636 }, {
1637 # Force use android client fallback
1638 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1639 'info_dict': {
1640 'id': 'YOelRv7fMxY',
1641 'title': 'Digging a Secret Tunnel from my Workshop',
1642 'ext': '3gp',
1643 'upload_date': '20210624',
1644 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1645 'uploader': 'colinfurze',
1646 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1647 'description': 'md5:ecb672623246d98c6c562eed6ae798c3'
1648 },
1649 'params': {
1650 'format': '17', # 3gp format available on android
1651 'extractor_args': {'youtube': {'player_client': ['android']}},
1652 },
120916da 1653 },
109dd3b2 1654 {
1655 # Skip download of additional client configs (remix client config in this case)
1656 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1657 'only_matching': True,
1658 'params': {
1659 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1660 },
1661 }
2eb88d95
PH
1662 ]
1663
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    A URL whose query string has a non-empty ``list`` parameter is rejected
    here; any other URL is judged by the parent class implementation.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_values = parse_qs(url).get('list', [None])
    has_list = bool(list_values[0])
    return False if has_list else super(YoutubeIE, cls).suitable(url)
1673
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create two empty caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Signature functions, keyed by (player_url, signature cache id)
    self._player_cache = dict()
    # Downloaded player code, keyed by player id
    self._code_cache = dict()
e0df6211 1678
def _extract_player_url(self, ytcfg=None, webpage=None):
    """Return the absolute URL of the JS player, or None when it is unknown.

    The URL is read from ``ytcfg['PLAYER_JS_URL']`` first; if that is missing
    and a webpage is available, it is searched for in the webpage instead.
    Scheme-relative and site-relative URLs are made absolute.
    """
    player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
    # Only search the webpage when there is one: it may be None/False after
    # a failed (non-fatal) download.
    if not player_url and webpage:
        player_url = self._search_regex(
            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
            webpage, 'player URL', fatal=False)
    if not player_url:
        # Nothing found. _decrypt_signature and _extract_signature_timestamp
        # both check for a None player_url, so report it that way instead of
        # failing on None.startswith below.
        return None
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)
    return player_url
1691
60064c53
PH
def _signature_cache_id(self, example_sig):
    """ Return a string representation of a signature """
    # The id is the length of every dot-separated part, joined by dots
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1695
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first _PLAYER_INFO_RE pattern found in player_url.

    Raises ExtractorError when none of the patterns matches.
    """
    candidates = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    id_m = next((found for found in candidates if found), None)
    if id_m is None:
        raise ExtractorError('Cannot identify player %r' % player_url)
    return id_m.group('id')
e40c758c 1705
def _load_player(self, video_id, player_url, fatal=True) -> bool:
    """Make sure the code of the player at player_url is in self._code_cache.

    Returns True when the player code is available in the cache afterwards,
    False otherwise.
    """
    player_id = self._extract_player_info(player_url)
    if player_id not in self._code_cache:
        code = self._download_webpage(
            player_url, video_id, fatal=fatal,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
        # NOTE(review): with fatal=False a failed download presumably yields
        # a falsy value instead of raising. Do not cache that, otherwise the
        # membership test below would report the player as loaded and callers
        # would try to parse a non-string as player code.
        if code:
            self._code_cache[player_id] = code
    return player_id in self._code_cache
1714
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that rearranges a signature shaped like example_sig.

    A character-index list stored in the 'youtube-sigfuncs' cache is used when
    present; otherwise the function is parsed from the player code and its
    index list is stored in that cache.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    sig_shape = self._signature_cache_id(example_sig)
    func_id = 'js_%s_%s' % (player_id, sig_shape)
    assert os.path.basename(func_id) == func_id

    stored_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if stored_spec is not None:
        def apply_stored_spec(s):
            return ''.join([s[idx] for idx in stored_spec])
        return apply_stored_spec

    if not self._load_player(video_id, player_url):
        return None

    sig_func = self._parse_sig_js(self._code_cache[player_id])

    # Run the function on a string whose characters are their own positions,
    # so the output spells out the permutation it applies.
    probe = ''.join(compat_chr(position) for position in range(len(example_sig)))
    spec = [ord(ch) for ch in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, spec)
    return sig_func
83799698 1737
def _print_sig_code(self, func, example_sig):
    """Print Python code that reproduces func for signatures shaped like example_sig.

    func is run on a probe string whose characters encode their own positions;
    the resulting index list is turned into an expression of indexes and
    slices of ``s`` and written out with to_screen().
    """
    def gen_sig_code(idxs):
        # Yield 's[...]' fragments covering idxs, merging runs of indexes
        # that step by +1 or -1 into a single slice.
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            # end + step is the exclusive slice bound; a negative bound
            # means "run to the edge", written as a bare ':'
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                # Inside a run: keep going while the step stays the same
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run starts at prev
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Emit whatever is still pending after the last pair
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1776
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and wrap it as a Python callable.

    The function name is the ``sig`` group of the first pattern below that
    matches jscode; the function itself is extracted with JSInterpreter.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    initial_function = interpreter.extract_function(funcname)

    def run_signature_function(s):
        # The extracted function takes its arguments as a list
        return initial_function([s])

    return run_signature_function
1800
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""

    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    try:
        # One signature function per player and per signature shape
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key in self._player_cache:
            sig_func = self._player_cache[cache_key]
        else:
            sig_func = self._extract_signature_function(video_id, player_url, s)
            self._player_cache[cache_key] = sig_func
        if self.get_param('youtube_print_sig_code'):
            self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        # Any failure is reported as an ExtractorError carrying the traceback
        tb = traceback.format_exc()
        raise ExtractorError('Signature extraction failed: ' + tb, cause=e)
e0df6211 1822
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The value is taken from ytcfg['STS'] when that is usable, otherwise it is
    searched for in the player code. Without a player_url this raises
    ExtractorError if fatal is set, else warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if not fatal:
            self.report_warning(error_msg)
            return None
        raise ExtractorError(error_msg)

    if not self._load_player(video_id, player_url, fatal=fatal):
        return sts

    player_code = self._code_cache[self._extract_player_info(player_url)]
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', player_code,
        'JS player signature timestamp', group='sts', fatal=fatal))
1847
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL from player_response, if it has one.

    The request is non-fatal: a failure is not raised.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return
    url_parts = compat_urlparse.urlparse(playback_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)]

    query['ver'] = ['2']
    query['cpn'] = [''.join(cpn_chars)]
    encoded_query = compat_urllib_parse_urlencode(query, True)
    playback_url = compat_urlparse.urlunparse(url_parts._replace(query=encoded_query))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1872
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return every YouTube embed found in webpage, as a list.

    Entries are embed URLs (iframe/embed/object/SWF style embeds) or bare
    values taken from ``data-youtube-id`` / ``data-video_id`` attributes.
    """
    # Embedded YouTube player
    # The embedSWF alternative matches the opening of an embedSWF("...") call.
    # It used to be written `embedSWF\(?:\s*`, a mistyped group that required
    # a literal ':' and therefore never matched such calls.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id)
        for youtube_id in re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # Each match is a tuple of all groups; the video id is the last one
    entries.extend(m[-1] for m in matches)

    return entries
1904
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by _extract_urls in webpage, or None."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1909
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return group 2 of _VALID_URL matched against url.

    Raises ExtractorError when url does not match _VALID_URL.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1917
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a list of chapter dicts from the chaptered player bar in data.

    Each chapter has 'start_time', 'end_time' and 'title'. A chapter ends
    where the next one starts; the last one ends at duration. Chapters whose
    start or end time is unknown are left out. Returns None when data holds
    no chapter list.
    """
    def locate_chapters(x):
        overlay = x['playerOverlays']['playerOverlayRenderer']
        player_bar = overlay['decoratedPlayerBarRenderer']['decoratedPlayerBarRenderer']['playerBar']
        return player_bar['chapteredPlayerBarRenderer']['chapters']

    chapters_list = try_get(data, locate_chapters, list)
    if not chapters_list:
        return

    def start_seconds(chapter):
        millis = try_get(
            chapter, lambda x: x['chapterRenderer']['timeRangeStartMillis'], int)
        return float_or_none(millis, scale=1000)

    chapters = []
    last_index = len(chapters_list) - 1
    for index, chapter in enumerate(chapters_list):
        start_time = start_seconds(chapter)
        if start_time is None:
            continue
        if index < last_index:
            end_time = start_seconds(chapters_list[index + 1])
        else:
            end_time = duration
        if end_time is None:
            continue
        chapters.append({
            'start_time': start_time,
            'end_time': end_time,
            'title': try_get(
                chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return chapters
1957
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Find the JSON matched by regex in webpage and parse it (non-fatally).

    The regex is tried first with _YT_INITIAL_BOUNDARY_RE appended, then on
    its own; '{}' is parsed when neither matches.
    """
    bounded_regex = r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE)
    raw_json = self._search_regex(
        (bounded_regex, regex), webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 1962
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns None when the text has fewer than three space-separated words.
    """
    words = time_text.split(' ')
    if len(words) < 3:
        return None
    amount, unit = words[0], words[1]
    return datetime_from_str('now-%s%s' % (amount, unit), precision='auto')
1972
a1c5d2ca
M
@staticmethod
def _join_text_entries(runs):
    """Concatenate the non-empty 'text' strings of the dicts in runs.

    Items that are not dicts are ignored. Returns None when no text is found.
    """
    pieces = []
    for run in runs:
        if isinstance(run, dict):
            piece = try_get(run, lambda x: x['text'], compat_str)
            if piece:
                pieces.append(piece)
    return ''.join(pieces) if pieces else None
1986
def _extract_comment(self, comment_renderer, parent=None):
    """Turn a commentRenderer dict into a comment info dict.

    Returns None when the renderer has no 'commentId'. 'timestamp' is derived
    from the relative published-time text and is None when that text is
    missing or cannot be split into amount and unit.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return
    comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
    text = self._join_text_entries(comment_text_runs) or ''
    comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
    time_text = self._join_text_entries(comment_time_text)
    # time_text is None when the renderer has no published-time runs, and
    # parse_time_text returns None for texts shorter than three words; both
    # cases used to crash here with AttributeError.
    timestamp = None
    time_text_dt = self.parse_time_text(time_text) if time_text else None
    if time_text_dt is not None:
        timestamp = calendar.timegm(time_text_dt.timetuple())
    author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
    votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                  lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    # A comment counts as favorited when its action buttons include 'creatorHeart'
    is_favorited = 'creatorHeart' in (try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
2020
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, video_id, parent=None, comment_counts=None):
    """Generate the comments reachable from root_continuation_data.

    Yields comment dicts (see _extract_comment) and, when a comments header
    is found, the estimated total number of comments as an int. Replies are
    fetched by recursing with parent set to the id of the parent comment.

    comment_counts is a shared mutable list:
    [comments so far, estimated total comments, current comment thread #].
    """

    def extract_header(contents):
        # Read the expected comment count and the continuation for the
        # requested sort order from the comments header.
        _total_comments = 0
        _continuation = None
        for content in contents:
            comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
            expected_comment_count = try_get(comments_header_renderer,
                                             (lambda x: x['countText']['runs'][0]['text'],
                                              lambda x: x['commentsCount']['runs'][0]['text']),
                                             compat_str)
            if expected_comment_count:
                comment_counts[1] = str_to_int(expected_comment_count)
                self.to_screen('Downloading ~%d comments' % str_to_int(expected_comment_count))
                _total_comments = comment_counts[1]
            sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
            comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = sort_menu_item.get('title')
            if isinstance(sort_text, compat_str):
                sort_text = sort_text.lower()
            else:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text)
            break
        return _total_comments, _continuation

    def extract_thread(contents):
        # Yield every comment in contents, each followed by its replies
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # dict is the expected type here. It used to sit inside the
            # getter tuple, where it was called as a second getter and
            # returned a copy of the wrong dict.
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    video_id, parent=comment.get('id'), comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    # YouTube comments have a max depth of 2
    max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
    if max_depth == 1 and parent:
        return
    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    continuation = self._extract_continuation(root_continuation_data)
    if continuation and len(continuation['ctoken']) < 27:
        self.write_debug('Detected old API continuation token. Generating new API compatible token.')
        continuation_token = self._generate_comment_continuation(video_id)
        continuation = self._build_continuation_query(continuation_token, None)

    visitor_data = None
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    comment_counts[2], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=self._continuation_query_ajax_to_api(continuation),
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
        if not response:
            break
        # Carry the visitor data of the latest response into the next request
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

        continuation = None
        if isinstance(continuation_contents, list):
            for continuation_section in continuation_contents:
                if not isinstance(continuation_section, dict):
                    continue
                continuation_items = try_get(
                    continuation_section,
                    (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                     lambda x: x['appendContinuationItemsAction']['continuationItems']),
                    list) or []
                if is_first_continuation:
                    total_comments, continuation = extract_header(continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break
                    continue
                # Count from 1 so that count == 0 really means "no entries";
                # counting from 0 made a page holding exactly one comment
                # look empty and dropped its continuation.
                count = 0
                for count, entry in enumerate(extract_thread(continuation_items), start=1):
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break

        # Deprecated response structure
        elif isinstance(continuation_contents, dict):
            known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
            for key, continuation_renderer in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                if not isinstance(continuation_renderer, dict):
                    continue
                if is_first_continuation:
                    header_continuation_items = [continuation_renderer.get('header') or {}]
                    total_comments, continuation = extract_header(header_continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break

                # Sometimes YouTube provides a continuation without any comments
                # In most cases we end up just downloading these with very little comments to come.
                # (counted from 1, see the same loop above)
                count = 0
                for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {}), start=1):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                if count == 0:
                    if not parent:
                        self.report_warning('No comments received - assuming end of comments')
                    continuation = None
                break
a1c5d2ca 2192
2d6659b9 2193 @staticmethod
2194 def _generate_comment_continuation(video_id):
2195 """
2196 Generates initial comment section continuation token from given video id
2197 """
2198 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2199 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2200 new_continuation_intlist = list(itertools.chain.from_iterable(
2201 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2202 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2203
def _extract_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Collects comments until the generator is exhausted, the 'max_comments'
    limit is reached or the user interrupts. Returns a dict with 'comments'
    and 'comment_count'.
    """
    known_entry_comment_renderers = ('itemSectionRenderer',)

    def _real_comment_extract(contents):
        if not isinstance(contents, list):
            return
        for entry in contents:
            for key, renderer in entry.items():
                if key in known_entry_comment_renderers:
                    identity_token = self._extract_identity_token(webpage, item_id=video_id)
                    account_syncid = self._extract_account_syncid(ytcfg)
                    yield from self._comment_entries(
                        renderer, video_id=video_id, ytcfg=ytcfg,
                        identity_token=identity_token,
                        account_syncid=account_syncid)
                    # Only the first known renderer of an entry is used
                    break

    collected = []
    estimated_total = 0
    limit = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')

    try:
        for item in _real_comment_extract(contents):
            if len(collected) >= limit:
                break
            if isinstance(item, int):
                # An int is the estimated total, not a comment
                estimated_total = item
            else:
                collected.append(item)
    except KeyboardInterrupt:
        self.to_screen('Interrupted by user')
    self.to_screen('Downloaded %d/%d comments' % (len(collected), estimated_total))
    return {
        'comments': collected,
        'comment_count': len(collected),
    }
2237
109dd3b2 2238 @staticmethod
2239 def _generate_player_context(sts=None):
2240 context = {
2241 'html5Preference': 'HTML5_PREF_WANTS',
2242 }
2243 if sts is not None:
2244 context['signatureTimestamp'] = sts
2245 return {
2246 'playbackContext': {
2247 'contentPlaybackContext': context
2248 }
2249 }
2250
4e6767b5 2251 @staticmethod
2252 def _get_video_info_params(video_id):
2253 return {
2254 'video_id': video_id,
2255 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
2256 'html5': '1',
2257 'c': 'TVHTML5',
2258 'cver': '6.20180913',
2259 }
2260
c5e8d7af 2261 def _real_extract(self, url):
cf7e015f 2262 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 2263 video_id = self._match_id(url)
9297939e 2264
2265 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
2266
545cc85d 2267 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 2268 webpage_url = base_url + 'watch?v=' + video_id
2269 webpage = self._download_webpage(
cce889b9 2270 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 2271
109dd3b2 2272 ytcfg = self._extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2273 identity_token = self._extract_identity_token(webpage, video_id)
2274 syncid = self._extract_account_syncid(ytcfg)
2275 headers = self._generate_api_headers(ytcfg, identity_token, syncid)
2276
2277 player_url = self._extract_player_url(ytcfg, webpage)
2278
2d6659b9 2279 player_client = self._configuration_arg('player_client', [''])[0]
4bb6b02f 2280 if player_client not in ('web', 'android', ''):
2281 self.report_warning(f'Invalid player_client {player_client} given. Falling back to WEB')
2282 force_mobile_client = player_client == 'android'
2283 player_skip = self._configuration_arg('player_skip')
109dd3b2 2284
9297939e 2285 def get_text(x):
2286 if not x:
2287 return
2288 text = x.get('simpleText')
2289 if text and isinstance(text, compat_str):
2290 return text
2291 runs = x.get('runs')
2292 if not isinstance(runs, list):
2293 return
2294 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
2295
2296 ytm_streaming_data = {}
2297 if is_music_url:
109dd3b2 2298 ytm_webpage = None
2299 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2300 if sts and not force_mobile_client and 'configs' not in player_skip:
2301 ytm_webpage = self._download_webpage(
2302 'https://music.youtube.com',
2d6659b9 2303 video_id, fatal=False, note='Downloading remix client config')
109dd3b2 2304
2305 ytm_cfg = self._extract_ytcfg(video_id, ytm_webpage) or {}
2306 ytm_client = 'WEB_REMIX'
2307 if not sts or force_mobile_client:
2308 # Android client already has signature descrambled
2309 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2310 if not sts:
2311 self.report_warning('Falling back to mobile remix client for player API.')
2312 ytm_client = 'ANDROID_MUSIC'
2313 ytm_cfg = {}
2314
2315 ytm_headers = self._generate_api_headers(
2316 ytm_cfg, identity_token, syncid,
2317 client=ytm_client)
2318 ytm_query = {'videoId': video_id}
2319 ytm_query.update(self._generate_player_context(sts))
2320
2321 ytm_player_response = self._extract_response(
2322 item_id=video_id, ep='player', query=ytm_query,
2323 ytcfg=ytm_cfg, headers=ytm_headers, fatal=False,
2324 default_client=ytm_client,
2325 note='Downloading %sremix player API JSON' % ('mobile ' if force_mobile_client else ''))
2d6659b9 2326 ytm_streaming_data = try_get(ytm_player_response, lambda x: x['streamingData'], dict) or {}
109dd3b2 2327
545cc85d 2328 player_response = None
2329 if webpage:
2330 player_response = self._extract_yt_initial_variable(
2331 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2332 video_id, 'initial player response')
f4f751af 2333
109dd3b2 2334 if not player_response or force_mobile_client:
2335 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2336 yt_client = 'WEB'
2337 ytpcfg = ytcfg
2338 ytp_headers = headers
2339 if not sts or force_mobile_client:
2340 # Android client already has signature descrambled
2341 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2342 if not sts:
2343 self.report_warning('Falling back to mobile client for player API.')
2344 yt_client = 'ANDROID'
2345 ytpcfg = {}
2346 ytp_headers = self._generate_api_headers(ytpcfg, identity_token, syncid, yt_client)
2347
2348 yt_query = {'videoId': video_id}
2349 yt_query.update(self._generate_player_context(sts))
2350 player_response = self._extract_response(
2351 item_id=video_id, ep='player', query=yt_query,
2352 ytcfg=ytpcfg, headers=ytp_headers, fatal=False,
2353 default_client=yt_client,
2354 note='Downloading %splayer API JSON' % ('mobile ' if force_mobile_client else '')
2355 )
545cc85d 2356
109dd3b2 2357 # Age-gate workarounds
545cc85d 2358 playability_status = player_response.get('playabilityStatus') or {}
109dd3b2 2359 if playability_status.get('reason') in self._AGE_GATE_REASONS:
545cc85d 2360 pr = self._parse_json(try_get(compat_parse_qs(
2361 self._download_webpage(
2362 base_url + 'get_video_info', video_id,
4e6767b5 2363 'Refetching age-gated info webpage', 'unable to download video info webpage',
2364 query=self._get_video_info_params(video_id), fatal=False)),
545cc85d 2365 lambda x: x['player_response'][0],
2366 compat_str) or '{}', video_id)
109dd3b2 2367 if not pr:
2368 self.report_warning('Falling back to embedded-only age-gate workaround.')
2369 embed_webpage = None
2370 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2371 if sts and not force_mobile_client and 'configs' not in player_skip:
2372 embed_webpage = self._download_webpage(
2373 'https://www.youtube.com/embed/%s?html5=1' % video_id,
2374 video_id=video_id, note='Downloading age-gated embed config')
2375
2376 ytcfg_age = self._extract_ytcfg(video_id, embed_webpage) or {}
2377 # If we extracted the embed webpage, it'll tell us if we can view the video
2378 embedded_pr = self._parse_json(
2379 try_get(ytcfg_age, lambda x: x['PLAYER_VARS']['embedded_player_response'], str) or '{}',
2380 video_id=video_id)
2381 embedded_ps_reason = try_get(embedded_pr, lambda x: x['playabilityStatus']['reason'], str) or ''
2382 if embedded_ps_reason not in self._AGE_GATE_REASONS:
2383 yt_client = 'WEB_EMBEDDED_PLAYER'
2384 if not sts or force_mobile_client:
2385 # Android client already has signature descrambled
2386 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2387 if not sts:
2388 self.report_warning(
2389 'Falling back to mobile embedded client for player API (note: some formats may be missing).')
2390 yt_client = 'ANDROID_EMBEDDED_PLAYER'
2391 ytcfg_age = {}
2392
2393 ytage_headers = self._generate_api_headers(
2394 ytcfg_age, identity_token, syncid, client=yt_client)
2395 yt_age_query = {'videoId': video_id}
2396 yt_age_query.update(self._generate_player_context(sts))
2397 pr = self._extract_response(
2398 item_id=video_id, ep='player', query=yt_age_query,
2399 ytcfg=ytcfg_age, headers=ytage_headers, fatal=False,
2400 default_client=yt_client,
2401 note='Downloading %sage-gated player API JSON' % ('mobile ' if force_mobile_client else '')
2402 ) or {}
2403
545cc85d 2404 if pr:
2405 player_response = pr
2406
2407 trailer_video_id = try_get(
2408 playability_status,
2409 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
2410 compat_str)
2411 if trailer_video_id:
2412 return self.url_result(
2413 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 2414
545cc85d 2415 search_meta = (
2416 lambda x: self._html_search_meta(x, webpage, default=None)) \
2417 if webpage else lambda x: None
dbdaaa23 2418
545cc85d 2419 video_details = player_response.get('videoDetails') or {}
37357d21 2420 microformat = try_get(
545cc85d 2421 player_response,
2422 lambda x: x['microformat']['playerMicroformatRenderer'],
2423 dict) or {}
2424 video_title = video_details.get('title') \
2425 or get_text(microformat.get('title')) \
2426 or search_meta(['og:title', 'twitter:title', 'title'])
2427 video_description = video_details.get('shortDescription')
cf7e015f 2428
8fe10494 2429 if not smuggled_data.get('force_singlefeed', False):
a06916d9 2430 if not self.get_param('noplaylist'):
8fe10494
S
2431 multifeed_metadata_list = try_get(
2432 player_response,
2433 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 2434 compat_str)
8fe10494
S
2435 if multifeed_metadata_list:
2436 entries = []
2437 feed_ids = []
2438 for feed in multifeed_metadata_list.split(','):
2439 # Unquote should take place before split on comma (,) since textual
2440 # fields may contain comma as well (see
067aa17e 2441 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 2442 feed_data = compat_parse_qs(
2443 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
2444
def feed_entry(name):
    """Return the first value stored under `name` in the enclosing
    `feed_data` mapping, as filtered by try_get with compat_str."""
    def first_value(data):
        return data[name][0]

    return try_get(feed_data, first_value, compat_str)
6b09401b
S
2448
2449 feed_id = feed_entry('id')
2450 if not feed_id:
2451 continue
2452 feed_title = feed_entry('title')
2453 title = video_title
2454 if feed_title:
2455 title += ' (%s)' % feed_title
8fe10494
S
2456 entries.append({
2457 '_type': 'url_transparent',
2458 'ie_key': 'Youtube',
2459 'url': smuggle_url(
545cc85d 2460 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 2461 {'force_singlefeed': True}),
6b09401b 2462 'title': title,
8fe10494 2463 })
6b09401b 2464 feed_ids.append(feed_id)
8fe10494
S
2465 self.to_screen(
2466 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2467 % (', '.join(feed_ids), video_id))
545cc85d 2468 return self.playlist_result(
2469 entries, video_id, video_title, video_description)
8fe10494
S
2470 else:
2471 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 2472
9297939e 2473 formats, itags, stream_ids = [], [], []
cc2db878 2474 itag_qualities = {}
d3fc8074 2475 q = qualities([
60bdb7bd 2476 # "tiny" is the smallest video-only format. But some audio-only formats
2477 # were also labeled "tiny". It is not clear if such formats still exist
d3fc8074 2478 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2479 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2480 ])
9297939e 2481
545cc85d 2482 streaming_data = player_response.get('streamingData') or {}
2483 streaming_formats = streaming_data.get('formats') or []
2484 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
9297939e 2485 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2486 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2487
545cc85d 2488 for fmt in streaming_formats:
2489 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2490 continue
321bf820 2491
cc2db878 2492 itag = str_or_none(fmt.get('itag'))
9297939e 2493 audio_track = fmt.get('audioTrack') or {}
2494 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2495 if stream_id in stream_ids:
2496 continue
2497
cc2db878 2498 quality = fmt.get('quality')
d3fc8074 2499 if quality == 'tiny' or not quality:
2500 quality = fmt.get('audioQuality', '').lower() or quality
cc2db878 2501 if itag and quality:
2502 itag_qualities[itag] = quality
2503 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2504 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2505 # number of fragments that would subsequently be requested (with `&sq=N`)
2506 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2507 continue
2508
545cc85d 2509 fmt_url = fmt.get('url')
2510 if not fmt_url:
2511 sc = compat_parse_qs(fmt.get('signatureCipher'))
2512 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2513 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2514 if not (sc and fmt_url and encrypted_sig):
2515 continue
545cc85d 2516 if not player_url:
201e9eaa 2517 continue
545cc85d 2518 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2519 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2520 fmt_url += '&' + sp + '=' + signature
2521
545cc85d 2522 if itag:
2523 itags.append(itag)
9297939e 2524 stream_ids.append(stream_id)
2525
cc2db878 2526 tbr = float_or_none(
2527 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 2528 dct = {
2529 'asr': int_or_none(fmt.get('audioSampleRate')),
2530 'filesize': int_or_none(fmt.get('contentLength')),
2531 'format_id': itag,
0fb983f6 2532 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
545cc85d 2533 'fps': int_or_none(fmt.get('fps')),
2534 'height': int_or_none(fmt.get('height')),
dca3ff4a 2535 'quality': q(quality),
cc2db878 2536 'tbr': tbr,
545cc85d 2537 'url': fmt_url,
2538 'width': fmt.get('width'),
0fb983f6 2539 'language': audio_track.get('id', '').split('.')[0],
545cc85d 2540 }
60bdb7bd 2541 mime_mobj = re.match(
2542 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2543 if mime_mobj:
2544 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2545 dct.update(parse_codecs(mime_mobj.group(2)))
2546 # The 3gp format in android client has a quality of "small",
2547 # but is actually worse than all other formats
2548 if dct['ext'] == '3gp':
2549 dct['quality'] = q('tiny')
cc2db878 2550 no_audio = dct.get('acodec') == 'none'
2551 no_video = dct.get('vcodec') == 'none'
2552 if no_audio:
2553 dct['vbr'] = tbr
2554 if no_video:
2555 dct['abr'] = tbr
2556 if no_audio or no_video:
545cc85d 2557 dct['downloader_options'] = {
2558 # Youtube throttles chunks >~10M
2559 'http_chunk_size': 10485760,
bf1317d2 2560 }
7c60c33e 2561 if dct.get('ext'):
2562 dct['container'] = dct['ext'] + '_dash'
545cc85d 2563 formats.append(dct)
2564
4bb6b02f 2565 skip_manifests = self._configuration_arg('skip')
5d3a0e79 2566 get_dash = 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
2567 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2568
9297939e 2569 for sd in (streaming_data, ytm_streaming_data):
5d3a0e79 2570 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
9297939e 2571 if hls_manifest_url:
2572 for f in self._extract_m3u8_formats(
2573 hls_manifest_url, video_id, 'mp4', fatal=False):
2574 itag = self._search_regex(
2575 r'/itag/(\d+)', f['url'], 'itag', default=None)
2576 if itag:
2577 f['format_id'] = itag
8d68ab98 2578 formats.append(f)
545cc85d 2579
5d3a0e79 2580 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2581 if dash_manifest_url:
2582 for f in self._extract_mpd_formats(
2583 dash_manifest_url, video_id, fatal=False):
2584 itag = f['format_id']
2585 if itag in itags:
2586 continue
2587 if itag in itag_qualities:
2588 f['quality'] = q(itag_qualities[itag])
2589 filesize = int_or_none(self._search_regex(
2590 r'/clen/(\d+)', f.get('fragment_base_url')
2591 or f['url'], 'file size', default=None))
2592 if filesize:
2593 f['filesize'] = filesize
2594 formats.append(f)
bf1317d2 2595
545cc85d 2596 if not formats:
a06916d9 2597 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
b7da73eb 2598 self.raise_no_formats(
545cc85d 2599 'This video is DRM protected.', expected=True)
2600 pemr = try_get(
2601 playability_status,
2602 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2603 dict) or {}
2604 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2605 subreason = pemr.get('subreason')
2606 if subreason:
2607 subreason = clean_html(get_text(subreason))
2608 if subreason == 'The uploader has not made this video available in your country.':
2609 countries = microformat.get('availableCountries')
2610 if not countries:
2611 regions_allowed = search_meta('regionsAllowed')
2612 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2613 self.raise_geo_restricted(subreason, countries, metadata_available=True)
545cc85d 2614 reason += '\n' + subreason
2615 if reason:
b7da73eb 2616 self.raise_no_formats(reason, expected=True)
bf1317d2 2617
545cc85d 2618 self._sort_formats(formats)
bf1317d2 2619
545cc85d 2620 keywords = video_details.get('keywords') or []
2621 if not keywords and webpage:
2622 keywords = [
2623 unescapeHTML(m.group('content'))
2624 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2625 for keyword in keywords:
2626 if keyword.startswith('yt:stretch='):
201c1459 2627 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2628 if mobj:
2629 # NB: float is intentional for forcing float division
2630 w, h = (float(v) for v in mobj.groups())
2631 if w > 0 and h > 0:
2632 ratio = w / h
2633 for f in formats:
2634 if f.get('vcodec') != 'none':
2635 f['stretched_ratio'] = ratio
2636 break
6449cd80 2637
545cc85d 2638 thumbnails = []
2639 for container in (video_details, microformat):
2640 for thumbnail in (try_get(
2641 container,
2642 lambda x: x['thumbnail']['thumbnails'], list) or []):
2643 thumbnail_url = thumbnail.get('url')
2644 if not thumbnail_url:
bf1317d2 2645 continue
1988fab7 2646 # Sometimes youtube gives a wrong thumbnail URL. See:
2647 # https://github.com/yt-dlp/yt-dlp/issues/233
2648 # https://github.com/ytdl-org/youtube-dl/issues/28023
2649 if 'maxresdefault' in thumbnail_url:
2650 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2651 thumbnails.append({
545cc85d 2652 'url': thumbnail_url,
ff2751ac 2653 'height': int_or_none(thumbnail.get('height')),
545cc85d 2654 'width': int_or_none(thumbnail.get('width')),
ff2751ac 2655 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
545cc85d 2656 })
ff2751ac 2657 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2658 if thumbnail_url:
2659 thumbnails.append({
2660 'url': thumbnail_url,
2661 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2662 })
2663 # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
2664 # See: https://github.com/ytdl-org/youtube-dl/issues/29049
2665 thumbnails.append({
2666 'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
2667 'preference': 1,
2668 })
2669 self._remove_duplicate_formats(thumbnails)
545cc85d 2670
2671 category = microformat.get('category') or search_meta('genre')
2672 channel_id = video_details.get('channelId') \
2673 or microformat.get('externalChannelId') \
2674 or search_meta('channelId')
2675 duration = int_or_none(
2676 video_details.get('lengthSeconds')
2677 or microformat.get('lengthSeconds')) \
2678 or parse_duration(search_meta('duration'))
2679 is_live = video_details.get('isLive')
f6745c49 2680 is_upcoming = video_details.get('isUpcoming')
545cc85d 2681 owner_profile_url = microformat.get('ownerProfileUrl')
2682
2683 info = {
2684 'id': video_id,
2685 'title': self._live_title(video_title) if is_live else video_title,
2686 'formats': formats,
2687 'thumbnails': thumbnails,
2688 'description': video_description,
2689 'upload_date': unified_strdate(
2690 microformat.get('uploadDate')
2691 or search_meta('uploadDate')),
2692 'uploader': video_details['author'],
2693 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2694 'uploader_url': owner_profile_url,
2695 'channel_id': channel_id,
2696 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2697 'duration': duration,
2698 'view_count': int_or_none(
2699 video_details.get('viewCount')
2700 or microformat.get('viewCount')
2701 or search_meta('interactionCount')),
2702 'average_rating': float_or_none(video_details.get('averageRating')),
2703 'age_limit': 18 if (
2704 microformat.get('isFamilySafe') is False
2705 or search_meta('isFamilyFriendly') == 'false'
2706 or search_meta('og:restrictions:age') == '18+') else 0,
2707 'webpage_url': webpage_url,
2708 'categories': [category] if category else None,
2709 'tags': keywords,
2710 'is_live': is_live,
2711 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2712 'was_live': video_details.get('isLiveContent'),
545cc85d 2713 }
b477fc13 2714
545cc85d 2715 pctr = try_get(
2716 player_response,
2717 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2718 subtitles = {}
2719 if pctr:
def process_language(container, base_url, lang_code, sub_name, query):
    """Register one subtitle entry per supported format for a language.

    container -- dict mapping language codes to lists of subtitle entries;
                 the list for lang_code is created if missing and extended
    base_url  -- caption track URL that the query parameters are applied to
    lang_code -- key under which the entries are stored
    sub_name  -- value stored as each entry's 'name'
    query     -- extra URL query parameters; left unmodified

    Appends to container[lang_code] in place and returns nothing.
    """
    lang_subs = container.setdefault(lang_code, [])
    for fmt in self._SUBTITLE_FORMATS:
        # Build a fresh per-format query rather than updating the caller's
        # dict, so the argument is not mutated as a side effect
        fmt_query = dict(query, fmt=fmt)
        lang_subs.append({
            'ext': fmt,
            'url': update_url_query(base_url, fmt_query),
            'name': sub_name,
        })
7e72694b 2731
545cc85d 2732 for caption_track in (pctr.get('captionTracks') or []):
2733 base_url = caption_track.get('baseUrl')
2734 if not base_url:
2735 continue
2736 if caption_track.get('kind') != 'asr':
120916da 2737 lang_code = (
2738 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2739 or caption_track.get('languageCode'))
545cc85d 2740 if not lang_code:
2741 continue
2742 process_language(
774d79cc 2743 subtitles, base_url, lang_code,
2d6659b9 2744 try_get(caption_track, lambda x: x['name']['simpleText']),
774d79cc 2745 {})
545cc85d 2746 continue
2747 automatic_captions = {}
2748 for translation_language in (pctr.get('translationLanguages') or []):
2749 translation_language_code = translation_language.get('languageCode')
2750 if not translation_language_code:
2751 continue
2752 process_language(
2753 automatic_captions, base_url, translation_language_code,
49c258e1 2754 try_get(translation_language, (
2755 lambda x: x['languageName']['simpleText'],
2756 lambda x: x['languageName']['runs'][0]['text'])),
545cc85d 2757 {'tlang': translation_language_code})
2758 info['automatic_captions'] = automatic_captions
2759 info['subtitles'] = subtitles
7e72694b 2760
545cc85d 2761 parsed_url = compat_urllib_parse_urlparse(url)
2762 for component in [parsed_url.fragment, parsed_url.query]:
2763 query = compat_parse_qs(component)
2764 for k, v in query.items():
2765 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2766 d_k += '_time'
2767 if d_k not in info and k in s_ks:
2768 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2769
2770 # Youtube Music Auto-generated description
822b9d9c 2771 if video_description:
38d70284 2772 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2773 if mobj:
822b9d9c
RA
2774 release_year = mobj.group('release_year')
2775 release_date = mobj.group('release_date')
2776 if release_date:
2777 release_date = release_date.replace('-', '')
2778 if not release_year:
545cc85d 2779 release_year = release_date[:4]
2780 info.update({
2781 'album': mobj.group('album'.strip()),
2782 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2783 'track': mobj.group('track').strip(),
2784 'release_date': release_date,
cc2db878 2785 'release_year': int_or_none(release_year),
545cc85d 2786 })
7e72694b 2787
545cc85d 2788 initial_data = None
2789 if webpage:
2790 initial_data = self._extract_yt_initial_variable(
2791 webpage, self._YT_INITIAL_DATA_RE, video_id,
2792 'yt initial data')
2793 if not initial_data:
109dd3b2 2794 initial_data = self._extract_response(
2795 item_id=video_id, ep='next', fatal=False,
2796 ytcfg=ytcfg, headers=headers, query={'videoId': video_id},
2797 note='Downloading initial data API JSON')
545cc85d 2798
c60ee3a2 2799 try:
2800 # This will error if there is no livechat
2801 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2802 info['subtitles']['live_chat'] = [{
2803 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2804 'video_id': video_id,
2805 'ext': 'json',
f6745c49 2806 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 2807 }]
2808 except (KeyError, IndexError, TypeError):
2809 pass
545cc85d 2810
2811 if initial_data:
2812 chapters = self._extract_chapters_from_json(
2813 initial_data, video_id, duration)
2814 if not chapters:
2815 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2816 contents = try_get(
2817 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2818 list)
2819 if not contents:
2820 continue
2821
def chapter_time(mmlir):
    """Return the parsed 'timeDescription' of a macro-markers list item.

    mmlir is expected to be a dict ('macroMarkersListItemRenderer').
    Returns None when mmlir is not a dict; this happens when the lookup of
    the following chapter's renderer yields nothing. The caller skips
    chapters whose start or end time is None.
    """
    if not isinstance(mmlir, dict):
        return None
    return parse_duration(
        get_text(mmlir.get('timeDescription')))
2825
2826 chapters = []
2827 for next_num, content in enumerate(contents, start=1):
2828 mmlir = content.get('macroMarkersListItemRenderer') or {}
2829 start_time = chapter_time(mmlir)
2830 end_time = chapter_time(try_get(
2831 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2832 if next_num < len(contents) else duration
2833 if start_time is None or end_time is None:
2834 continue
2835 chapters.append({
2836 'start_time': start_time,
2837 'end_time': end_time,
2838 'title': get_text(mmlir.get('title')),
2839 })
2840 if chapters:
2841 break
2842 if chapters:
2843 info['chapters'] = chapters
2844
2845 contents = try_get(
2846 initial_data,
2847 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2848 list) or []
2849 for content in contents:
2850 vpir = content.get('videoPrimaryInfoRenderer')
2851 if vpir:
2852 stl = vpir.get('superTitleLink')
2853 if stl:
2854 stl = get_text(stl)
2855 if try_get(
2856 vpir,
2857 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2858 info['location'] = stl
2859 else:
2860 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2861 if mobj:
2862 info.update({
2863 'series': mobj.group(1),
2864 'season_number': int(mobj.group(2)),
2865 'episode_number': int(mobj.group(3)),
2866 })
2867 for tlb in (try_get(
2868 vpir,
2869 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2870 list) or []):
2871 tbr = tlb.get('toggleButtonRenderer') or {}
2872 for getter, regex in [(
2873 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2874 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2875 lambda x: x['accessibility'],
2876 lambda x: x['accessibilityData']['accessibilityData'],
2877 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2878 label = (try_get(tbr, getter, dict) or {}).get('label')
2879 if label:
2880 mobj = re.match(regex, label)
2881 if mobj:
2882 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2883 break
2884 sbr_tooltip = try_get(
2885 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2886 if sbr_tooltip:
2887 like_count, dislike_count = sbr_tooltip.split(' / ')
2888 info.update({
2889 'like_count': str_to_int(like_count),
2890 'dislike_count': str_to_int(dislike_count),
2891 })
2892 vsir = content.get('videoSecondaryInfoRenderer')
2893 if vsir:
2894 info['channel'] = get_text(try_get(
2895 vsir,
2896 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2897 dict))
545cc85d 2898 rows = try_get(
2899 vsir,
2900 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2901 list) or []
2902 multiple_songs = False
2903 for row in rows:
2904 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2905 multiple_songs = True
2906 break
2907 for row in rows:
2908 mrr = row.get('metadataRowRenderer') or {}
2909 mrr_title = mrr.get('title')
2910 if not mrr_title:
2911 continue
2912 mrr_title = get_text(mrr['title'])
2913 mrr_contents_text = get_text(mrr['contents'][0])
2914 if mrr_title == 'License':
2915 info['license'] = mrr_contents_text
2916 elif not multiple_songs:
2917 if mrr_title == 'Album':
2918 info['album'] = mrr_contents_text
2919 elif mrr_title == 'Artist':
2920 info['artist'] = mrr_contents_text
2921 elif mrr_title == 'Song':
2922 info['track'] = mrr_contents_text
2923
2924 fallbacks = {
2925 'channel': 'uploader',
2926 'channel_id': 'uploader_id',
2927 'channel_url': 'uploader_url',
2928 }
2929 for to, frm in fallbacks.items():
2930 if not info.get(to):
2931 info[to] = info.get(frm)
2932
2933 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2934 v = info.get(s_k)
2935 if v:
2936 info[d_k] = v
b84071c0 2937
c224251a
M
2938 is_private = bool_or_none(video_details.get('isPrivate'))
2939 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2940 is_membersonly = None
b28f8d24 2941 is_premium = None
c224251a
M
2942 if initial_data and is_private is not None:
2943 is_membersonly = False
b28f8d24 2944 is_premium = False
c224251a
M
2945 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2946 for content in contents or []:
2947 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2948 for badge in badges or []:
2949 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2950 if label.lower() == 'members only':
2951 is_membersonly = True
2952 break
b28f8d24
M
2953 elif label.lower() == 'premium':
2954 is_premium = True
2955 break
2956 if is_membersonly or is_premium:
c224251a
M
2957 break
2958
2959 # TODO: Add this for playlists
2960 info['availability'] = self._availability(
2961 is_private=is_private,
b28f8d24 2962 needs_premium=is_premium,
c224251a
M
2963 needs_subscription=is_membersonly,
2964 needs_auth=info['age_limit'] >= 18,
2965 is_unlisted=None if is_private is None else is_unlisted)
2966
06167fbb 2967 # get xsrf for annotations or comments
a06916d9 2968 get_annotations = self.get_param('writeannotations', False)
2969 get_comments = self.get_param('getcomments', False)
06167fbb 2970 if get_annotations or get_comments:
29f7c58a 2971 xsrf_token = None
545cc85d 2972 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2973 if ytcfg:
2974 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2975 if not xsrf_token:
2976 xsrf_token = self._search_regex(
2977 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2978 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2979
2980 # annotations
06167fbb 2981 if get_annotations:
64b6a4e9
RA
2982 invideo_url = try_get(
2983 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2984 if xsrf_token and invideo_url:
29f7c58a 2985 xsrf_field_name = None
2986 if ytcfg:
2987 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2988 if not xsrf_field_name:
2989 xsrf_field_name = self._search_regex(
2990 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2991 webpage, 'xsrf field name',
29f7c58a 2992 group='xsrf_field_name', default='session_token')
8a784c74 2993 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2994 self._proto_relative_url(invideo_url),
2995 video_id, note='Downloading annotations',
2996 errnote='Unable to download video annotations', fatal=False,
2997 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2998
277d6ff5 2999 if get_comments:
2d6659b9 3000 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage)
4ea3be0a 3001
545cc85d 3002 self.mark_watched(video_id, player_response)
d77ab8e2 3003
545cc85d 3004 return info
c5e8d7af 3005
5f6a1245 3006
8bdd16b4 3007class YoutubeTabIE(YoutubeBaseInfoExtractor):
3008 IE_DESC = 'YouTube.com tab'
70d5c17b 3009 _VALID_URL = r'''(?x)
3010 https?://
3011 (?:\w+\.)?
3012 (?:
3013 youtube(?:kids)?\.com|
3014 invidio\.us
3015 )/
3016 (?:
fe03a6cd 3017 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 3018 (?P<not_channel>
9ba5705a 3019 feed/|hashtag/|
70d5c17b 3020 (?:playlist|watch)\?.*?\blist=
3021 )|
29f7c58a 3022 (?!(?:%s)\b) # Direct URLs
70d5c17b 3023 )
3024 (?P<id>[^/?\#&]+)
3025 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 3026 IE_NAME = 'youtube:tab'
3027
81127aa5 3028 _TESTS = [{
da692b79 3029 'note': 'playlists, multipage',
8bdd16b4 3030 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3031 'playlist_mincount': 94,
3032 'info_dict': {
3033 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3034 'title': 'Игорь Клейнер - Playlists',
3035 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3036 'uploader': 'Игорь Клейнер',
3037 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 3038 },
3039 }, {
da692b79 3040 'note': 'playlists, multipage, different order',
8bdd16b4 3041 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3042 'playlist_mincount': 94,
3043 'info_dict': {
3044 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3045 'title': 'Игорь Клейнер - Playlists',
3046 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3047 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3048 'uploader': 'Игорь Клейнер',
8bdd16b4 3049 },
201c1459 3050 }, {
da692b79 3051 'note': 'playlists, series',
201c1459 3052 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3053 'playlist_mincount': 5,
3054 'info_dict': {
3055 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3056 'title': '3Blue1Brown - Playlists',
3057 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 3058 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3059 'uploader': '3Blue1Brown',
201c1459 3060 },
8bdd16b4 3061 }, {
da692b79 3062 'note': 'playlists, singlepage',
8bdd16b4 3063 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3064 'playlist_mincount': 4,
3065 'info_dict': {
3066 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3067 'title': 'ThirstForScience - Playlists',
3068 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 3069 'uploader': 'ThirstForScience',
3070 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 3071 }
3072 }, {
3073 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3074 'only_matching': True,
3075 }, {
da692b79 3076 'note': 'basic, single video playlist',
0e30a7b9 3077 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3078 'info_dict': {
0e30a7b9 3079 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3080 'uploader': 'Sergey M.',
3081 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3082 'title': 'youtube-dl public playlist',
81127aa5 3083 },
0e30a7b9 3084 'playlist_count': 1,
9291475f 3085 }, {
da692b79 3086 'note': 'empty playlist',
0e30a7b9 3087 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3088 'info_dict': {
0e30a7b9 3089 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3090 'uploader': 'Sergey M.',
3091 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3092 'title': 'youtube-dl empty playlist',
9291475f
PH
3093 },
3094 'playlist_count': 0,
3095 }, {
da692b79 3096 'note': 'Home tab',
8bdd16b4 3097 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3098 'info_dict': {
8bdd16b4 3099 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3100 'title': 'lex will - Home',
3101 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3102 'uploader': 'lex will',
3103 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3104 },
8bdd16b4 3105 'playlist_mincount': 2,
9291475f 3106 }, {
da692b79 3107 'note': 'Videos tab',
8bdd16b4 3108 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3109 'info_dict': {
8bdd16b4 3110 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3111 'title': 'lex will - Videos',
3112 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3113 'uploader': 'lex will',
3114 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3115 },
8bdd16b4 3116 'playlist_mincount': 975,
9291475f 3117 }, {
da692b79 3118 'note': 'Videos tab, sorted by popular',
8bdd16b4 3119 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3120 'info_dict': {
8bdd16b4 3121 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3122 'title': 'lex will - Videos',
3123 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3124 'uploader': 'lex will',
3125 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3126 },
8bdd16b4 3127 'playlist_mincount': 199,
9291475f 3128 }, {
da692b79 3129 'note': 'Playlists tab',
8bdd16b4 3130 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3131 'info_dict': {
8bdd16b4 3132 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3133 'title': 'lex will - Playlists',
3134 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3135 'uploader': 'lex will',
3136 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3137 },
8bdd16b4 3138 'playlist_mincount': 17,
ac7553d0 3139 }, {
da692b79 3140 'note': 'Community tab',
8bdd16b4 3141 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3142 'info_dict': {
8bdd16b4 3143 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3144 'title': 'lex will - Community',
3145 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3146 'uploader': 'lex will',
3147 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3148 },
3149 'playlist_mincount': 18,
87dadd45 3150 }, {
da692b79 3151 'note': 'Channels tab',
8bdd16b4 3152 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3153 'info_dict': {
8bdd16b4 3154 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3155 'title': 'lex will - Channels',
3156 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3157 'uploader': 'lex will',
3158 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3159 },
deaec5af 3160 'playlist_mincount': 12,
cd684175 3161 }, {
3162 'note': 'Search tab',
3163 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3164 'playlist_mincount': 40,
3165 'info_dict': {
3166 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3167 'title': '3Blue1Brown - Search - linear algebra',
3168 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3169 'uploader': '3Blue1Brown',
3170 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3171 },
6b08cdf6 3172 }, {
a0566bbf 3173 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3174 'only_matching': True,
3175 }, {
a0566bbf 3176 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3177 'only_matching': True,
3178 }, {
a0566bbf 3179 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3180 'only_matching': True,
3181 }, {
3182 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3183 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3184 'info_dict': {
3185 'title': '29C3: Not my department',
3186 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3187 'uploader': 'Christiaan008',
3188 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3189 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3190 },
3191 'playlist_count': 96,
3192 }, {
3193 'note': 'Large playlist',
3194 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3195 'info_dict': {
8bdd16b4 3196 'title': 'Uploads from Cauchemar',
3197 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3198 'uploader': 'Cauchemar',
3199 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3200 },
8bdd16b4 3201 'playlist_mincount': 1123,
3202 }, {
da692b79 3203 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3204 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3205 'only_matching': True,
4b7df0d3
JMF
3206 }, {
3207 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3208 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3209 'info_dict': {
acf757f4
PH
3210 'title': 'Uploads from Interstellar Movie',
3211 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3212 'uploader': 'Interstellar Movie',
8bdd16b4 3213 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3214 },
481cc733 3215 'playlist_mincount': 21,
358de58c 3216 }, {
3217 'note': 'Playlist with "show unavailable videos" button',
3218 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3219 'info_dict': {
3220 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3221 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3222 'uploader': 'Phim Siêu Nhân Nhật Bản',
3223 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3224 },
da692b79 3225 'playlist_mincount': 200,
5d342002 3226 }, {
da692b79 3227 'note': 'Playlist with unavailable videos in page 7',
5d342002 3228 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3229 'info_dict': {
3230 'title': 'Uploads from BlankTV',
3231 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3232 'uploader': 'BlankTV',
3233 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3234 },
da692b79 3235 'playlist_mincount': 1000,
8bdd16b4 3236 }, {
da692b79 3237 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3238 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3239 'info_dict': {
3240 'title': 'Data Analysis with Dr Mike Pound',
3241 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3242 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3243 'uploader': 'Computerphile',
deaec5af 3244 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3245 },
3246 'playlist_mincount': 11,
3247 }, {
a0566bbf 3248 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3249 'only_matching': True,
dacb3a86 3250 }, {
da692b79 3251 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3252 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3253 'info_dict': {
3254 'id': 'FqZTN594JQw',
3255 'ext': 'webm',
3256 'title': "Smiley's People 01 detective, Adventure Series, Action",
3257 'uploader': 'STREEM',
3258 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3259 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3260 'upload_date': '20150526',
3261 'license': 'Standard YouTube License',
3262 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3263 'categories': ['People & Blogs'],
3264 'tags': list,
dbdaaa23 3265 'view_count': int,
dacb3a86
S
3266 'like_count': int,
3267 'dislike_count': int,
3268 },
3269 'params': {
3270 'skip_download': True,
3271 },
13a75688 3272 'skip': 'This video is not available.',
dacb3a86 3273 'add_ie': [YoutubeIE.ie_key()],
481cc733 3274 }, {
8bdd16b4 3275 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3276 'only_matching': True,
66b48727 3277 }, {
8bdd16b4 3278 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3279 'only_matching': True,
a0566bbf 3280 }, {
3281 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3282 'info_dict': {
da692b79 3283 'id': 'X1whbWASnNQ', # This will keep changing
a0566bbf 3284 'ext': 'mp4',
deaec5af 3285 'title': compat_str,
a0566bbf 3286 'uploader': 'Sky News',
3287 'uploader_id': 'skynews',
3288 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3289 'upload_date': r're:\d{8}',
3290 'description': compat_str,
a0566bbf 3291 'categories': ['News & Politics'],
3292 'tags': list,
3293 'like_count': int,
3294 'dislike_count': int,
3295 },
3296 'params': {
3297 'skip_download': True,
3298 },
da692b79 3299 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3300 }, {
3301 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3302 'info_dict': {
3303 'id': 'a48o2S1cPoo',
3304 'ext': 'mp4',
3305 'title': 'The Young Turks - Live Main Show',
3306 'uploader': 'The Young Turks',
3307 'uploader_id': 'TheYoungTurks',
3308 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3309 'upload_date': '20150715',
3310 'license': 'Standard YouTube License',
3311 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3312 'categories': ['News & Politics'],
3313 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3314 'like_count': int,
3315 'dislike_count': int,
3316 },
3317 'params': {
3318 'skip_download': True,
3319 },
3320 'only_matching': True,
3321 }, {
3322 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3323 'only_matching': True,
3324 }, {
3325 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3326 'only_matching': True,
09f1580e 3327 }, {
3328 'note': 'A channel that is not live. Should raise error',
3329 'url': 'https://www.youtube.com/user/numberphile/live',
3330 'only_matching': True,
3d3dddc9 3331 }, {
3332 'url': 'https://www.youtube.com/feed/trending',
3333 'only_matching': True,
3334 }, {
3d3dddc9 3335 'url': 'https://www.youtube.com/feed/library',
3336 'only_matching': True,
3337 }, {
3d3dddc9 3338 'url': 'https://www.youtube.com/feed/history',
3339 'only_matching': True,
3340 }, {
3d3dddc9 3341 'url': 'https://www.youtube.com/feed/subscriptions',
3342 'only_matching': True,
3343 }, {
3d3dddc9 3344 'url': 'https://www.youtube.com/feed/watch_later',
3345 'only_matching': True,
3346 }, {
da692b79 3347 'note': 'Recommended - redirects to home page',
3d3dddc9 3348 'url': 'https://www.youtube.com/feed/recommended',
3349 'only_matching': True,
29f7c58a 3350 }, {
da692b79 3351 'note': 'inline playlist with not always working continuations',
29f7c58a 3352 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3353 'only_matching': True,
3354 }, {
3355 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3356 'only_matching': True,
3357 }, {
3358 'url': 'https://www.youtube.com/course',
3359 'only_matching': True,
3360 }, {
3361 'url': 'https://www.youtube.com/zsecurity',
3362 'only_matching': True,
3363 }, {
3364 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3365 'only_matching': True,
3366 }, {
3367 'url': 'https://www.youtube.com/TheYoungTurks/live',
3368 'only_matching': True,
39ed931e 3369 }, {
3370 'url': 'https://www.youtube.com/hashtag/cctv9',
3371 'info_dict': {
3372 'id': 'cctv9',
3373 'title': '#cctv9',
3374 },
3375 'playlist_mincount': 350,
201c1459 3376 }, {
3377 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3378 'only_matching': True,
9297939e 3379 }, {
da692b79 3380 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3381 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3382 'only_matching': True
fe03a6cd 3383 }, {
3384 'note': '/browse/ should redirect to /channel/',
3385 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3386 'only_matching': True
3387 }, {
3388 'note': 'VLPL, should redirect to playlist?list=PL...',
3389 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3390 'info_dict': {
3391 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3392 'uploader': 'NoCopyrightSounds',
3393 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3394 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3395 'title': 'NCS Releases',
3396 },
3397 'playlist_mincount': 166,
18db7548 3398 }, {
3399 'note': 'Topic, should redirect to playlist?list=UU...',
3400 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3401 'info_dict': {
3402 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3403 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3404 'title': 'Uploads from Royalty Free Music - Topic',
3405 'uploader': 'Royalty Free Music - Topic',
3406 },
3407 'expected_warnings': [
3408 'A channel/user page was given',
3409 'The URL does not have a videos tab',
3410 ],
3411 'playlist_mincount': 101,
3412 }, {
3413 'note': 'Topic without a UU playlist',
3414 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3415 'info_dict': {
3416 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3417 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3418 },
3419 'expected_warnings': [
3420 'A channel/user page was given',
3421 'The URL does not have a videos tab',
3422 'Falling back to channel URL',
3423 ],
3424 'playlist_mincount': 9,
abcdd12b 3425 }, {
3426 'note': 'Youtube music Album',
3427 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3428 'info_dict': {
3429 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3430 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3431 },
3432 'playlist_count': 50,
29f7c58a 3433 }]
3434
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL that YoutubeIE accepts is declined here; every other URL is
    judged by the parent class implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 3439
def _extract_channel_id(self, webpage):
    """Return the channel id found in *webpage*.

    The ``channelId`` meta tag is used when present.  Otherwise a channel
    URL is looked up among several URL meta tags (``og:url``,
    ``twitter:url``, ...) and the id is taken from its ``/channel/<id>``
    path component.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to be inside the group: a repeated group
    # ``(...)+`` only keeps its last repetition, i.e. a single character.
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
15f6397c 3452
@staticmethod
def _extract_basic_item_renderer(item):
    """Pick the first usable renderer dict out of *item*.

    A value qualifies when it is a dict and its key is either one of the
    known basic renderer names or of the form ``grid...Renderer``.
    Returns None when nothing qualifies.
    """
    # Modified from _extract_grid_item_renderer
    basic_names = (
        'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer')

    def is_wanted(name):
        if name in basic_names:
            return True
        return name.startswith('grid') and name.endswith('Renderer')

    for name, candidate in item.items():
        if isinstance(candidate, dict) and is_wanted(name):
            return candidate
    return None
8bdd16b4 3466
def _grid_entries(self, grid_renderer):
    """Yield one result per usable item in ``grid_renderer['items']``.

    Depending on which id the item's renderer carries, the result is a
    playlist URL result, an extracted video, or a channel URL result.
    Items with none of those ids fall back to their navigation endpoint
    URL, handed to the first of YoutubeTabIE, YoutubePlaylistIE and
    YoutubeIE that finds the URL suitable.
    """
    title_getters = (
        lambda x: x['title']['runs'][0]['text'],
        lambda x: x['title']['simpleText'])
    fallback_extractors = (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE)

    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(renderer, dict):
            continue
        title = try_get(renderer, title_getters, compat_str)

        playlist_id = renderer.get('playlistId')
        if playlist_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif renderer.get('videoId'):
            # video
            yield self._extract_video(renderer)
        elif renderer.get('channelId'):
            # channel; the title is re-read from simpleText only
            title = try_get(
                renderer, lambda x: x['title']['simpleText'], compat_str)
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % renderer['channelId'],
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support
            endpoint_path = try_get(
                renderer,
                lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str)
            ep_url = urljoin('https://www.youtube.com/', endpoint_path)
            if not ep_url:
                continue
            matching_ie = next(
                (ie for ie in fallback_extractors if ie.suitable(ep_url)), None)
            if matching_ie is not None:
                yield self.url_result(
                    ep_url, ie=matching_ie.ie_key(),
                    video_id=matching_ie._match_id(ep_url), video_title=title)
8bdd16b4 3509
def _shelf_entries_from_content(self, shelf_renderer):
    """Yield the entries embedded in a shelf's ``content``.

    Only ``gridRenderer`` / ``expandedShelfContentsRenderer`` content is
    processed (through ``_grid_entries``); nothing is yielded otherwise.
    """
    content = shelf_renderer.get('content')
    if isinstance(content, dict):
        grid_like = (
            content.get('gridRenderer')
            or content.get('expandedShelfContentsRenderer'))
        # TODO: add support for nested playlists so each shelf is processed
        # as separate playlist
        # TODO: this includes only first N items
        for entry in (self._grid_entries(grid_like) if grid_like else ()):
            yield entry
        # TODO: 'horizontalListRenderer' content is not handled yet
8bdd16b4 3525
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield the entries of a shelf.

    When the shelf has an endpoint URL, a URL result for it is yielded
    first.  The entries embedded in the shelf content are yielded
    afterwards in every case, except when *skip_channels* is set and the
    shelf URL contains ``/channels?`` - then nothing more is produced.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        links_to_channels = '/channels?' in shelf_url
        if skip_channels and links_to_channels:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # The shelf may not carry a URL; fall back to extraction from content
    for content_entry in self._shelf_entries_from_content(shelf_renderer):
        yield content_entry
c5e8d7af 3543
def _playlist_entries(self, video_list_renderer):
    """Yield an extracted video for every playlist item that has a video id.

    Items are read from ``video_list_renderer['contents']``; each one is
    expected to hold a ``playlistVideoRenderer`` or a
    ``playlistPanelVideoRenderer``.  Anything else is ignored.
    """
    for content in video_list_renderer['contents']:
        if not isinstance(content, dict):
            continue
        video_renderer = content.get('playlistVideoRenderer')
        if not video_renderer:
            video_renderer = content.get('playlistPanelVideoRenderer')
        if isinstance(video_renderer, dict) and video_renderer.get('videoId'):
            yield self._extract_video(video_renderer)
07aeced6 3555
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in ``content.videoRenderer``, if it has an id."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
    if video_renderer and video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3563
def _video_entry(self, video_renderer):
    """Return the extracted video for *video_renderer*, or None without a video id."""
    if not video_renderer.get('videoId'):
        return None
    return self._extract_video(video_renderer)
dacb3a86 3568
def _post_thread_entries(self, post_thread_renderer):
    """Yield what a community post links to.

    In order: the attached video (if any), the attached playlist (if
    any), then every video URL found in the post text that YoutubeIE
    accepts and that is not the attached video again.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'],
        compat_str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'],
            compat_str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        if linked_id != video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)
dacb3a86 3604
def _post_thread_continuation_entries(self, post_thread_continuation):
    """Yield the entries of every post thread in a continuation page.

    ``post_thread_continuation['contents']`` must be a list; each dict item
    holding a ``backstagePostThreadRenderer`` dict is passed to
    ``_post_thread_entries``.  Everything else is skipped.
    """
    contents = post_thread_continuation.get('contents')
    if not isinstance(contents, list):
        return
    for content in contents:
        # Same guard as the sibling entry iterators: a non-dict item
        # would otherwise fail on ``.get``.
        if not isinstance(content, dict):
            continue
        renderer = content.get('backstagePostThreadRenderer')
        if not isinstance(renderer, dict):
            continue
        for entry in self._post_thread_entries(renderer):
            yield entry
07aeced6 3615
39ed931e 3616 r''' # unused
3617 def _rich_grid_entries(self, contents):
3618 for content in contents:
3619 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3620 if video_renderer:
3621 entry = self._video_entry(video_renderer)
3622 if entry:
3623 yield entry
3624 '''
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield the entries of *tab*, then follow continuations page by page.

    The entries present in ``tab['content']`` are yielded first.  While a
    continuation is known, a further page is requested through
    ``_extract_response`` and its entries are yielded as well.  The loop
    stops when no continuation is left, when a request returns nothing, or
    when a response contains none of the known renderers.

    *item_id* is only used to label the page requests; *identity_token*,
    *account_syncid* and *ytcfg* are forwarded to the header and request
    helpers.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        # Walks parent_renderer['contents'] and records the continuation it
        # finds in continuation_list[0] for the enclosing function to read.
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                # Not an item section: only richItemRenderer is handled here
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                # Renderer key -> callable returning an iterable of entries
                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    # skip_channels is True on every tab not titled 'Channels'
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    # wrapped in a list so a single result can be iterated like the others
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    # only the first known renderer of an item is processed
                    break

            # Fall back to the item section itself ...
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        # ... and finally to the parent renderer
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list used as a mutable cell that extract_entries writes to
    continuation_list = [None]  # Python 2 does not support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

    for page_num in itertools.count(1):
        if not continuation:
            break
        query = {
            'continuation': continuation['continuation'],
            'clickTracking': {'clickTrackingParams': continuation['itct']}
        }
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=query, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep the previous visitor data when the response carries none
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        # First response shape: 'continuationContents' keyed by continuation type
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Fresh cell: extract_entries (if used) reports the next continuation here
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response shape: a list of continuation items. The key of the
        # first item selects the handler and the name under which the whole
        # item list is passed to it.
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            # Wrap the item list so it looks like the renderer the handler expects
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Nothing recognised in this response: stop paging
        break
9558dcec 3744
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the tab whose ``selected`` flag is True.

    Both ``tabRenderer`` and ``expandableTabRenderer`` are considered.
    Raises ExtractorError when no tab is selected.
    """
    tab_renderers = (
        dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        for tab in tabs)
    selected = next(
        (candidate for candidate in tab_renderers
         if candidate.get('selected') is True),
        None)
    if selected is None:
        raise ExtractorError('Unable to find selected tab')
    return selected
b82f815f 3753
@staticmethod
def _extract_uploader(data):
    """Read uploader name, id and URL from the playlist sidebar of *data*.

    Returns a dict with any of ``uploader``, ``uploader_id`` and
    ``uploader_url``; keys whose value would be None are left out.
    """
    found = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    for sidebar_item in sidebar_items or []:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info,
            lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0],
            dict)
        if not owner:
            continue
        owner_path = try_get(
            owner,
            lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'],
            compat_str)
        found.update({
            'uploader': owner.get('text'),
            'uploader_id': try_get(
                owner,
                lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'],
                compat_str),
            'uploader_url': urljoin('https://www.youtube.com/', owner_path),
        })
    return dict((key, value) for key, value in found.items() if value is not None)
8bdd16b4 3776
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a tabbed page (channel or playlist).

    Metadata is read from the channel metadata renderer, or from the
    playlist metadata renderer when there is no channel one.  The
    playlist id defaults to *item_id*, and the title gets the selected
    tab's ``title`` / ``expandedText`` appended.  Entries come from
    ``_entries`` for the selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    meta_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if meta_renderer:
        channel_name = meta_renderer.get('title')
        channel_url = meta_renderer.get('channelUrl')
        channel_id = meta_renderer.get('externalId')
    else:
        meta_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    raw_thumbnails = []
    if meta_renderer:
        title = meta_renderer.get('title')
        description = meta_renderer.get('description', '')
        playlist_id = channel_id
        tags = meta_renderer.get('keywords', '').split()
        raw_thumbnails = try_get(
            meta_renderer, lambda x: x['avatar']['thumbnails'], list)
        if not raw_thumbnails:
            # no avatar: use the thumbnail of the playlist sidebar instead
            raw_thumbnails = try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list) or []

    thumbnails = []
    for raw in raw_thumbnails:
        thumbnail_url = url_or_none(raw.get('url')) if isinstance(raw, dict) else None
        if thumbnail_url:
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(raw.get('width')),
                'height': int_or_none(raw.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag = try_get(
            data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag or playlist_id
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # channel* fields mirror whatever uploader* ended up being
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    identity_token = self._extract_identity_token(webpage, item_id)
    account_syncid = self._extract_account_syncid(data)
    ytcfg = self._extract_ytcfg(item_id, webpage)
    entries = self._entries(
        selected_tab, playlist_id, identity_token, account_syncid, ytcfg)
    return self.playlist_result(entries, **metadata)
73c4ac2c 3849
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a mix playlist, requesting further pages as needed.

    Each page is the playlist panel returned by the ``next`` endpoint for
    the last video seen.  Videos already yielded from the previous page are
    skipped.  Iteration ends when a page has no videos, when it brings
    nothing new, when the first video of the mix shows up again, or when
    the response contains no playlist.
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
        visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video of the previous page, if present
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last content item is not guaranteed to be a
        # playlistPanelVideoRenderer; fall back to an empty endpoint so the
        # defaults below apply instead of failing on ``None.get``.
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query,
            ep='next',
            headers=headers,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # No playlist in the response: nothing left to iterate
            return
cd7c66cf 3888
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Build the result for a playlist found on a watch page.

    If the playlist renderer points to a URL different from `url`, a
    url_result for YoutubeTabIE is returned; otherwise the playlist is
    extracted here as a Mix.
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_url)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, webpage),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3906
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns None when the initial data has no playlist sidebar items;
    otherwise returns the result of the (non-fatal) API request.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    if not sidebar_items:
        return
    browse_id = params = None
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        primary_info = sidebar_item.get('playlistSidebarPrimaryInfoRenderer')
        menu_items = try_get(
            primary_info, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_items:
            if not isinstance(menu_item, dict):
                continue
            nav_item = menu_item.get('menuNavigationItemRenderer')
            label = try_get(
                nav_item, lambda x: x['text']['simpleText'], compat_str)
            if label and label.lower() == 'show unavailable videos':
                endpoint = try_get(
                    nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
                browse_id, params = endpoint.get('browseId'), endpoint.get('params')
                # Only the first matching button of this menu is considered
                break

    ytcfg = self._extract_ytcfg(item_id, webpage)
    account_syncid = self._extract_account_syncid(ytcfg)
    identity_token = self._extract_identity_token(webpage, item_id=item_id)
    visitor_data = try_get(
        self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=account_syncid,
        identity_token=identity_token,
        visitor_data=visitor_data)
    # Fall back to fixed values when the button did not supply them
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False,
        note='Downloading API JSON with unavailable videos')
358de58c 3950
def _extract_webpage(self, url, item_id):
    """Download the webpage and extract its initial data, retrying if incomplete.

    The number of retries is taken from the 'extractor_retries' param
    (default 3). The data is accepted once it has either 'contents' or
    'currentVideoEndpoint'.

    @returns  (webpage, data)
    @raises   ExtractorError if the data is still incomplete after the last retry
    """
    retries = self.get_param('extractor_retries', 3)
    count = -1
    last_error = 'Incomplete yt initial data received'
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if count:
            self.report_warning('%s. Retrying ...' % last_error)
        webpage = self._download_webpage(
            url, item_id,
            'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
        data = self._extract_yt_initial_data(item_id, webpage)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        if count >= retries:
            raise ExtractorError(last_error)
    return webpage, data
3972
9297939e 3973 @staticmethod
3974 def _smuggle_data(entries, data):
3975 for entry in entries:
3976 if data:
3977 entry['url'] = smuggle_url(entry['url'], data)
3978 yield entry
3979
def _real_extract(self, url):
    """Unsmuggle the URL, run the actual extraction and re-smuggle the data into the entries."""
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True
    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        # Pass the smuggled data on to every entry URL
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
3988
# Splits a URL into `pre` (the part matched by _VALID_URL), `tab` (a "/word"
# segment, only attempted when the `channel_type` group of _VALID_URL matched)
# and `post` (whatever remains)
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
3990
def __real_extract(self, url, smuggled_data):
    """Normalize the URL, download its webpage and dispatch on what the page contains.

    @param url            URL (already unsmuggled)
    @param smuggled_data  Dict of smuggled data; only 'is_music_url' is read here

    Returns a url_result for a single video, or whatever
    _extract_from_tabs/_extract_from_playlist return.
    Raises ExtractorError when the page cannot be recognized or when the
    /live tab did not redirect to a video.
    """
    item_id = self._match_id(url)
    # Always request the page from www.youtube.com, whatever host was given
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Match against _url_re; unmatched groups become '' instead of None
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                item_id = self._search_regex(
                    r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                    self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                    'playlist id')
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    # Rebuild the URL from the (possibly rewritten) parts and re-match it
    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            # The /videos tab was requested, but a different tab got selected
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            if alert_type == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        # Switch over to the playlist only once it is known to be usable
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)

    # `data` may have been replaced above, so look up the tabs again
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    # Neither tabs nor a playlist: fall back to a single video, if there is one
    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 4106
c5e8d7af 4107
class YoutubePlaylistIE(InfoExtractor):
    # Matches bare playlist IDs and any youtube/invidio.us URL with a `list`
    # query parameter; the extraction itself is handed over to YoutubeTabIE
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE accepts or that carry a video ID (`v`)."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return not has_video_id and super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite the URL into a /playlist URL and delegate it to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query if there is one; else query by the matched ID
        query = parse_qs(url) or {'list': playlist_id}
        target = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            target = smuggle_url(target, {'is_music_url': True})
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4190
4191
class YoutubeYtBeIE(InfoExtractor):
    # youtu.be short links that carry both a video ID and a `list` parameter
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Turn the short link into the equivalent /watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4230
4231
class YoutubeYtUserIE(InfoExtractor):
    # "ytuser:<name>" keyword; resolved to the /user/<name> page
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the user's page URL to YoutubeTabIE."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4245
b05654f0 4246
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # ":ytfav" style keywords; resolved to the "LL" playlist
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the "LL" playlist URL to YoutubeTabIE."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4264
4265
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Sent as 'params' with the search request when set
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to `n` videos for `query`, following continuations page by page."""
        payload = {'query': query}
        if self._SEARCH_PARAMS:
            payload['params'] = self._SEARCH_PARAMS
        found = 0
        for page_num in itertools.count(1):
            response = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=payload,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not response:
                return
            # First page and continuation pages keep the results in different places
            sections = try_get(
                response,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            next_token = None
            for section in sections:
                if next_token is None:
                    next_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                renderers = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for renderer in renderers or []:
                    if not isinstance(renderer, dict):
                        continue
                    video = renderer.get('videoRenderer')
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    found += 1
                    if found == n:
                        return

            if not next_token:
                return
            payload['continuation'] = next_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        results = self._entries(query, n)
        return self.playlist_result(results, query)
75dff0ee 4335
c9ae7b95 4336
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same as YoutubeSearchIE, but with its own search key and a fixed
    # _SEARCH_PARAMS value (per IE_DESC: newest videos first)
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4342
c9ae7b95 4343
class YoutubeSearchURLIE(YoutubeSearchIE):
    # Handles /results URLs that carry a `search_query` or `q` parameter
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Use _VALID_URL as it is."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Take the query and the `sp` parameter from the URL and run the search."""
        url_params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        queries = url_params.get('search_query') or url_params.get('q')
        search_term = queries[0]
        # `sp` of the URL (or '') takes the place of the class-level search params
        self._SEARCH_PARAMS = url_params.get('sp', ('',))[0]
        return self._get_n_results(search_term, self._MAX_RESULTS)
3462ffa8 4369
4370
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        """Extractor name derived from the feed name."""
        feed = self._FEED_NAME
        return 'youtube:%s' % feed

    def _real_extract(self, url):
        """Delegate the /feed/<name> URL to YoutubeTabIE."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4387
4388
class YoutubeWatchLaterIE(InfoExtractor):
    # ":ytwatchlater" keyword; resolved to the "WL" playlist
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the "WL" playlist URL to YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4401
4402
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the "recommended" feed. Matches the bare youtube.com
    # home page as well as the ":ytrec"/":ytrecommended" keywords
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Overrides the True set by the base class
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4418
1ed5b5c9 4419
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the "subscriptions" feed; matches ":ytsub",
    # ":ytsubs", ":ytsubscription" and ":ytsubscriptions"
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4431
1ed5b5c9 4432
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the "history" feed; matches ":ythis" and ":ythistory"
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4441
4442
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution_link URLs that end without a video ID (for
    # example because an unquoted "&" was consumed by the shell) and only
    # reports a helpful error for them
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an (expected) ExtractorError explaining how to quote the URL."""
        # The example commands name this program (yt-dlp) so that they can be
        # copied as they are
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4490
4491
class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose `v` value is only 1 to 10 characters long
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an (expected) ExtractorError naming the incomplete ID."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)