]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[RTP] Fix extraction and add subtitles (#497)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
2d6659b9 5import base64
d92f5d5a 6import calendar
109dd3b2 7import copy
a5c56234 8import hashlib
0ca96d48 9import itertools
c5e8d7af 10import json
c4417ddb 11import os.path
d77ab8e2 12import random
c5e8d7af 13import re
8a784c74 14import time
e0df6211 15import traceback
c5e8d7af 16
b05654f0 17from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 18from ..compat import (
edf3e38e 19 compat_chr,
29f7c58a 20 compat_HTTPError,
c5e8d7af 21 compat_parse_qs,
545cc85d 22 compat_str,
7fd002c0 23 compat_urllib_parse_unquote_plus,
15707c7e 24 compat_urllib_parse_urlencode,
7c80519c 25 compat_urllib_parse_urlparse,
7c61bd36 26 compat_urlparse,
4bb4a188 27)
545cc85d 28from ..jsinterp import JSInterpreter
4bb4a188 29from ..utils import (
c224251a 30 bool_or_none,
2d6659b9 31 bytes_to_intlist,
c5e8d7af 32 clean_html,
26fe8ffe 33 dict_get,
d92f5d5a 34 datetime_from_str,
358de58c 35 error_to_compat_str,
c5e8d7af 36 ExtractorError,
b60419c5 37 format_field,
2d30521a 38 float_or_none,
dd27fd17 39 int_or_none,
2d6659b9 40 intlist_to_bytes,
94278f72 41 mimetype2ext,
6310acf5 42 parse_codecs,
7c80519c 43 parse_duration,
dca3ff4a 44 qualities,
3995d37d 45 remove_start,
cf7e015f 46 smuggle_url,
dbdaaa23 47 str_or_none,
c93d53f5 48 str_to_int,
556dbe7f 49 try_get,
c5e8d7af
PH
50 unescapeHTML,
51 unified_strdate,
cf7e015f 52 unsmuggle_url,
8bdd16b4 53 update_url_query,
21c340b8 54 url_or_none,
6e6bc8da 55 urlencode_postdata,
d92f5d5a 56 urljoin
c5e8d7af
PH
57)
58
5f6a1245 59
def parse_qs(url):
    """Return the query-string parameters of *url*.

    The URL is split with ``compat_urlparse.urlparse`` and its ``query``
    component is decoded with ``compat_urlparse.parse_qs``, so the result
    maps every parameter name to the list of values it carried.
    """
    return compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
62
63
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Google account endpoints. They were only referenced by the removed
    # username/password sign-in flow but stay available as class attributes.
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Regex alternation (no surrounding group) of reserved path names
    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _login(self):
        """
        Check the login configuration. No login request is performed.

        Logging in with username and password is broken, so cookies are the
        only supported way to authenticate:

        * If _LOGIN_REQUIRED is set and the 'cookiefile' param is None,
          self.raise_login_required() is called.
        * If a username is configured, a warning pointing at the cookies
          login hint is reported.

        Always returns None. The legacy Google sign-in flow that used to
        follow the early return was unreachable (it lived inside a string
        literal) and is no longer kept here; see version control history.
        """
        # username+password login is broken
        if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
            self.raise_login_required(
                'Login details are needed to download this content', method='cookies')
        username, _password = self._get_login_info()
        if username:
            self.report_warning(
                'Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])

    def _initialize_consent(self):
        """Set an accepted CONSENT cookie for .youtube.com unless one is not needed.

        Nothing is done when a '__Secure-3PSID' cookie exists or when the
        existing CONSENT cookie already contains 'YES'. Otherwise the id of a
        'PENDING+<id>' consent cookie is reused, or a random three digit id
        is generated.
        """
        cookies = self._get_cookies('https://www.youtube.com/')
        if cookies.get('__Secure-3PSID'):
            return
        consent_id = None
        consent = cookies.get('CONSENT')
        if consent:
            if 'YES' in consent.value:
                return
            consent_id = self._search_regex(
                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
        if not consent_id:
            consent_id = random.randint(100, 999)
        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

    def _real_initialize(self):
        """Prepare the consent cookie and, when a downloader is attached, run _login()."""
        self._initialize_consent()
        if self._downloader is None:
            return
        if not self._login():
            return

    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    # Fallback ytcfg values per Innertube client, used whenever a value
    # cannot be read from a ytcfg extracted at runtime.
    _YT_DEFAULT_YTCFGS = {
        'WEB': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB',
            'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20210622.10.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 1
        },
        'WEB_REMIX': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
            'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_REMIX',
                    'clientVersion': '1.20210621.00.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 67
        },
        'WEB_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_EMBEDDED_PLAYER',
                    'clientVersion': '1.20210620.0.1',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 56
        },
        'ANDROID': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID'
        },
        'ANDROID_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_EMBEDDED_PLAYER',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER'
        },
        'ANDROID_MUSIC': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
            'INNERTUBE_CLIENT_VERSION': '4.32',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_MUSIC',
                    'clientVersion': '4.32',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID_MUSIC'
        }
    }

    # API hostname per client; clients without an entry use the 'WEB' host
    # (see _get_innertube_host).
    _YT_DEFAULT_INNERTUBE_HOSTS = {
        'DIRECT': 'youtubei.googleapis.com',
        'WEB': 'www.youtube.com',
        'WEB_REMIX': 'music.youtube.com',
        'ANDROID_MUSIC': 'music.youtube.com'
    }

    def _get_default_ytcfg(self, client='WEB'):
        """Return a deep copy of the default ytcfg for *client*.

        Unknown clients fall back to the 'WEB' defaults after a debug
        message. A copy is returned so callers may mutate the result.
        """
        if client in self._YT_DEFAULT_YTCFGS:
            return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
        self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
        return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])

    def _get_innertube_host(self, client='WEB'):
        """Return the API hostname for *client*, falling back to the 'WEB' host."""
        return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, (client, 'WEB'))

    def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
        """try_get() on *ytcfg*, falling back to the default ytcfg of *default_client*.

        The fallback is used whenever the value read from *ytcfg* is falsy.
        """
        def lookup(cfg):
            return try_get(cfg, getter, expected_type)

        return lookup(ytcfg) or lookup(self._get_default_ytcfg(default_client))

    def _extract_client_name(self, ytcfg, default_client='WEB'):
        """Return INNERTUBE_CLIENT_NAME from *ytcfg* or from the client defaults."""
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str, default_client)

    def _extract_client_version(self, ytcfg, default_client='WEB'):
        """Return INNERTUBE_CLIENT_VERSION from *ytcfg* or from the client defaults."""
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str, default_client)

    def _extract_api_key(self, ytcfg=None, default_client='WEB'):
        """Return INNERTUBE_API_KEY from *ytcfg* or from the client defaults."""
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)

    def _extract_context(self, ytcfg=None, default_client='WEB'):
        """Return the Innertube context dict to send with API requests.

        If *ytcfg* carries a non-empty INNERTUBE_CONTEXT dict, that object is
        returned as is. Otherwise a copy of the default context of
        *default_client* is used; when a *ytcfg* was given, its client
        name/version and VISITOR_DATA (if any) are merged into the copy.
        """
        def get_context(cfg):
            return try_get(cfg, lambda x: x['INNERTUBE_CONTEXT'], dict)

        context = get_context(ytcfg)
        if context:
            return context

        context = get_context(self._get_default_ytcfg(default_client))
        if not ytcfg:
            return context

        # Recreate the client context (required)
        context['client'].update({
            'clientVersion': self._extract_client_version(ytcfg, default_client),
            'clientName': self._extract_client_name(ytcfg, default_client),
        })
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            context['client']['visitorData'] = visitor_data
        return context

    def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
        """Return the 'SAPISIDHASH <time>_<sha1>' authorization value.

        Returns None when neither a '__Secure-3PAPISID' nor a 'SAPISID'
        cookie is available for https://www.youtube.com.
        """
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        yt_cookies = self._get_cookies('https://www.youtube.com')
        sapisid_cookie = dict_get(
            yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
        if sapisid_cookie is None:
            return
        time_now = round(time.time())
        # SAPISID cookie is required if not already present
        if not yt_cookies.get('SAPISID'):
            self._set_cookie(
                '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
        # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
        sapisidhash = hashlib.sha1(
            f'{time_now} {sapisid_cookie.value} {origin}'.encode('utf-8')).hexdigest()
        return f'SAPISIDHASH {time_now}_{sapisidhash}'

    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page',
                  context=None, api_key=None, api_hostname=None, default_client='WEB'):
        """POST *query* to the Innertube endpoint *ep* and return the parsed JSON.

        The request body is {'context': ...} updated with *query*; the
        context, headers, hostname and API key each fall back to the
        defaults of *default_client* when not passed explicitly. *headers*
        are applied on top of the generated API headers.
        """
        data = {'context': context or self._extract_context(default_client=default_client)}
        data.update(query)
        real_headers = self._generate_api_headers(client=default_client)
        real_headers.update({'content-type': 'application/json'})
        if headers:
            real_headers.update(headers)
        return self._download_json(
            'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
            video_id=video_id, fatal=fatal, note=note, errnote=errnote,
            data=json.dumps(data).encode('utf8'), headers=real_headers,
            # The key must belong to the same client as the context and host
            query={'key': api_key or self._extract_api_key(default_client=default_client)})

    def _extract_yt_initial_data(self, video_id, webpage):
        """Find and parse the ytInitialData JSON object embedded in *webpage*."""
        return self._parse_json(
            self._search_regex(
                # Prefer the match that is terminated by a known boundary
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_identity_token(self, webpage, item_id):
        """Return ID_TOKEN from the page's ytcfg, or via regex on *webpage*; None if absent."""
        ytcfg = self._extract_ytcfg(item_id, webpage)
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
                return token
        return self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)

    @staticmethod
    def _extract_account_syncid(data):
        """
        Extract syncId required to download private playlists of secondary channels
        @param data Either response or ytcfg
        @returns    The channel sync id, the DELEGATED_SESSION_ID of data, or None
        """
        sync_ids = (try_get(
            data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                   lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
        if len(sync_ids) >= 2 and sync_ids[1]:
            # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
            # and just "user_syncid||" for primary channel. We only want the channel_syncid
            return sync_ids[0]
        # ytcfg includes channel_syncid if on secondary channel.
        # data may be None or a non-dict, which the lookup above tolerates.
        if isinstance(data, dict):
            return data.get('DELEGATED_SESSION_ID')
        return None

    def _extract_ytcfg(self, video_id, webpage):
        """Parse the first ``ytcfg.set({...});`` object in *webpage*; {} when unavailable."""
        if not webpage:
            return {}
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False) or {}

    def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None,
                              visitor_data=None, api_hostname=None, client='WEB'):
        """Build the HTTP headers for an Innertube API request.

        Client name/version come from *ytcfg* with fallback to the defaults
        of *client*. Identity token, page id and visitor id headers are only
        added when the respective value is available, and the Authorization
        and X-Origin headers only when a SAPISIDHASH could be generated.
        """
        origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(client))
        headers = {
            'X-YouTube-Client-Name': compat_str(
                self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=client)),
            'X-YouTube-Client-Version': self._extract_client_version(ytcfg, client),
            'Origin': origin
        }
        if not visitor_data and ytcfg:
            visitor_data = try_get(
                self._extract_context(ytcfg, client), lambda x: x['client']['visitorData'], compat_str)
        if identity_token:
            headers['X-Youtube-Identity-Token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
            headers['X-Goog-AuthUser'] = 0
        if visitor_data:
            headers['X-Goog-Visitor-Id'] = visitor_data
        auth = self._generate_sapisidhash_header(origin)
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = origin
        return headers

    @staticmethod
    def _build_api_continuation_query(continuation, ctp=None):
        """Return the API-style continuation query, with click tracking when *ctp* is given."""
        query = {
            'continuation': continuation
        }
        # TODO: Inconsistency with clickTrackingParams.
        # Currently we have a fixed ctp contained within context (from ytcfg)
        # and a ctp in root query for continuation.
        if ctp:
            query['clickTracking'] = {'clickTrackingParams': ctp}
        return query

    @classmethod
    def _continuation_query_ajax_to_api(cls, continuation_query):
        """Convert a query from _build_continuation_query into the API-style query."""
        continuation = dict_get(continuation_query, ('continuation', 'ctoken'))
        return cls._build_api_continuation_query(continuation, continuation_query.get('itct'))

    @staticmethod
    def _build_continuation_query(continuation, ctp=None):
        """Return the ajax-style continuation query ('ctoken', 'continuation', optional 'itct')."""
        query = {
            'ctoken': continuation,
            'continuation': continuation,
        }
        if ctp:
            query['itct'] = ctp
        return query

    @classmethod
    def _extract_next_continuation_data(cls, renderer):
        """Build a continuation query from next/reload continuation data in *renderer*.

        Returns None when no such data or no continuation token is present.
        """
        next_continuation = try_get(
            renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                       lambda x: x['continuation']['reloadContinuationData']), dict)
        if not next_continuation:
            return
        continuation = next_continuation.get('continuation')
        if not continuation:
            return
        ctp = next_continuation.get('clickTrackingParams')
        return cls._build_continuation_query(continuation, ctp)

    @classmethod
    def _extract_continuation_ep_data(cls, continuation_ep: dict):
        """Build a continuation query from a continuation endpoint dict.

        Returns None when *continuation_ep* is not a dict or has no
        continuationCommand token.
        """
        if isinstance(continuation_ep, dict):
            continuation = try_get(
                continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
            if not continuation:
                return
            ctp = continuation_ep.get('clickTrackingParams')
            return cls._build_continuation_query(continuation, ctp)

    @classmethod
    def _extract_continuation(cls, renderer):
        """Return the continuation query for *renderer*, or None if there is none.

        Next/reload continuation data is preferred; otherwise the entries of
        the renderer's 'contents' and 'items' lists are searched for a
        continuationItemRenderer endpoint or button command.
        """
        next_continuation = cls._extract_next_continuation_data(renderer)
        if next_continuation:
            return next_continuation
        contents = []
        for key in ('contents', 'items'):
            contents.extend(try_get(renderer, lambda x: x[key], list) or [])
        for content in contents:
            if not isinstance(content, dict):
                continue
            continuation_ep = try_get(
                content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                          lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
                dict)
            continuation = cls._extract_continuation_ep_data(continuation_ep)
            if continuation:
                return continuation

    @staticmethod
    def _extract_alerts(data):
        """Yield one (alert_type, message) tuple per alert found in data['alerts'].

        The message is the alert's simpleText followed by the text of all its
        runs. Alerts without a type or without any text are skipped, as are
        entries that are not dicts.
        """
        for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert_dict, dict):
                continue
            for alert in alert_dict.values():
                if not isinstance(alert, dict):
                    continue
                alert_type = alert.get('type')
                if not alert_type:
                    continue
                message = try_get(alert, lambda x: x['text']['simpleText'], compat_str) or ''
                runs = try_get(alert, lambda x: x['text']['runs'], list) or []
                # Runs without a usable text contribute nothing
                message += ''.join(
                    try_get(run, lambda x: x['text'], compat_str) or '' for run in runs)
                # Yield exactly once per alert so it is not reported twice
                if message:
                    yield alert_type, message

    def _report_alerts(self, alerts, expected=True):
        """Report (alert_type, message) pairs as warnings and raise on errors.

        Alerts whose type is 'error' (case-insensitive) are errors. All
        non-error alerts and all but the last error are reported as warnings;
        the last error is raised as ExtractorError with *expected*.
        """
        errors = []
        warnings = []
        for alert_type, alert_message in alerts:
            if alert_type.lower() == 'error':
                errors.append([alert_type, alert_message])
            else:
                warnings.append([alert_type, alert_message])

        for alert_type, alert_message in (warnings + errors[:-1]):
            self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
        if errors:
            raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

    def _extract_and_report_alerts(self, data, *args, **kwargs):
        """Extract the alerts of *data* and pass them to _report_alerts()."""
        return self._report_alerts(self._extract_alerts(data), *args, **kwargs)

    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='WEB'):
        """Call the API with retries and return the response.

        The call is repeated up to the 'extractor_retries' param (default 3)
        additional times on HTTP 500/503/404 errors, or when none of
        *check_get_keys* yields a value from the response. Alerts in the
        response are reported via _extract_and_report_alerts().

        When *fatal* is false, failures are reported as warnings and None is
        returned instead of raising ExtractorError.
        """
        response = None
        last_error = None
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if count < retries:
                        continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response

    @staticmethod
    def is_music_url(url):
        """Return True if *url* starts with http(s)://music.youtube.com/."""
        return re.match(r'https?://music\.youtube\.com/', url) is not None

    def _extract_video(self, renderer):
        """Convert a video renderer dict into a 'url' result handled by YoutubeIE.

        Fields that cannot be read from *renderer* are left as None.
        """
        video_id = renderer.get('videoId')
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']), compat_str)
        description = try_get(
            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
            compat_str)
        duration = parse_duration(try_get(
            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
        view_count_text = try_get(
            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
        # Whitespace is stripped first, then the leading digits/commas are read
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
            renderer,
            (lambda x: x['ownerText']['runs'][0]['text'],
             lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
738
0c148415 739
360e1ca5 740class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 741 IE_DESC = 'YouTube.com'
bc2ca1bb 742 _INVIDIOUS_SITES = (
743 # invidious-redirect websites
744 r'(?:www\.)?redirect\.invidious\.io',
745 r'(?:(?:www|dev)\.)?invidio\.us',
746 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
747 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 748 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 749 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 750 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
bc2ca1bb 751 # youtube-dl invidious instances list
752 r'(?:(?:www|no)\.)?invidiou\.sh',
753 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
754 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 755 r'(?:www\.)?invidious\.mastodon\.host',
756 r'(?:www\.)?invidious\.zapashcanon\.fr',
ed807c18 757 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
201c1459 758 r'(?:www\.)?invidious\.tinfoil-hat\.net',
759 r'(?:www\.)?invidious\.himiko\.cloud',
760 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 761 r'(?:www\.)?invidious\.tube',
762 r'(?:www\.)?invidiou\.site',
763 r'(?:www\.)?invidious\.site',
764 r'(?:www\.)?invidious\.xyz',
765 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 766 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 767 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 768 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 769 r'(?:www\.)?tube\.poal\.co',
770 r'(?:www\.)?tube\.connect\.cafe',
771 r'(?:www\.)?vid\.wxzm\.sx',
772 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 773 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 774 r'(?:www\.)?yewtu\.be',
775 r'(?:www\.)?yt\.elukerio\.org',
776 r'(?:www\.)?yt\.lelux\.fi',
777 r'(?:www\.)?invidious\.ggc-project\.de',
778 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 779 r'(?:www\.)?ytprivate\.com',
780 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 781 r'(?:www\.)?invidious\.toot\.koeln',
782 r'(?:www\.)?invidious\.fdn\.fr',
783 r'(?:www\.)?watch\.nettohikari\.com',
ed807c18 784 r'(?:www\.)?invidious\.namazso\.eu',
785 r'(?:www\.)?invidious\.silkky\.cloud',
786 r'(?:www\.)?invidious\.exonip\.de',
787 r'(?:www\.)?invidious\.riverside\.rocks',
788 r'(?:www\.)?invidious\.blamefran\.net',
789 r'(?:www\.)?invidious\.moomoo\.de',
790 r'(?:www\.)?ytb\.trom\.tf',
791 r'(?:www\.)?yt\.cyberhost\.uk',
bc2ca1bb 792 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
793 r'(?:www\.)?qklhadlycap4cnod\.onion',
794 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
795 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
796 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
797 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
798 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
799 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
ed807c18 800 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
801 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
802 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
803 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
bc2ca1bb 804 )
# Verbose-mode ((?x)) regular expression matching the URLs handled by this
# extractor; the named group `id` captures the 11-character video ID.
# Layout of the pattern:
#   * group 1 (optional) is the whole URL prefix: scheme (or a bare `//`),
#     one of the listed hostnames, and one of the path/query forms that can
#     precede the ID (v/, embed/, e/, watch?v=..., short-link hosts, ...);
#   * `%(invidious)s` is filled in by the %-formatting at the end with the
#     entries of _INVIDIOUS_SITES joined by `|`;
#   * `(?(1).+)?` is a conditional on group 1: trailing characters after the
#     ID are only permitted when the URL prefix matched, so a naked ID must
#     be followed directly by `#` or the end of the string.
# `#` is written as `\#` throughout because an unescaped `#` starts a comment
# in verbose mode.
cb7dfeea 805 _VALID_URL = r"""(?x)^
c5e8d7af 806 (
edb53e2d 807 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 808 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
809 (?:www\.)?deturl\.com/www\.youtube\.com|
810 (?:www\.)?pwnyoutube\.com|
811 (?:www\.)?hooktube\.com|
812 (?:www\.)?yourepeat\.com|
813 tube\.majestyc\.net|
814 %(invidious)s|
815 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
816 (?:.*?\#/)? # handle anchor (#/) redirect urls
817 (?: # the various things that can precede the ID:
ac7553d0 818 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 819 |(?: # or the v= param in all its forms
f7000f3a 820 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 821 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 822 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
823 v=
824 )
f4b05232 825 ))
cbaed4bb
S
826 |(?:
827 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
828 vid\.plus| # or vid.plus/xxxx
829 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 830 %(invidious)s
cbaed4bb 831 )/
edb53e2d 832 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 833 )
c5e8d7af 834 )? # all until now is optional -> you can pass the naked ID
201c1459 835 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 836 (?(1).+)? # if we found the ID, everything can follow
9297939e 837 (?:\#|$)""" % {
bc2ca1bb 838 'invidious': '|'.join(_INVIDIOUS_SITES),
839 }
# Tuple of regexes that each capture a player identifier in the named group
# `id`:
#   1. `/s/player/<id>/player...` paths (id is at least 8 characters);
#   2. `/<id>/player_ias.vflset[/<locale>]/base.js` or
#      `/<id>/player-plasma-ias-(phone|tablet)-<locale>.vflset/base.js`;
#   3. a `vfl...` token anywhere before a trailing `.js`.
# NOTE(review): presumably tried in order against the player JS URL; the code
# that consumes this tuple is outside this excerpt — confirm.
e40c758c 840 _PLAYER_INFO_RE = (
cc2db878 841 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
842 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 843 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 844 )
# Static lookup table of known YouTube formats, keyed by itag (as a string).
# Each value is a partial format dict; the keys that occur below are 'ext',
# 'width', 'height', 'acodec', 'vcodec', 'abr', 'fps', 'format_note',
# 'container', 'preference' and 'protocol'. Properties that are not fixed for
# an itag are left out (e.g. itags 36 and 138 carry no 'height'; see the
# inline notes next to them).
# '_rtmp' is a pseudo-key rather than a numeric itag; it only sets 'protocol'.
# NOTE(review): how these entries are merged into the extracted formats is
# not visible in this excerpt.
2c62dc26 845 _formats = {
c2d3cb4c 846 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
847 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
848 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
849 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
850 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
851 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
852 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
853 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 854 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 855 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
856 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
857 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
858 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
859 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
860 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 861 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 862 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
863 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 864
865
866 # 3D videos
c2d3cb4c 867 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
868 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
869 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
870 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 871 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
872 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
873 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 874
96fb5605 875 # Apple HTTP Live Streaming
11f12195 876 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 877 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
878 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
879 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
880 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
881 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 882 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
883 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
884
885 # DASH mp4 video
d23028a8
S
886 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
887 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
888 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
889 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
890 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 891 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
892 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
893 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
894 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
895 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
896 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
897 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 898
f6f1fc92 899 # Dash mp4 audio
d23028a8
S
900 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
901 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
902 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
903 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
904 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
905 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
906 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
907
908 # Dash webm
d23028a8
S
909 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
910 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
911 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
912 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
913 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
914 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
915 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
916 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
917 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
918 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
919 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
920 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
921 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
922 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
923 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 924 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
925 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
926 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
927 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
928 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
929 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
930 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
931
932 # Dash webm audio
d23028a8
S
933 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
934 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 935
0857baad 936 # Dash webm audio with opus inside
d23028a8
S
937 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
938 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
939 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 940
ce6b9a2d
PH
941 # RTMP (unnamed)
942 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
943
944 # av01 video only formats sometimes served with "unknown" codecs
945 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
946 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
947 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
948 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 949 }
# Tuple of subtitle format identifiers known to this extractor.
# NOTE(review): presumably the formats offered for each subtitle track; the
# code that reads this tuple is outside this excerpt — confirm.
29f7c58a 950 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 951
# Message strings associated with age-restricted videos.
# NOTE(review): presumably compared against the reason text YouTube returns
# for an unplayable video to detect an age gate; the comparison itself is
# outside this excerpt — confirm whether it is exact or substring matching.
109dd3b2 952 _AGE_GATE_REASONS = (
953 'Sign in to confirm your age',
954 'This video may be inappropriate for some users.',
955 'Sorry, this content is age-restricted.')
956
# Class-level flag, set to False for this extractor.
# NOTE(review): presumably read by the InfoExtractor base class to turn off
# geo-restriction bypass; the base class is not part of this excerpt — confirm.
fd5c4aab
S
957 _GEO_BYPASS = False
958
# Name string of this extractor.
78caa52a 959 IE_NAME = 'youtube'
2eb88d95
PH
960 _TESTS = [
961 {
2d3d2997 962 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
963 'info_dict': {
964 'id': 'BaW_jenozKc',
965 'ext': 'mp4',
3867038a 966 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
967 'uploader': 'Philipp Hagemeister',
968 'uploader_id': 'phihag',
ec85ded8 969 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
970 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
971 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 972 'upload_date': '20121002',
3867038a 973 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 974 'categories': ['Science & Technology'],
3867038a 975 'tags': ['youtube-dl'],
556dbe7f 976 'duration': 10,
dbdaaa23 977 'view_count': int,
3e7c1224
PH
978 'like_count': int,
979 'dislike_count': int,
7c80519c 980 'start_time': 1,
297a564b 981 'end_time': 9,
2eb88d95 982 }
0e853ca4 983 },
fccd3771 984 {
4bc3a23e
PH
985 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
986 'note': 'Embed-only video (#1746)',
987 'info_dict': {
988 'id': 'yZIXLfi8CZQ',
989 'ext': 'mp4',
990 'upload_date': '20120608',
991 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
992 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
993 'uploader': 'SET India',
94bfcd23 994 'uploader_id': 'setindia',
ec85ded8 995 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 996 'age_limit': 18,
545cc85d 997 },
998 'skip': 'Private video',
fccd3771 999 },
11b56058 1000 {
8bdd16b4 1001 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1002 'note': 'Use the first video ID in the URL',
1003 'info_dict': {
1004 'id': 'BaW_jenozKc',
1005 'ext': 'mp4',
3867038a 1006 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1007 'uploader': 'Philipp Hagemeister',
1008 'uploader_id': 'phihag',
ec85ded8 1009 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 1010 'upload_date': '20121002',
3867038a 1011 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 1012 'categories': ['Science & Technology'],
3867038a 1013 'tags': ['youtube-dl'],
556dbe7f 1014 'duration': 10,
dbdaaa23 1015 'view_count': int,
11b56058
PM
1016 'like_count': int,
1017 'dislike_count': int,
34a7de29
S
1018 },
1019 'params': {
1020 'skip_download': True,
1021 },
11b56058 1022 },
dd27fd17 1023 {
2d3d2997 1024 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1025 'note': '256k DASH audio (format 141) via DASH manifest',
1026 'info_dict': {
1027 'id': 'a9LDPn-MO4I',
1028 'ext': 'm4a',
1029 'upload_date': '20121002',
1030 'uploader_id': '8KVIDEO',
ec85ded8 1031 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1032 'description': '',
1033 'uploader': '8KVIDEO',
1034 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1035 },
4bc3a23e
PH
1036 'params': {
1037 'youtube_include_dash_manifest': True,
1038 'format': '141',
4919603f 1039 },
de3c7fe0 1040 'skip': 'format 141 not served anymore',
dd27fd17 1041 },
8bdd16b4 1042 # DASH manifest with encrypted signature
1043 {
1044 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1045 'info_dict': {
1046 'id': 'IB3lcPjvWLA',
1047 'ext': 'm4a',
1048 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1049 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1050 'duration': 244,
1051 'uploader': 'AfrojackVEVO',
1052 'uploader_id': 'AfrojackVEVO',
1053 'upload_date': '20131011',
cc2db878 1054 'abr': 129.495,
8bdd16b4 1055 },
1056 'params': {
1057 'youtube_include_dash_manifest': True,
1058 'format': '141/bestaudio[ext=m4a]',
1059 },
1060 },
aa79ac0c
PH
1061 # Controversy video
1062 {
1063 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
1064 'info_dict': {
1065 'id': 'T4XJQO3qol8',
1066 'ext': 'mp4',
556dbe7f 1067 'duration': 219,
aa79ac0c 1068 'upload_date': '20100909',
4fe54c12 1069 'uploader': 'Amazing Atheist',
aa79ac0c 1070 'uploader_id': 'TheAmazingAtheist',
ec85ded8 1071 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 1072 'title': 'Burning Everyone\'s Koran',
545cc85d 1073 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 1074 }
c522adb1 1075 },
dd2d55f1 1076 # Normal age-gate video (embed allowed)
c522adb1 1077 {
2d3d2997 1078 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1079 'info_dict': {
1080 'id': 'HtVdAasjOgU',
1081 'ext': 'mp4',
1082 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1083 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1084 'duration': 142,
c522adb1
JMF
1085 'uploader': 'The Witcher',
1086 'uploader_id': 'WitcherGame',
ec85ded8 1087 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1088 'upload_date': '20140605',
34952f09 1089 'age_limit': 18,
c522adb1
JMF
1090 },
1091 },
8bdd16b4 1092 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1093 # YouTube Red ad is not captured for creator
1094 {
1095 'url': '__2ABJjxzNo',
1096 'info_dict': {
1097 'id': '__2ABJjxzNo',
1098 'ext': 'mp4',
1099 'duration': 266,
1100 'upload_date': '20100430',
1101 'uploader_id': 'deadmau5',
1102 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1103 'creator': 'deadmau5',
1104 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1105 'uploader': 'deadmau5',
1106 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1107 'alt_title': 'Some Chords',
8bdd16b4 1108 },
1109 'expected_warnings': [
1110 'DASH manifest missing',
1111 ]
1112 },
067aa17e 1113 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1114 {
1115 'url': 'lqQg6PlCWgI',
1116 'info_dict': {
1117 'id': 'lqQg6PlCWgI',
1118 'ext': 'mp4',
556dbe7f 1119 'duration': 6085,
90227264 1120 'upload_date': '20150827',
cbe2bd91 1121 'uploader_id': 'olympic',
ec85ded8 1122 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1123 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 1124 'uploader': 'Olympic',
cbe2bd91
PH
1125 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1126 },
1127 'params': {
1128 'skip_download': 'requires avconv',
e52a40ab 1129 }
cbe2bd91 1130 },
6271f1ca
PH
1131 # Non-square pixels
1132 {
1133 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1134 'info_dict': {
1135 'id': '_b-2C3KPAM0',
1136 'ext': 'mp4',
1137 'stretched_ratio': 16 / 9.,
556dbe7f 1138 'duration': 85,
6271f1ca
PH
1139 'upload_date': '20110310',
1140 'uploader_id': 'AllenMeow',
ec85ded8 1141 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1142 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1143 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1144 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1145 },
06b491eb
S
1146 },
1147 # url_encoded_fmt_stream_map is empty string
1148 {
1149 'url': 'qEJwOuvDf7I',
1150 'info_dict': {
1151 'id': 'qEJwOuvDf7I',
f57b7835 1152 'ext': 'webm',
06b491eb
S
1153 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1154 'description': '',
1155 'upload_date': '20150404',
1156 'uploader_id': 'spbelect',
1157 'uploader': 'Наблюдатели Петербурга',
1158 },
1159 'params': {
1160 'skip_download': 'requires avconv',
e323cf3f
S
1161 },
1162 'skip': 'This live event has ended.',
06b491eb 1163 },
067aa17e 1164 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1165 {
1166 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1167 'info_dict': {
1168 'id': 'FIl7x6_3R5Y',
eb6793ba 1169 'ext': 'webm',
da77d856
S
1170 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1171 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1172 'duration': 220,
da77d856
S
1173 'upload_date': '20150625',
1174 'uploader_id': 'dorappi2000',
ec85ded8 1175 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1176 'uploader': 'dorappi2000',
eb6793ba 1177 'formats': 'mincount:31',
da77d856 1178 },
eb6793ba 1179 'skip': 'not actual anymore',
2ee8f5d8 1180 },
8a1a26ce
YCH
1181 # DASH manifest with segment_list
1182 {
1183 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1184 'md5': '8ce563a1d667b599d21064e982ab9e31',
1185 'info_dict': {
1186 'id': 'CsmdDsKjzN8',
1187 'ext': 'mp4',
17ee98e1 1188 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1189 'uploader': 'Airtek',
1190 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1191 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1192 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1193 },
1194 'params': {
1195 'youtube_include_dash_manifest': True,
1196 'format': '135', # bestvideo
be49068d
S
1197 },
1198 'skip': 'This live event has ended.',
2ee8f5d8 1199 },
cf7e015f
S
1200 {
1201 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1202 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1203 'info_dict': {
545cc85d 1204 'id': 'jvGDaLqkpTg',
1205 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1206 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1207 },
1208 'playlist': [{
1209 'info_dict': {
545cc85d 1210 'id': 'jvGDaLqkpTg',
cf7e015f 1211 'ext': 'mp4',
545cc85d 1212 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1213 'description': 'md5:e03b909557865076822aa169218d6a5d',
1214 'duration': 10643,
1215 'upload_date': '20161111',
1216 'uploader': 'Team PGP',
1217 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1218 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1219 },
1220 }, {
1221 'info_dict': {
545cc85d 1222 'id': '3AKt1R1aDnw',
cf7e015f 1223 'ext': 'mp4',
545cc85d 1224 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1225 'description': 'md5:e03b909557865076822aa169218d6a5d',
1226 'duration': 10991,
1227 'upload_date': '20161111',
1228 'uploader': 'Team PGP',
1229 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1230 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1231 },
1232 }, {
1233 'info_dict': {
545cc85d 1234 'id': 'RtAMM00gpVc',
cf7e015f 1235 'ext': 'mp4',
545cc85d 1236 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1237 'description': 'md5:e03b909557865076822aa169218d6a5d',
1238 'duration': 10995,
1239 'upload_date': '20161111',
1240 'uploader': 'Team PGP',
1241 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1242 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1243 },
1244 }, {
1245 'info_dict': {
545cc85d 1246 'id': '6N2fdlP3C5U',
cf7e015f 1247 'ext': 'mp4',
545cc85d 1248 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1249 'description': 'md5:e03b909557865076822aa169218d6a5d',
1250 'duration': 10990,
1251 'upload_date': '20161111',
1252 'uploader': 'Team PGP',
1253 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1254 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1255 },
1256 }],
1257 'params': {
1258 'skip_download': True,
1259 },
cbaed4bb 1260 },
f9f49d87 1261 {
067aa17e 1262 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1263 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1264 'info_dict': {
1265 'id': 'gVfLd0zydlo',
1266 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1267 },
1268 'playlist_count': 2,
be49068d 1269 'skip': 'Not multifeed anymore',
f9f49d87 1270 },
cbaed4bb 1271 {
2d3d2997 1272 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1273 'only_matching': True,
0e49d9a6 1274 },
6d4fc66b 1275 {
2d3d2997 1276 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1277 'only_matching': True,
1278 },
0e49d9a6 1279 {
067aa17e 1280 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1281 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1282 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1283 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1284 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1285 'info_dict': {
1286 'id': 'lsguqyKfVQg',
1287 'ext': 'mp4',
1288 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 1289 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 1290 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1291 'duration': 133,
0e49d9a6
LL
1292 'upload_date': '20151119',
1293 'uploader_id': 'IronSoulElf',
ec85ded8 1294 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1295 'uploader': 'IronSoulElf',
eb6793ba
S
1296 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1297 'track': 'Dark Walk - Position Music',
1298 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 1299 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1300 },
1301 'params': {
1302 'skip_download': True,
1303 },
1304 },
61f92af1 1305 {
067aa17e 1306 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1307 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1308 'only_matching': True,
1309 },
313dfc45
LL
1310 {
1311 # Video with yt:stretch=17:0
1312 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1313 'info_dict': {
1314 'id': 'Q39EVAstoRM',
1315 'ext': 'mp4',
1316 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1317 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1318 'upload_date': '20151107',
1319 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1320 'uploader': 'CH GAMER DROID',
1321 },
1322 'params': {
1323 'skip_download': True,
1324 },
be49068d 1325 'skip': 'This video does not exist.',
313dfc45 1326 },
201c1459 1327 {
1328 # Video with incomplete 'yt:stretch=16:'
1329 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1330 'only_matching': True,
1331 },
7caf9830
S
1332 {
1333 # Video licensed under Creative Commons
1334 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1335 'info_dict': {
1336 'id': 'M4gD1WSo5mA',
1337 'ext': 'mp4',
1338 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1339 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1340 'duration': 721,
7caf9830
S
1341 'upload_date': '20150127',
1342 'uploader_id': 'BerkmanCenter',
ec85ded8 1343 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1344 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1345 'license': 'Creative Commons Attribution license (reuse allowed)',
1346 },
1347 'params': {
1348 'skip_download': True,
1349 },
1350 },
fd050249
S
1351 {
1352 # Channel-like uploader_url
1353 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1354 'info_dict': {
1355 'id': 'eQcmzGIKrzg',
1356 'ext': 'mp4',
1357 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1358 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1359 'duration': 4060,
fd050249 1360 'upload_date': '20151119',
eb6793ba 1361 'uploader': 'Bernie Sanders',
fd050249 1362 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1363 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1364 'license': 'Creative Commons Attribution license (reuse allowed)',
1365 },
1366 'params': {
1367 'skip_download': True,
1368 },
1369 },
040ac686
S
1370 {
1371 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1372 'only_matching': True,
7f29cf54
S
1373 },
1374 {
067aa17e 1375 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1376 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1377 'only_matching': True,
6496ccb4
S
1378 },
1379 {
1380 # Rental video preview
1381 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1382 'info_dict': {
1383 'id': 'uGpuVWrhIzE',
1384 'ext': 'mp4',
1385 'title': 'Piku - Trailer',
1386 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1387 'upload_date': '20150811',
1388 'uploader': 'FlixMatrix',
1389 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1390 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1391 'license': 'Standard YouTube License',
1392 },
1393 'params': {
1394 'skip_download': True,
1395 },
eb6793ba 1396 'skip': 'This video is not available.',
022a5d66 1397 },
12afdc2a
S
1398 {
1399 # YouTube Red video with episode data
1400 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1401 'info_dict': {
1402 'id': 'iqKdEhx-dD4',
1403 'ext': 'mp4',
1404 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1405 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1406 'duration': 2085,
12afdc2a
S
1407 'upload_date': '20170118',
1408 'uploader': 'Vsauce',
1409 'uploader_id': 'Vsauce',
1410 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1411 'series': 'Mind Field',
1412 'season_number': 1,
1413 'episode_number': 1,
1414 },
1415 'params': {
1416 'skip_download': True,
1417 },
1418 'expected_warnings': [
1419 'Skipping DASH manifest',
1420 ],
1421 },
c7121fa7
S
1422 {
1423 # The following content has been identified by the YouTube community
1424 # as inappropriate or offensive to some audiences.
1425 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1426 'info_dict': {
1427 'id': '6SJNVb0GnPI',
1428 'ext': 'mp4',
1429 'title': 'Race Differences in Intelligence',
1430 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1431 'duration': 965,
1432 'upload_date': '20140124',
1433 'uploader': 'New Century Foundation',
1434 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1435 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1436 },
1437 'params': {
1438 'skip_download': True,
1439 },
545cc85d 1440 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1441 },
022a5d66
S
1442 {
1443 # itag 212
1444 'url': '1t24XAntNCY',
1445 'only_matching': True,
fd5c4aab
S
1446 },
1447 {
1448 # geo restricted to JP
1449 'url': 'sJL6WA-aGkQ',
1450 'only_matching': True,
1451 },
cd5a74a2
S
1452 {
1453 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1454 'only_matching': True,
1455 },
bc2ca1bb 1456 {
1457 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1458 'only_matching': True,
1459 },
1460 {
1461 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1462 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1463 'only_matching': True,
1464 },
825cd268
RA
1465 {
1466 # DRM protected
1467 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1468 'only_matching': True,
4fe54c12
S
1469 },
1470 {
1471 # Video with unsupported adaptive stream type formats
1472 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1473 'info_dict': {
1474 'id': 'Z4Vy8R84T1U',
1475 'ext': 'mp4',
1476 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1477 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1478 'duration': 433,
1479 'upload_date': '20130923',
1480 'uploader': 'Amelia Putri Harwita',
1481 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1482 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1483 'formats': 'maxcount:10',
1484 },
1485 'params': {
1486 'skip_download': True,
1487 'youtube_include_dash_manifest': False,
1488 },
5429d6a9 1489 'skip': 'not actual anymore',
5caabd3c 1490 },
1491 {
822b9d9c 1492 # Youtube Music Auto-generated description
5caabd3c 1493 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1494 'info_dict': {
1495 'id': 'MgNrAu2pzNs',
1496 'ext': 'mp4',
1497 'title': 'Voyeur Girl',
1498 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1499 'upload_date': '20190312',
5429d6a9
S
1500 'uploader': 'Stephen - Topic',
1501 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1502 'artist': 'Stephen',
1503 'track': 'Voyeur Girl',
1504 'album': 'it\'s too much love to know my dear',
1505 'release_date': '20190313',
1506 'release_year': 2019,
1507 },
1508 'params': {
1509 'skip_download': True,
1510 },
1511 },
66b48727
RA
1512 {
1513 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1514 'only_matching': True,
1515 },
011e75e6
S
1516 {
1517 # invalid -> valid video id redirection
1518 'url': 'DJztXj2GPfl',
1519 'info_dict': {
1520 'id': 'DJztXj2GPfk',
1521 'ext': 'mp4',
1522 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1523 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1524 'upload_date': '20090125',
1525 'uploader': 'Prochorowka',
1526 'uploader_id': 'Prochorowka',
1527 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1528 'artist': 'Panjabi MC',
1529 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1530 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1531 },
1532 'params': {
1533 'skip_download': True,
1534 },
545cc85d 1535 'skip': 'Video unavailable',
ea74e00b
DP
1536 },
1537 {
1538 # empty description results in an empty string
1539 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1540 'info_dict': {
1541 'id': 'x41yOUIvK2k',
1542 'ext': 'mp4',
1543 'title': 'IMG 3456',
1544 'description': '',
1545 'upload_date': '20170613',
1546 'uploader_id': 'ElevageOrVert',
1547 'uploader': 'ElevageOrVert',
1548 },
1549 'params': {
1550 'skip_download': True,
1551 },
1552 },
a0566bbf 1553 {
29f7c58a 1554 # with '};' inside yt initial data (see [1])
1555 # see [2] for an example with '};' inside ytInitialPlayerResponse
1556 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1557 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1558 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1559 'info_dict': {
1560 'id': 'CHqg6qOn4no',
1561 'ext': 'mp4',
1562 'title': 'Part 77 Sort a list of simple types in c#',
1563 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1564 'upload_date': '20130831',
1565 'uploader_id': 'kudvenkat',
1566 'uploader': 'kudvenkat',
1567 },
1568 'params': {
1569 'skip_download': True,
1570 },
1571 },
29f7c58a 1572 {
1573 # another example of '};' in ytInitialData
1574 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1575 'only_matching': True,
1576 },
1577 {
1578 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1579 'only_matching': True,
1580 },
545cc85d 1581 {
cc2db878 1582 # https://github.com/ytdl-org/youtube-dl/pull/28094
1583 'url': 'OtqTfy26tG0',
1584 'info_dict': {
1585 'id': 'OtqTfy26tG0',
1586 'ext': 'mp4',
1587 'title': 'Burn Out',
1588 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1589 'upload_date': '20141120',
1590 'uploader': 'The Cinematic Orchestra - Topic',
1591 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1592 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1593 'artist': 'The Cinematic Orchestra',
1594 'track': 'Burn Out',
1595 'album': 'Every Day',
1596 'release_data': None,
1597 'release_year': None,
1598 },
1599 'params': {
1600 'skip_download': True,
1601 },
545cc85d 1602 },
bc2ca1bb 1603 {
1604 # controversial video, only works with bpctr when authenticated with cookies
1605 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1606 'only_matching': True,
1607 },
f7ad7160 1608 {
1609 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1610 'url': 'cBvYw8_A0vQ',
1611 'info_dict': {
1612 'id': 'cBvYw8_A0vQ',
1613 'ext': 'mp4',
1614 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1615 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1616 'upload_date': '20201120',
1617 'uploader': 'Walk around Japan',
1618 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1619 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1620 },
1621 'params': {
1622 'skip_download': True,
1623 },
0fb983f6 1624 }, {
1625 # Has multiple audio streams
1626 'url': 'WaOKSUlf4TM',
1627 'only_matching': True
9297939e 1628 }, {
1629 # Requires Premium: has format 141 when requested using YTM url
1630 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1631 'only_matching': True
1632 }, {
120916da 1633 # multiple subtitles with same lang_code
1634 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1635 'only_matching': True,
109dd3b2 1636 }, {
1637 # Force use android client fallback
1638 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1639 'info_dict': {
1640 'id': 'YOelRv7fMxY',
1641 'title': 'Digging a Secret Tunnel from my Workshop',
1642 'ext': '3gp',
1643 'upload_date': '20210624',
1644 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1645 'uploader': 'colinfurze',
1646 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1647 'description': 'md5:ecb672623246d98c6c562eed6ae798c3'
1648 },
1649 'params': {
1650 'format': '17', # 3gp format available on android
1651 'extractor_args': {'youtube': {'player_client': ['android']}},
1652 },
120916da 1653 },
109dd3b2 1654 {
1655 # Skip download of additional client configs (remix client config in this case)
1656 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1657 'only_matching': True,
1658 'params': {
1659 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1660 },
1661 }
2eb88d95
PH
1662 ]
1663
@classmethod
def suitable(cls, url):
    """Decide whether this extractor should handle ``url``.

    URLs whose query string carries a non-empty ``list`` parameter are
    rejected here; everything else is decided by the parent class.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
1673
def __init__(self, *args, **kwargs):
    """Initialise the extractor and its two in-memory caches.

    ``_player_cache`` is keyed by ``(player_url, signature cache id)`` and
    holds signature functions; ``_code_cache`` is keyed by player id and
    holds downloaded player code.
    """
    super(YoutubeIE, self).__init__(*args, **kwargs)
    self._player_cache = {}
    self._code_cache = {}
e0df6211 1678
def _extract_player_url(self, ytcfg=None, webpage=None):
    """Return the absolute URL of the JS player, or None if none is found.

    The URL is taken from ``ytcfg['PLAYER_JS_URL']`` when available and
    otherwise searched for in ``webpage``. Protocol-relative and
    site-relative URLs are turned into absolute https URLs.
    """
    player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
    if not player_url and webpage:
        # Only search when there is a page to search in; the download of
        # the watch page may have failed.
        player_url = self._search_regex(
            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
            webpage, 'player URL', fatal=False)
    if not player_url:
        # The non-fatal search found nothing. Users of player_url
        # (_decrypt_signature, _extract_signature_timestamp) check for None.
        return None
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)
    return player_url
1691
def _signature_cache_id(self, example_sig):
    """ Return a string representation of a signature

    The result is the length of every dot-separated part of
    ``example_sig``, joined with dots.
    """
    part_lengths = [compat_str(len(part)) for part in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1695
e40c758c
S
1696 @classmethod
1697 def _extract_player_info(cls, player_url):
1698 for player_re in cls._PLAYER_INFO_RE:
1699 id_m = re.search(player_re, player_url)
1700 if id_m:
1701 break
1702 else:
c081b35c 1703 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 1704 return id_m.group('id')
e40c758c 1705
109dd3b2 1706 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1707 player_id = self._extract_player_info(player_url)
1708 if player_id not in self._code_cache:
1709 self._code_cache[player_id] = self._download_webpage(
1710 player_url, video_id, fatal=fatal,
1711 note='Downloading player ' + player_id,
1712 errnote='Download of %s failed' % player_url)
1713 return player_id in self._code_cache
1714
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that descrambles signatures shaped like ``example_sig``.

    A cached index list from the 'youtube-sigfuncs' filesystem cache is used
    when present. Otherwise the function is parsed out of the player code,
    its index list is stored in that cache, and the parsed function is
    returned. Returns None when the player could not be loaded.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    cached_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cached_spec is not None:
        return lambda s: ''.join(s[i] for i in cached_spec)

    if not self._load_player(video_id, player_url):
        return None

    sig_func = self._parse_sig_js(self._code_cache[player_id])

    # Run the function on a string whose i-th character has code point i,
    # so the output directly lists the source index of every output char.
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_spec = [ord(c) for c in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, index_spec)
    return sig_func
83799698 1737
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the signature function ``func``.

    ``func`` is run on a probe string whose i-th character has code point i;
    the resulting index list is rendered as a sum of ``s[...]`` index and
    slice expressions and written out with ``self.to_screen``.
    """
    def gen_sig_code(idxs):
        # Yields one 's[...]' expression per run of indices: runs that move
        # by a constant step of +1 or -1 become slices, others single items.
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            # The slice end is exclusive, so go one step past `end`; a
            # negative value would wrap around, hence the open-ended ':'.
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        step = None
        # Squelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    # Still inside the current run
                    continue
                # Run ended at `prev`
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run starts at `prev`
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Emit whatever is still pending after the last pair
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1776
def _parse_sig_js(self, jscode):
    """Build a Python callable for the signature function in ``jscode``.

    The function name is located with the first matching pattern below
    (group ``sig``); the function is then extracted with JSInterpreter.
    The returned callable takes the signature string as its only argument.
    """
    funcname = self._search_regex(
        (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
         r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
         r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
         # Obsolete patterns
         r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
         r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
         r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function is called with a list of arguments, so wrap
    # the single signature string in a list.
    return lambda s: initial_function([s])
1800
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature

    The signature function is looked up in ``self._player_cache`` under
    ``(player_url, signature cache id)`` and extracted on a miss.

    Raises ExtractorError when ``player_url`` is None or when anything
    fails during extraction; the traceback is included in the message.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        if self.get_param('youtube_print_sig_code'):
            self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as err:
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=err)
e0df6211 1822
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    ``ytcfg['STS']`` is preferred; when it is missing or falsy the value is
    searched for in the player code. Without a ``player_url`` this raises
    ExtractorError if ``fatal`` is set, otherwise it warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return
    if not self._load_player(video_id, player_url, fatal=fatal):
        return sts
    code = self._code_cache[self._extract_player_info(player_url)]
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
1847
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL found in ``player_response``.

    The URL is read from ``playbackTracking.videostatsPlaybackUrl.baseUrl``;
    nothing happens when it is missing. ``ver`` and a random ``cpn`` are set
    in its query string and the URL is fetched non-fatally.
    """
    tracking_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not tracking_url:
        return
    url_parts = compat_urlparse.urlparse(tracking_url)
    params = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

    params['ver'] = ['2']
    params['cpn'] = [cpn]
    new_query = compat_urllib_parse_urlencode(params, True)
    tracking_url = compat_urlparse.urlunparse(url_parts._replace(query=new_query))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1872
@staticmethod
def _extract_urls(webpage):
    """Return a list of YouTube embeds found in ``webpage``.

    Three sources are scanned: embedded player URLs, lazyYT
    ``data-youtube-id`` attributes and the ``data-video_id`` attribute of
    the Wordpress "YouTube Video Importer" plugin. The first source yields
    URLs; the other two yield the raw attribute values.
    """
    # Embedded YouTube player
    # NOTE(review): `embedSWF\(?:\s*` matches an optional "(" followed by a
    # literal ":"; it looks like `embedSWF\(\s*` may have been intended - confirm.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(list(map(
        unescapeHTML,
        re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall returns one tuple per match; the last group is the video id
    entries.extend(m[-1] for m in matches)

    return entries
1904
@staticmethod
def _extract_url(webpage):
    """Return the first entry of ``_extract_urls(webpage)``, or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1909
@classmethod
def extract_id(cls, url):
    """Return the video id, i.e. group 2 of ``cls._VALID_URL`` matched against ``url``.

    Raises ExtractorError when ``url`` does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is None:
        raise ExtractorError('Invalid URL: %s' % url)
    return match.group(2)
1917
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build a list of chapter dicts from the chaptered player bar in ``data``.

    Each chapter ends where the next one starts; the last one ends at
    ``duration``. Chapters without a start or end time are left out.
    Returns None when ``data`` contains no chapter list.
    """
    raw_chapters = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not raw_chapters:
        return

    def start_of(chapter):
        # Start offset in seconds (the source value is in milliseconds)
        millis = try_get(
            chapter,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    last_index = len(raw_chapters) - 1
    result = []
    for index, chapter in enumerate(raw_chapters):
        begin = start_of(chapter)
        if begin is None:
            continue
        if index < last_index:
            end = start_of(raw_chapters[index + 1])
        else:
            end = duration
        if end is None:
            continue
        result.append({
            'start_time': begin,
            'end_time': end,
            'title': try_get(
                chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return result
1957
545cc85d 1958 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1959 return self._parse_json(self._search_regex(
1960 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1961 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 1962
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns the value produced by datetime_from_str for 'now-<X><units>',
    or None when time_text is not a string, has fewer than three
    space-separated words, or cannot be parsed.
    """
    if not isinstance(time_text, compat_str if False else str):
        # The caller may have no time text at all (None)
        return None
    time_text_split = time_text.split(' ')
    if len(time_text_split) < 3:
        return None
    try:
        return datetime_from_str(
            'now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
    except ValueError:
        # presumably raised by datetime_from_str for text it does not
        # understand (e.g. localized strings) - verify against its definition
        return None
1972
@staticmethod
def _join_text_entries(runs):
    """Concatenate the non-empty string ``text`` values of the dicts in ``runs``.

    Entries that are not dicts are ignored. Returns None when nothing was
    collected.
    """
    pieces = []
    for run in runs:
        if isinstance(run, dict):
            piece = try_get(run, lambda x: x['text'], compat_str)
            if piece:
                pieces.append(piece)
    return ''.join(pieces) if pieces else None
1986
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no ``commentId``. ``parent`` is the
    id of the parent comment; it is reported as 'root' when not given.
    ``timestamp`` is derived from the relative time text and is None when
    that text is missing or cannot be parsed.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return
    comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
    text = self._join_text_entries(comment_text_runs) or ''
    comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
    time_text = self._join_text_entries(comment_time_text)
    # time_text is None when the renderer carries no published-time runs, and
    # parse_time_text returns None for text with fewer than three words;
    # neither case may abort extraction of the comment itself.
    timestamp = None
    published = self.parse_time_text(time_text) if time_text else None
    if published is not None:
        timestamp = calendar.timegm(published.timetuple())
    author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
    votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                  lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_favorited = 'creatorHeart' in (try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
2020
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, video_id, parent=None, comment_counts=None):
    """Generator over the comments reachable from ``root_continuation_data``.

    Two kinds of items are yielded: an int (the expected total comment
    count read from the comments header) and comment dicts produced by
    ``_extract_comment``. Reply threads are fetched by calling this method
    recursively with ``parent`` set to the id of the parent comment.

    ``comment_counts`` is a mutable list shared with the recursive calls:
    [comments so far, estimated total comments, current comment thread #].
    """

    def extract_header(contents):
        # Reads the expected comment count and the continuation for the
        # selected sort order from the comments header.
        _total_comments = 0
        _continuation = None
        for content in contents:
            comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
            expected_comment_count = try_get(comments_header_renderer,
                                             (lambda x: x['countText']['runs'][0]['text'],
                                              lambda x: x['commentsCount']['runs'][0]['text']),
                                             compat_str)
            if expected_comment_count:
                comment_counts[1] = str_to_int(expected_comment_count)
                self.to_screen('Downloading ~%d comments' % str_to_int(expected_comment_count))
                _total_comments = comment_counts[1]
            sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
            comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = sort_menu_item.get('title')
            if isinstance(sort_text, compat_str):
                sort_text = sort_text.lower()
            else:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text)
            break
        return _total_comments, _continuation

    def extract_thread(contents):
        # Yields every comment in `contents`, each followed by its replies.
        if not parent:
            # Top-level page: restart the reply-thread counter
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # NOTE(review): `dict` is passed inside the getter tuple here rather
            # than as the expected type - confirm this is intended.
            comment_renderer = try_get(
                comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                content, (lambda x: x['commentRenderer'], dict))

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    video_id, parent=comment.get('id'), comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    # YouTube comments have a max depth of 2
    max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
    if max_depth == 1 and parent:
        # Replies were not requested
        return
    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    continuation = self._extract_continuation(root_continuation_data)
    if continuation and len(continuation['ctoken']) < 27:
        self.write_debug('Detected old API continuation token. Generating new API compatible token.')
        continuation_token = self._generate_comment_continuation(video_id)
        continuation = self._build_continuation_query(continuation_token, None)

    visitor_data = None
    # Only the first top-level response is treated as the header response
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    comment_counts[2], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=self._continuation_query_ajax_to_api(continuation),
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
        if not response:
            break
        # Keep the previous visitor data when the response carries none
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

        # Unless a new continuation is found below, the page loop ends
        continuation = None
        if isinstance(continuation_contents, list):
            for continuation_section in continuation_contents:
                if not isinstance(continuation_section, dict):
                    continue
                continuation_items = try_get(
                    continuation_section,
                    (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                     lambda x: x['appendContinuationItemsAction']['continuationItems']),
                    list) or []
                if is_first_continuation:
                    total_comments, continuation = extract_header(continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break
                    continue
                count = 0
                # NOTE(review): `count` is the index of the last entry, so it is
                # 0 both for no entries and for exactly one entry - confirm.
                for count, entry in enumerate(extract_thread(continuation_items)):
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break

        # Deprecated response structure
        elif isinstance(continuation_contents, dict):
            known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
            for key, continuation_renderer in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                if not isinstance(continuation_renderer, dict):
                    continue
                if is_first_continuation:
                    header_continuation_items = [continuation_renderer.get('header') or {}]
                    total_comments, continuation = extract_header(header_continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break

                # Sometimes YouTube provides a continuation without any comments
                # In most cases we end up just downloading these with very little comments to come.
                count = 0
                for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                if count == 0:
                    if not parent:
                        self.report_warning('No comments received - assuming end of comments')
                    continuation = None
                break
a1c5d2ca 2192
2d6659b9 2193 @staticmethod
2194 def _generate_comment_continuation(video_id):
2195 """
2196 Generates initial comment section continuation token from given video id
2197 """
2198 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2199 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2200 new_continuation_intlist = list(itertools.chain.from_iterable(
2201 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2202 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2203
def _extract_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Collects comments from the 'itemSectionRenderer' entries of ``contents``
    until the generator is exhausted, the 'max_comments' configuration
    argument is reached, or the user interrupts. Returns a dict with the
    comment list and its length.
    """
    known_entry_comment_renderers = ('itemSectionRenderer',)

    def _real_comment_extract(contents):
        if not isinstance(contents, list):
            return
        for entry in contents:
            for key, renderer in entry.items():
                if key in known_entry_comment_renderers:
                    yield from self._comment_entries(
                        renderer, video_id=video_id, ytcfg=ytcfg,
                        identity_token=self._extract_identity_token(webpage, item_id=video_id),
                        account_syncid=self._extract_account_syncid(ytcfg))
                    # Only the first known renderer of an entry is used
                    break

    collected = []
    estimated_total = 0
    limit = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')

    try:
        for item in _real_comment_extract(contents):
            if len(collected) >= limit:
                break
            if isinstance(item, int):
                # An int item is the estimated total, not a comment
                estimated_total = item
            else:
                collected.append(item)
    except KeyboardInterrupt:
        self.to_screen('Interrupted by user')
    self.to_screen('Downloaded %d/%d comments' % (len(collected), estimated_total))
    return {
        'comments': collected,
        'comment_count': len(collected),
    }
2237
109dd3b2 2238 @staticmethod
2239 def _generate_player_context(sts=None):
2240 context = {
2241 'html5Preference': 'HTML5_PREF_WANTS',
2242 }
2243 if sts is not None:
2244 context['signatureTimestamp'] = sts
2245 return {
2246 'playbackContext': {
2247 'contentPlaybackContext': context
2248 }
2249 }
2250
4e6767b5 2251 @staticmethod
c888ffb9 2252 def _get_video_info_params(video_id, client='TVHTML5'):
2253 GVI_CLIENTS = {
2254 'ANDROID': {
2255 'c': 'ANDROID',
2256 'cver': '16.20',
2257 },
2258 'TVHTML5': {
2259 'c': 'TVHTML5',
2260 'cver': '6.20180913',
2261 }
2262 }
2263 query = {
4e6767b5 2264 'video_id': video_id,
2265 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c888ffb9 2266 'html5': '1'
4e6767b5 2267 }
c888ffb9 2268 query.update(GVI_CLIENTS.get(client))
2269 return query
4e6767b5 2270
c5e8d7af 2271 def _real_extract(self, url):
cf7e015f 2272 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 2273 video_id = self._match_id(url)
9297939e 2274
2275 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
2276
545cc85d 2277 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 2278 webpage_url = base_url + 'watch?v=' + video_id
2279 webpage = self._download_webpage(
cce889b9 2280 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 2281
109dd3b2 2282 ytcfg = self._extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2283 identity_token = self._extract_identity_token(webpage, video_id)
2284 syncid = self._extract_account_syncid(ytcfg)
2285 headers = self._generate_api_headers(ytcfg, identity_token, syncid)
2286
2287 player_url = self._extract_player_url(ytcfg, webpage)
2288
2d6659b9 2289 player_client = self._configuration_arg('player_client', [''])[0]
4bb6b02f 2290 if player_client not in ('web', 'android', ''):
c888ffb9 2291 self.report_warning(f'Invalid player_client {player_client} given. Falling back to android client.')
2292 force_mobile_client = player_client != 'web'
4bb6b02f 2293 player_skip = self._configuration_arg('player_skip')
109dd3b2 2294
9297939e 2295 def get_text(x):
2296 if not x:
2297 return
2298 text = x.get('simpleText')
2299 if text and isinstance(text, compat_str):
2300 return text
2301 runs = x.get('runs')
2302 if not isinstance(runs, list):
2303 return
2304 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
2305
2306 ytm_streaming_data = {}
2307 if is_music_url:
109dd3b2 2308 ytm_webpage = None
2309 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2310 if sts and not force_mobile_client and 'configs' not in player_skip:
2311 ytm_webpage = self._download_webpage(
2312 'https://music.youtube.com',
2d6659b9 2313 video_id, fatal=False, note='Downloading remix client config')
109dd3b2 2314
2315 ytm_cfg = self._extract_ytcfg(video_id, ytm_webpage) or {}
2316 ytm_client = 'WEB_REMIX'
2317 if not sts or force_mobile_client:
2318 # Android client already has signature descrambled
2319 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2320 if not sts:
c888ffb9 2321 self.report_warning('Falling back to android remix client for player API.')
109dd3b2 2322 ytm_client = 'ANDROID_MUSIC'
2323 ytm_cfg = {}
2324
2325 ytm_headers = self._generate_api_headers(
2326 ytm_cfg, identity_token, syncid,
2327 client=ytm_client)
2328 ytm_query = {'videoId': video_id}
2329 ytm_query.update(self._generate_player_context(sts))
2330
2331 ytm_player_response = self._extract_response(
2332 item_id=video_id, ep='player', query=ytm_query,
2333 ytcfg=ytm_cfg, headers=ytm_headers, fatal=False,
2334 default_client=ytm_client,
c888ffb9 2335 note='Downloading %sremix player API JSON' % ('android ' if force_mobile_client else ''))
2d6659b9 2336 ytm_streaming_data = try_get(ytm_player_response, lambda x: x['streamingData'], dict) or {}
109dd3b2 2337
545cc85d 2338 player_response = None
2339 if webpage:
2340 player_response = self._extract_yt_initial_variable(
2341 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2342 video_id, 'initial player response')
f4f751af 2343
109dd3b2 2344 if not player_response or force_mobile_client:
2345 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2346 yt_client = 'WEB'
2347 ytpcfg = ytcfg
2348 ytp_headers = headers
2349 if not sts or force_mobile_client:
2350 # Android client already has signature descrambled
2351 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2352 if not sts:
c888ffb9 2353 self.report_warning('Falling back to android client for player API.')
109dd3b2 2354 yt_client = 'ANDROID'
2355 ytpcfg = {}
2356 ytp_headers = self._generate_api_headers(ytpcfg, identity_token, syncid, yt_client)
2357
2358 yt_query = {'videoId': video_id}
2359 yt_query.update(self._generate_player_context(sts))
2360 player_response = self._extract_response(
2361 item_id=video_id, ep='player', query=yt_query,
2362 ytcfg=ytpcfg, headers=ytp_headers, fatal=False,
2363 default_client=yt_client,
c888ffb9 2364 note='Downloading %splayer API JSON' % ('android ' if force_mobile_client else '')
2365 ) or player_response
545cc85d 2366
109dd3b2 2367 # Age-gate workarounds
545cc85d 2368 playability_status = player_response.get('playabilityStatus') or {}
109dd3b2 2369 if playability_status.get('reason') in self._AGE_GATE_REASONS:
c888ffb9 2370 gvi_clients = ('ANDROID', 'TVHTML5') if force_mobile_client else ('TVHTML5', 'ANDROID')
2371 for gvi_client in gvi_clients:
2372 pr = self._parse_json(try_get(compat_parse_qs(
2373 self._download_webpage(
2374 base_url + 'get_video_info', video_id,
2375 'Refetching age-gated %s info webpage' % gvi_client.lower(),
2376 'unable to download video info webpage', fatal=False,
2377 query=self._get_video_info_params(video_id, client=gvi_client))),
2378 lambda x: x['player_response'][0],
2379 compat_str) or '{}', video_id)
2380 if pr:
2381 break
109dd3b2 2382 if not pr:
2383 self.report_warning('Falling back to embedded-only age-gate workaround.')
2384 embed_webpage = None
2385 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2386 if sts and not force_mobile_client and 'configs' not in player_skip:
2387 embed_webpage = self._download_webpage(
2388 'https://www.youtube.com/embed/%s?html5=1' % video_id,
2389 video_id=video_id, note='Downloading age-gated embed config')
2390
2391 ytcfg_age = self._extract_ytcfg(video_id, embed_webpage) or {}
2392 # If we extracted the embed webpage, it'll tell us if we can view the video
2393 embedded_pr = self._parse_json(
2394 try_get(ytcfg_age, lambda x: x['PLAYER_VARS']['embedded_player_response'], str) or '{}',
2395 video_id=video_id)
2396 embedded_ps_reason = try_get(embedded_pr, lambda x: x['playabilityStatus']['reason'], str) or ''
2397 if embedded_ps_reason not in self._AGE_GATE_REASONS:
2398 yt_client = 'WEB_EMBEDDED_PLAYER'
2399 if not sts or force_mobile_client:
2400 # Android client already has signature descrambled
2401 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2402 if not sts:
2403 self.report_warning(
c888ffb9 2404 'Falling back to android embedded client for player API (note: some formats may be missing).')
109dd3b2 2405 yt_client = 'ANDROID_EMBEDDED_PLAYER'
2406 ytcfg_age = {}
2407
2408 ytage_headers = self._generate_api_headers(
2409 ytcfg_age, identity_token, syncid, client=yt_client)
2410 yt_age_query = {'videoId': video_id}
2411 yt_age_query.update(self._generate_player_context(sts))
2412 pr = self._extract_response(
2413 item_id=video_id, ep='player', query=yt_age_query,
2414 ytcfg=ytcfg_age, headers=ytage_headers, fatal=False,
2415 default_client=yt_client,
c888ffb9 2416 note='Downloading %sage-gated player API JSON' % ('android ' if force_mobile_client else '')
109dd3b2 2417 ) or {}
2418
545cc85d 2419 if pr:
2420 player_response = pr
2421
2422 trailer_video_id = try_get(
2423 playability_status,
2424 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
2425 compat_str)
2426 if trailer_video_id:
2427 return self.url_result(
2428 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 2429
545cc85d 2430 search_meta = (
2431 lambda x: self._html_search_meta(x, webpage, default=None)) \
2432 if webpage else lambda x: None
dbdaaa23 2433
545cc85d 2434 video_details = player_response.get('videoDetails') or {}
37357d21 2435 microformat = try_get(
545cc85d 2436 player_response,
2437 lambda x: x['microformat']['playerMicroformatRenderer'],
2438 dict) or {}
2439 video_title = video_details.get('title') \
2440 or get_text(microformat.get('title')) \
2441 or search_meta(['og:title', 'twitter:title', 'title'])
2442 video_description = video_details.get('shortDescription')
cf7e015f 2443
8fe10494 2444 if not smuggled_data.get('force_singlefeed', False):
a06916d9 2445 if not self.get_param('noplaylist'):
8fe10494
S
2446 multifeed_metadata_list = try_get(
2447 player_response,
2448 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 2449 compat_str)
8fe10494
S
2450 if multifeed_metadata_list:
2451 entries = []
2452 feed_ids = []
2453 for feed in multifeed_metadata_list.split(','):
2454 # Unquote should take place before split on comma (,) since textual
2455 # fields may contain comma as well (see
067aa17e 2456 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 2457 feed_data = compat_parse_qs(
2458 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
2459
def feed_entry(name):
    """Look up the first value of field *name* in the enclosing feed_data.

    The lookup goes through try_get restricted to compat_str, so the
    result is whatever try_get yields for a missing or non-string entry.
    """
    def first_value(data):
        return data[name][0]

    return try_get(feed_data, first_value, compat_str)
6b09401b
S
2463
2464 feed_id = feed_entry('id')
2465 if not feed_id:
2466 continue
2467 feed_title = feed_entry('title')
2468 title = video_title
2469 if feed_title:
2470 title += ' (%s)' % feed_title
8fe10494
S
2471 entries.append({
2472 '_type': 'url_transparent',
2473 'ie_key': 'Youtube',
2474 'url': smuggle_url(
545cc85d 2475 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 2476 {'force_singlefeed': True}),
6b09401b 2477 'title': title,
8fe10494 2478 })
6b09401b 2479 feed_ids.append(feed_id)
8fe10494
S
2480 self.to_screen(
2481 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2482 % (', '.join(feed_ids), video_id))
545cc85d 2483 return self.playlist_result(
2484 entries, video_id, video_title, video_description)
8fe10494
S
2485 else:
2486 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 2487
9297939e 2488 formats, itags, stream_ids = [], [], []
cc2db878 2489 itag_qualities = {}
d3fc8074 2490 q = qualities([
60bdb7bd 2491 # "tiny" is the smallest video-only format. But some audio-only formats
2492 # were also labeled "tiny". It is not clear if such formats still exist
d3fc8074 2493 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2494 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2495 ])
9297939e 2496
545cc85d 2497 streaming_data = player_response.get('streamingData') or {}
2498 streaming_formats = streaming_data.get('formats') or []
2499 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
9297939e 2500 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2501 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2502
545cc85d 2503 for fmt in streaming_formats:
2504 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2505 continue
321bf820 2506
cc2db878 2507 itag = str_or_none(fmt.get('itag'))
9297939e 2508 audio_track = fmt.get('audioTrack') or {}
2509 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2510 if stream_id in stream_ids:
2511 continue
2512
cc2db878 2513 quality = fmt.get('quality')
d3fc8074 2514 if quality == 'tiny' or not quality:
2515 quality = fmt.get('audioQuality', '').lower() or quality
cc2db878 2516 if itag and quality:
2517 itag_qualities[itag] = quality
2518 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2519 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2520 # number of fragments that would subsequently be requested with (`&sq=N`)
2521 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2522 continue
2523
545cc85d 2524 fmt_url = fmt.get('url')
2525 if not fmt_url:
2526 sc = compat_parse_qs(fmt.get('signatureCipher'))
2527 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2528 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2529 if not (sc and fmt_url and encrypted_sig):
2530 continue
545cc85d 2531 if not player_url:
201e9eaa 2532 continue
545cc85d 2533 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2534 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2535 fmt_url += '&' + sp + '=' + signature
2536
545cc85d 2537 if itag:
2538 itags.append(itag)
9297939e 2539 stream_ids.append(stream_id)
2540
cc2db878 2541 tbr = float_or_none(
2542 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 2543 dct = {
2544 'asr': int_or_none(fmt.get('audioSampleRate')),
2545 'filesize': int_or_none(fmt.get('contentLength')),
2546 'format_id': itag,
0fb983f6 2547 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
545cc85d 2548 'fps': int_or_none(fmt.get('fps')),
2549 'height': int_or_none(fmt.get('height')),
dca3ff4a 2550 'quality': q(quality),
cc2db878 2551 'tbr': tbr,
545cc85d 2552 'url': fmt_url,
2553 'width': fmt.get('width'),
0fb983f6 2554 'language': audio_track.get('id', '').split('.')[0],
545cc85d 2555 }
60bdb7bd 2556 mime_mobj = re.match(
2557 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2558 if mime_mobj:
2559 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2560 dct.update(parse_codecs(mime_mobj.group(2)))
2561 # The 3gp format in android client has a quality of "small",
2562 # but is actually worse than all other formats
2563 if dct['ext'] == '3gp':
2564 dct['quality'] = q('tiny')
cc2db878 2565 no_audio = dct.get('acodec') == 'none'
2566 no_video = dct.get('vcodec') == 'none'
2567 if no_audio:
2568 dct['vbr'] = tbr
2569 if no_video:
2570 dct['abr'] = tbr
2571 if no_audio or no_video:
545cc85d 2572 dct['downloader_options'] = {
2573 # Youtube throttles chunks >~10M
2574 'http_chunk_size': 10485760,
bf1317d2 2575 }
7c60c33e 2576 if dct.get('ext'):
2577 dct['container'] = dct['ext'] + '_dash'
545cc85d 2578 formats.append(dct)
2579
4bb6b02f 2580 skip_manifests = self._configuration_arg('skip')
5d3a0e79 2581 get_dash = 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
2582 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2583
9297939e 2584 for sd in (streaming_data, ytm_streaming_data):
5d3a0e79 2585 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
9297939e 2586 if hls_manifest_url:
2587 for f in self._extract_m3u8_formats(
2588 hls_manifest_url, video_id, 'mp4', fatal=False):
2589 itag = self._search_regex(
2590 r'/itag/(\d+)', f['url'], 'itag', default=None)
2591 if itag:
2592 f['format_id'] = itag
8d68ab98 2593 formats.append(f)
545cc85d 2594
5d3a0e79 2595 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2596 if dash_manifest_url:
2597 for f in self._extract_mpd_formats(
2598 dash_manifest_url, video_id, fatal=False):
2599 itag = f['format_id']
2600 if itag in itags:
2601 continue
2602 if itag in itag_qualities:
2603 f['quality'] = q(itag_qualities[itag])
2604 filesize = int_or_none(self._search_regex(
2605 r'/clen/(\d+)', f.get('fragment_base_url')
2606 or f['url'], 'file size', default=None))
2607 if filesize:
2608 f['filesize'] = filesize
2609 formats.append(f)
bf1317d2 2610
545cc85d 2611 if not formats:
a06916d9 2612 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
b7da73eb 2613 self.raise_no_formats(
545cc85d 2614 'This video is DRM protected.', expected=True)
2615 pemr = try_get(
2616 playability_status,
2617 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2618 dict) or {}
2619 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2620 subreason = pemr.get('subreason')
2621 if subreason:
2622 subreason = clean_html(get_text(subreason))
2623 if subreason == 'The uploader has not made this video available in your country.':
2624 countries = microformat.get('availableCountries')
2625 if not countries:
2626 regions_allowed = search_meta('regionsAllowed')
2627 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2628 self.raise_geo_restricted(subreason, countries, metadata_available=True)
545cc85d 2629 reason += '\n' + subreason
2630 if reason:
b7da73eb 2631 self.raise_no_formats(reason, expected=True)
bf1317d2 2632
545cc85d 2633 self._sort_formats(formats)
bf1317d2 2634
545cc85d 2635 keywords = video_details.get('keywords') or []
2636 if not keywords and webpage:
2637 keywords = [
2638 unescapeHTML(m.group('content'))
2639 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2640 for keyword in keywords:
2641 if keyword.startswith('yt:stretch='):
201c1459 2642 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2643 if mobj:
2644 # NB: float is intentional for forcing float division
2645 w, h = (float(v) for v in mobj.groups())
2646 if w > 0 and h > 0:
2647 ratio = w / h
2648 for f in formats:
2649 if f.get('vcodec') != 'none':
2650 f['stretched_ratio'] = ratio
2651 break
6449cd80 2652
545cc85d 2653 thumbnails = []
2654 for container in (video_details, microformat):
2655 for thumbnail in (try_get(
2656 container,
2657 lambda x: x['thumbnail']['thumbnails'], list) or []):
2658 thumbnail_url = thumbnail.get('url')
2659 if not thumbnail_url:
bf1317d2 2660 continue
1988fab7 2661 # Sometimes youtube gives a wrong thumbnail URL. See:
2662 # https://github.com/yt-dlp/yt-dlp/issues/233
2663 # https://github.com/ytdl-org/youtube-dl/issues/28023
2664 if 'maxresdefault' in thumbnail_url:
2665 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2666 thumbnails.append({
545cc85d 2667 'url': thumbnail_url,
ff2751ac 2668 'height': int_or_none(thumbnail.get('height')),
545cc85d 2669 'width': int_or_none(thumbnail.get('width')),
ff2751ac 2670 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
545cc85d 2671 })
ff2751ac 2672 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2673 if thumbnail_url:
2674 thumbnails.append({
2675 'url': thumbnail_url,
2676 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2677 })
2678 # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
2679 # See: https://github.com/ytdl-org/youtube-dl/issues/29049
2680 thumbnails.append({
2681 'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
2682 'preference': 1,
2683 })
2684 self._remove_duplicate_formats(thumbnails)
545cc85d 2685
2686 category = microformat.get('category') or search_meta('genre')
2687 channel_id = video_details.get('channelId') \
2688 or microformat.get('externalChannelId') \
2689 or search_meta('channelId')
2690 duration = int_or_none(
2691 video_details.get('lengthSeconds')
2692 or microformat.get('lengthSeconds')) \
2693 or parse_duration(search_meta('duration'))
2694 is_live = video_details.get('isLive')
f6745c49 2695 is_upcoming = video_details.get('isUpcoming')
545cc85d 2696 owner_profile_url = microformat.get('ownerProfileUrl')
2697
2698 info = {
2699 'id': video_id,
2700 'title': self._live_title(video_title) if is_live else video_title,
2701 'formats': formats,
2702 'thumbnails': thumbnails,
2703 'description': video_description,
2704 'upload_date': unified_strdate(
2705 microformat.get('uploadDate')
2706 or search_meta('uploadDate')),
2707 'uploader': video_details['author'],
2708 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2709 'uploader_url': owner_profile_url,
2710 'channel_id': channel_id,
2711 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2712 'duration': duration,
2713 'view_count': int_or_none(
2714 video_details.get('viewCount')
2715 or microformat.get('viewCount')
2716 or search_meta('interactionCount')),
2717 'average_rating': float_or_none(video_details.get('averageRating')),
2718 'age_limit': 18 if (
2719 microformat.get('isFamilySafe') is False
2720 or search_meta('isFamilyFriendly') == 'false'
2721 or search_meta('og:restrictions:age') == '18+') else 0,
2722 'webpage_url': webpage_url,
2723 'categories': [category] if category else None,
2724 'tags': keywords,
2725 'is_live': is_live,
2726 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2727 'was_live': video_details.get('isLiveContent'),
545cc85d 2728 }
b477fc13 2729
545cc85d 2730 pctr = try_get(
2731 player_response,
2732 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2733 subtitles = {}
2734 if pctr:
def process_language(container, base_url, lang_code, sub_name, query):
    """Add one subtitle entry per known subtitle format for a language.

    Entries are appended to the list stored under container[lang_code]
    (created if absent). For each format in self._SUBTITLE_FORMATS the
    'fmt' key of *query* is set in place, and the entry URL is built from
    *base_url* and the updated *query* via update_url_query.
    """
    tracks = container.setdefault(lang_code, [])
    for ext in self._SUBTITLE_FORMATS:
        # NB: the caller's query dict is modified, exactly as before
        query['fmt'] = ext
        track_url = update_url_query(base_url, query)
        tracks.append({
            'ext': ext,
            'url': track_url,
            'name': sub_name,
        })
7e72694b 2746
545cc85d 2747 for caption_track in (pctr.get('captionTracks') or []):
2748 base_url = caption_track.get('baseUrl')
2749 if not base_url:
2750 continue
2751 if caption_track.get('kind') != 'asr':
120916da 2752 lang_code = (
2753 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2754 or caption_track.get('languageCode'))
545cc85d 2755 if not lang_code:
2756 continue
2757 process_language(
774d79cc 2758 subtitles, base_url, lang_code,
2d6659b9 2759 try_get(caption_track, lambda x: x['name']['simpleText']),
774d79cc 2760 {})
545cc85d 2761 continue
2762 automatic_captions = {}
2763 for translation_language in (pctr.get('translationLanguages') or []):
2764 translation_language_code = translation_language.get('languageCode')
2765 if not translation_language_code:
2766 continue
2767 process_language(
2768 automatic_captions, base_url, translation_language_code,
49c258e1 2769 try_get(translation_language, (
2770 lambda x: x['languageName']['simpleText'],
2771 lambda x: x['languageName']['runs'][0]['text'])),
545cc85d 2772 {'tlang': translation_language_code})
2773 info['automatic_captions'] = automatic_captions
2774 info['subtitles'] = subtitles
7e72694b 2775
545cc85d 2776 parsed_url = compat_urllib_parse_urlparse(url)
2777 for component in [parsed_url.fragment, parsed_url.query]:
2778 query = compat_parse_qs(component)
2779 for k, v in query.items():
2780 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2781 d_k += '_time'
2782 if d_k not in info and k in s_ks:
2783 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2784
2785 # Youtube Music Auto-generated description
822b9d9c 2786 if video_description:
38d70284 2787 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2788 if mobj:
822b9d9c
RA
2789 release_year = mobj.group('release_year')
2790 release_date = mobj.group('release_date')
2791 if release_date:
2792 release_date = release_date.replace('-', '')
2793 if not release_year:
545cc85d 2794 release_year = release_date[:4]
2795 info.update({
2796 'album': mobj.group('album'.strip()),
2797 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2798 'track': mobj.group('track').strip(),
2799 'release_date': release_date,
cc2db878 2800 'release_year': int_or_none(release_year),
545cc85d 2801 })
7e72694b 2802
545cc85d 2803 initial_data = None
2804 if webpage:
2805 initial_data = self._extract_yt_initial_variable(
2806 webpage, self._YT_INITIAL_DATA_RE, video_id,
2807 'yt initial data')
2808 if not initial_data:
109dd3b2 2809 initial_data = self._extract_response(
2810 item_id=video_id, ep='next', fatal=False,
2811 ytcfg=ytcfg, headers=headers, query={'videoId': video_id},
2812 note='Downloading initial data API JSON')
545cc85d 2813
c60ee3a2 2814 try:
2815 # This will error if there is no livechat
2816 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2817 info['subtitles']['live_chat'] = [{
2818 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2819 'video_id': video_id,
2820 'ext': 'json',
f6745c49 2821 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
c60ee3a2 2822 }]
2823 except (KeyError, IndexError, TypeError):
2824 pass
545cc85d 2825
2826 if initial_data:
2827 chapters = self._extract_chapters_from_json(
2828 initial_data, video_id, duration)
2829 if not chapters:
2830 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2831 contents = try_get(
2832 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2833 list)
2834 if not contents:
2835 continue
2836
def chapter_time(mmlir):
    """Return the time parsed from a marker item's 'timeDescription'.

    *mmlir* is a macroMarkersListItemRenderer dict. The end-time call
    site passes the unguarded result of try_get, which may not be a dict
    when the following item lacks that renderer; in that case None is
    returned so the caller's existing None check skips the chapter
    instead of extraction failing with AttributeError.
    """
    if not isinstance(mmlir, dict):
        return None
    return parse_duration(
        get_text(mmlir.get('timeDescription')))
2840
2841 chapters = []
2842 for next_num, content in enumerate(contents, start=1):
2843 mmlir = content.get('macroMarkersListItemRenderer') or {}
2844 start_time = chapter_time(mmlir)
2845 end_time = chapter_time(try_get(
2846 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2847 if next_num < len(contents) else duration
2848 if start_time is None or end_time is None:
2849 continue
2850 chapters.append({
2851 'start_time': start_time,
2852 'end_time': end_time,
2853 'title': get_text(mmlir.get('title')),
2854 })
2855 if chapters:
2856 break
2857 if chapters:
2858 info['chapters'] = chapters
2859
2860 contents = try_get(
2861 initial_data,
2862 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2863 list) or []
2864 for content in contents:
2865 vpir = content.get('videoPrimaryInfoRenderer')
2866 if vpir:
2867 stl = vpir.get('superTitleLink')
2868 if stl:
2869 stl = get_text(stl)
2870 if try_get(
2871 vpir,
2872 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2873 info['location'] = stl
2874 else:
2875 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2876 if mobj:
2877 info.update({
2878 'series': mobj.group(1),
2879 'season_number': int(mobj.group(2)),
2880 'episode_number': int(mobj.group(3)),
2881 })
2882 for tlb in (try_get(
2883 vpir,
2884 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2885 list) or []):
2886 tbr = tlb.get('toggleButtonRenderer') or {}
2887 for getter, regex in [(
2888 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2889 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2890 lambda x: x['accessibility'],
2891 lambda x: x['accessibilityData']['accessibilityData'],
2892 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2893 label = (try_get(tbr, getter, dict) or {}).get('label')
2894 if label:
2895 mobj = re.match(regex, label)
2896 if mobj:
2897 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2898 break
2899 sbr_tooltip = try_get(
2900 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2901 if sbr_tooltip:
2902 like_count, dislike_count = sbr_tooltip.split(' / ')
2903 info.update({
2904 'like_count': str_to_int(like_count),
2905 'dislike_count': str_to_int(dislike_count),
2906 })
2907 vsir = content.get('videoSecondaryInfoRenderer')
2908 if vsir:
2909 info['channel'] = get_text(try_get(
2910 vsir,
2911 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2912 dict))
545cc85d 2913 rows = try_get(
2914 vsir,
2915 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2916 list) or []
2917 multiple_songs = False
2918 for row in rows:
2919 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2920 multiple_songs = True
2921 break
2922 for row in rows:
2923 mrr = row.get('metadataRowRenderer') or {}
2924 mrr_title = mrr.get('title')
2925 if not mrr_title:
2926 continue
2927 mrr_title = get_text(mrr['title'])
2928 mrr_contents_text = get_text(mrr['contents'][0])
2929 if mrr_title == 'License':
2930 info['license'] = mrr_contents_text
2931 elif not multiple_songs:
2932 if mrr_title == 'Album':
2933 info['album'] = mrr_contents_text
2934 elif mrr_title == 'Artist':
2935 info['artist'] = mrr_contents_text
2936 elif mrr_title == 'Song':
2937 info['track'] = mrr_contents_text
2938
2939 fallbacks = {
2940 'channel': 'uploader',
2941 'channel_id': 'uploader_id',
2942 'channel_url': 'uploader_url',
2943 }
2944 for to, frm in fallbacks.items():
2945 if not info.get(to):
2946 info[to] = info.get(frm)
2947
2948 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2949 v = info.get(s_k)
2950 if v:
2951 info[d_k] = v
b84071c0 2952
c224251a
M
2953 is_private = bool_or_none(video_details.get('isPrivate'))
2954 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2955 is_membersonly = None
b28f8d24 2956 is_premium = None
c224251a
M
2957 if initial_data and is_private is not None:
2958 is_membersonly = False
b28f8d24 2959 is_premium = False
c224251a
M
2960 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2961 for content in contents or []:
2962 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2963 for badge in badges or []:
2964 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2965 if label.lower() == 'members only':
2966 is_membersonly = True
2967 break
b28f8d24
M
2968 elif label.lower() == 'premium':
2969 is_premium = True
2970 break
2971 if is_membersonly or is_premium:
c224251a
M
2972 break
2973
2974 # TODO: Add this for playlists
2975 info['availability'] = self._availability(
2976 is_private=is_private,
b28f8d24 2977 needs_premium=is_premium,
c224251a
M
2978 needs_subscription=is_membersonly,
2979 needs_auth=info['age_limit'] >= 18,
2980 is_unlisted=None if is_private is None else is_unlisted)
2981
06167fbb 2982 # get xsrf for annotations or comments
a06916d9 2983 get_annotations = self.get_param('writeannotations', False)
2984 get_comments = self.get_param('getcomments', False)
06167fbb 2985 if get_annotations or get_comments:
29f7c58a 2986 xsrf_token = None
545cc85d 2987 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2988 if ytcfg:
2989 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2990 if not xsrf_token:
2991 xsrf_token = self._search_regex(
2992 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2993 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2994
2995 # annotations
06167fbb 2996 if get_annotations:
64b6a4e9
RA
2997 invideo_url = try_get(
2998 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2999 if xsrf_token and invideo_url:
29f7c58a 3000 xsrf_field_name = None
3001 if ytcfg:
3002 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
3003 if not xsrf_field_name:
3004 xsrf_field_name = self._search_regex(
3005 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 3006 webpage, 'xsrf field name',
29f7c58a 3007 group='xsrf_field_name', default='session_token')
8a784c74 3008 info['annotations'] = self._download_webpage(
64b6a4e9
RA
3009 self._proto_relative_url(invideo_url),
3010 video_id, note='Downloading annotations',
3011 errnote='Unable to download video annotations', fatal=False,
3012 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 3013
277d6ff5 3014 if get_comments:
2d6659b9 3015 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage)
4ea3be0a 3016
545cc85d 3017 self.mark_watched(video_id, player_response)
d77ab8e2 3018
545cc85d 3019 return info
c5e8d7af 3020
5f6a1245 3021
8bdd16b4 3022class YoutubeTabIE(YoutubeBaseInfoExtractor):
3023 IE_DESC = 'YouTube.com tab'
70d5c17b 3024 _VALID_URL = r'''(?x)
3025 https?://
3026 (?:\w+\.)?
3027 (?:
3028 youtube(?:kids)?\.com|
3029 invidio\.us
3030 )/
3031 (?:
fe03a6cd 3032 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 3033 (?P<not_channel>
9ba5705a 3034 feed/|hashtag/|
70d5c17b 3035 (?:playlist|watch)\?.*?\blist=
3036 )|
29f7c58a 3037 (?!(?:%s)\b) # Direct URLs
70d5c17b 3038 )
3039 (?P<id>[^/?\#&]+)
3040 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 3041 IE_NAME = 'youtube:tab'
3042
81127aa5 3043 _TESTS = [{
da692b79 3044 'note': 'playlists, multipage',
8bdd16b4 3045 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3046 'playlist_mincount': 94,
3047 'info_dict': {
3048 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3049 'title': 'Игорь Клейнер - Playlists',
3050 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3051 'uploader': 'Игорь Клейнер',
3052 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 3053 },
3054 }, {
da692b79 3055 'note': 'playlists, multipage, different order',
8bdd16b4 3056 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3057 'playlist_mincount': 94,
3058 'info_dict': {
3059 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3060 'title': 'Игорь Клейнер - Playlists',
3061 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3062 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3063 'uploader': 'Игорь Клейнер',
8bdd16b4 3064 },
201c1459 3065 }, {
da692b79 3066 'note': 'playlists, series',
201c1459 3067 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3068 'playlist_mincount': 5,
3069 'info_dict': {
3070 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3071 'title': '3Blue1Brown - Playlists',
3072 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 3073 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3074 'uploader': '3Blue1Brown',
201c1459 3075 },
8bdd16b4 3076 }, {
da692b79 3077 'note': 'playlists, singlepage',
8bdd16b4 3078 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3079 'playlist_mincount': 4,
3080 'info_dict': {
3081 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3082 'title': 'ThirstForScience - Playlists',
3083 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 3084 'uploader': 'ThirstForScience',
3085 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 3086 }
3087 }, {
3088 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3089 'only_matching': True,
3090 }, {
da692b79 3091 'note': 'basic, single video playlist',
0e30a7b9 3092 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3093 'info_dict': {
0e30a7b9 3094 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3095 'uploader': 'Sergey M.',
3096 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3097 'title': 'youtube-dl public playlist',
81127aa5 3098 },
0e30a7b9 3099 'playlist_count': 1,
9291475f 3100 }, {
da692b79 3101 'note': 'empty playlist',
0e30a7b9 3102 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3103 'info_dict': {
0e30a7b9 3104 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3105 'uploader': 'Sergey M.',
3106 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3107 'title': 'youtube-dl empty playlist',
9291475f
PH
3108 },
3109 'playlist_count': 0,
3110 }, {
da692b79 3111 'note': 'Home tab',
8bdd16b4 3112 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3113 'info_dict': {
8bdd16b4 3114 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3115 'title': 'lex will - Home',
3116 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3117 'uploader': 'lex will',
3118 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3119 },
8bdd16b4 3120 'playlist_mincount': 2,
9291475f 3121 }, {
da692b79 3122 'note': 'Videos tab',
8bdd16b4 3123 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3124 'info_dict': {
8bdd16b4 3125 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3126 'title': 'lex will - Videos',
3127 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3128 'uploader': 'lex will',
3129 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3130 },
8bdd16b4 3131 'playlist_mincount': 975,
9291475f 3132 }, {
da692b79 3133 'note': 'Videos tab, sorted by popular',
8bdd16b4 3134 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3135 'info_dict': {
8bdd16b4 3136 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3137 'title': 'lex will - Videos',
3138 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3139 'uploader': 'lex will',
3140 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3141 },
8bdd16b4 3142 'playlist_mincount': 199,
9291475f 3143 }, {
da692b79 3144 'note': 'Playlists tab',
8bdd16b4 3145 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3146 'info_dict': {
8bdd16b4 3147 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3148 'title': 'lex will - Playlists',
3149 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3150 'uploader': 'lex will',
3151 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3152 },
8bdd16b4 3153 'playlist_mincount': 17,
ac7553d0 3154 }, {
da692b79 3155 'note': 'Community tab',
8bdd16b4 3156 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3157 'info_dict': {
8bdd16b4 3158 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3159 'title': 'lex will - Community',
3160 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3161 'uploader': 'lex will',
3162 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3163 },
3164 'playlist_mincount': 18,
87dadd45 3165 }, {
da692b79 3166 'note': 'Channels tab',
8bdd16b4 3167 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3168 'info_dict': {
8bdd16b4 3169 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3170 'title': 'lex will - Channels',
3171 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3172 'uploader': 'lex will',
3173 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3174 },
deaec5af 3175 'playlist_mincount': 12,
cd684175 3176 }, {
3177 'note': 'Search tab',
3178 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3179 'playlist_mincount': 40,
3180 'info_dict': {
3181 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3182 'title': '3Blue1Brown - Search - linear algebra',
3183 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3184 'uploader': '3Blue1Brown',
3185 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3186 },
6b08cdf6 3187 }, {
a0566bbf 3188 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3189 'only_matching': True,
3190 }, {
a0566bbf 3191 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3192 'only_matching': True,
3193 }, {
a0566bbf 3194 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3195 'only_matching': True,
3196 }, {
3197 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3198 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3199 'info_dict': {
3200 'title': '29C3: Not my department',
3201 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3202 'uploader': 'Christiaan008',
3203 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3204 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3205 },
3206 'playlist_count': 96,
3207 }, {
3208 'note': 'Large playlist',
3209 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3210 'info_dict': {
8bdd16b4 3211 'title': 'Uploads from Cauchemar',
3212 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3213 'uploader': 'Cauchemar',
3214 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3215 },
8bdd16b4 3216 'playlist_mincount': 1123,
3217 }, {
da692b79 3218 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3219 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3220 'only_matching': True,
4b7df0d3
JMF
3221 }, {
3222 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3223 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3224 'info_dict': {
acf757f4
PH
3225 'title': 'Uploads from Interstellar Movie',
3226 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3227 'uploader': 'Interstellar Movie',
8bdd16b4 3228 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3229 },
481cc733 3230 'playlist_mincount': 21,
358de58c 3231 }, {
3232 'note': 'Playlist with "show unavailable videos" button',
3233 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3234 'info_dict': {
3235 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3236 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3237 'uploader': 'Phim Siêu Nhân Nhật Bản',
3238 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3239 },
da692b79 3240 'playlist_mincount': 200,
5d342002 3241 }, {
da692b79 3242 'note': 'Playlist with unavailable videos in page 7',
5d342002 3243 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3244 'info_dict': {
3245 'title': 'Uploads from BlankTV',
3246 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3247 'uploader': 'BlankTV',
3248 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3249 },
da692b79 3250 'playlist_mincount': 1000,
8bdd16b4 3251 }, {
da692b79 3252 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3253 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3254 'info_dict': {
3255 'title': 'Data Analysis with Dr Mike Pound',
3256 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3257 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3258 'uploader': 'Computerphile',
deaec5af 3259 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3260 },
3261 'playlist_mincount': 11,
3262 }, {
a0566bbf 3263 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3264 'only_matching': True,
dacb3a86 3265 }, {
da692b79 3266 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3267 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3268 'info_dict': {
3269 'id': 'FqZTN594JQw',
3270 'ext': 'webm',
3271 'title': "Smiley's People 01 detective, Adventure Series, Action",
3272 'uploader': 'STREEM',
3273 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3274 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3275 'upload_date': '20150526',
3276 'license': 'Standard YouTube License',
3277 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3278 'categories': ['People & Blogs'],
3279 'tags': list,
dbdaaa23 3280 'view_count': int,
dacb3a86
S
3281 'like_count': int,
3282 'dislike_count': int,
3283 },
3284 'params': {
3285 'skip_download': True,
3286 },
13a75688 3287 'skip': 'This video is not available.',
dacb3a86 3288 'add_ie': [YoutubeIE.ie_key()],
481cc733 3289 }, {
8bdd16b4 3290 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3291 'only_matching': True,
66b48727 3292 }, {
8bdd16b4 3293 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3294 'only_matching': True,
a0566bbf 3295 }, {
3296 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3297 'info_dict': {
da692b79 3298 'id': 'X1whbWASnNQ', # This will keep changing
a0566bbf 3299 'ext': 'mp4',
deaec5af 3300 'title': compat_str,
a0566bbf 3301 'uploader': 'Sky News',
3302 'uploader_id': 'skynews',
3303 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3304 'upload_date': r're:\d{8}',
3305 'description': compat_str,
a0566bbf 3306 'categories': ['News & Politics'],
3307 'tags': list,
3308 'like_count': int,
3309 'dislike_count': int,
3310 },
3311 'params': {
3312 'skip_download': True,
3313 },
da692b79 3314 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3315 }, {
3316 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3317 'info_dict': {
3318 'id': 'a48o2S1cPoo',
3319 'ext': 'mp4',
3320 'title': 'The Young Turks - Live Main Show',
3321 'uploader': 'The Young Turks',
3322 'uploader_id': 'TheYoungTurks',
3323 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3324 'upload_date': '20150715',
3325 'license': 'Standard YouTube License',
3326 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3327 'categories': ['News & Politics'],
3328 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3329 'like_count': int,
3330 'dislike_count': int,
3331 },
3332 'params': {
3333 'skip_download': True,
3334 },
3335 'only_matching': True,
3336 }, {
3337 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3338 'only_matching': True,
3339 }, {
3340 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3341 'only_matching': True,
09f1580e 3342 }, {
3343 'note': 'A channel that is not live. Should raise error',
3344 'url': 'https://www.youtube.com/user/numberphile/live',
3345 'only_matching': True,
3d3dddc9 3346 }, {
3347 'url': 'https://www.youtube.com/feed/trending',
3348 'only_matching': True,
3349 }, {
3d3dddc9 3350 'url': 'https://www.youtube.com/feed/library',
3351 'only_matching': True,
3352 }, {
3d3dddc9 3353 'url': 'https://www.youtube.com/feed/history',
3354 'only_matching': True,
3355 }, {
3d3dddc9 3356 'url': 'https://www.youtube.com/feed/subscriptions',
3357 'only_matching': True,
3358 }, {
3d3dddc9 3359 'url': 'https://www.youtube.com/feed/watch_later',
3360 'only_matching': True,
3361 }, {
da692b79 3362 'note': 'Recommended - redirects to home page',
3d3dddc9 3363 'url': 'https://www.youtube.com/feed/recommended',
3364 'only_matching': True,
29f7c58a 3365 }, {
da692b79 3366 'note': 'inline playlist with not always working continuations',
29f7c58a 3367 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3368 'only_matching': True,
3369 }, {
3370 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3371 'only_matching': True,
3372 }, {
3373 'url': 'https://www.youtube.com/course',
3374 'only_matching': True,
3375 }, {
3376 'url': 'https://www.youtube.com/zsecurity',
3377 'only_matching': True,
3378 }, {
3379 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3380 'only_matching': True,
3381 }, {
3382 'url': 'https://www.youtube.com/TheYoungTurks/live',
3383 'only_matching': True,
39ed931e 3384 }, {
3385 'url': 'https://www.youtube.com/hashtag/cctv9',
3386 'info_dict': {
3387 'id': 'cctv9',
3388 'title': '#cctv9',
3389 },
3390 'playlist_mincount': 350,
201c1459 3391 }, {
3392 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3393 'only_matching': True,
9297939e 3394 }, {
da692b79 3395 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3396 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3397 'only_matching': True
fe03a6cd 3398 }, {
3399 'note': '/browse/ should redirect to /channel/',
3400 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3401 'only_matching': True
3402 }, {
3403 'note': 'VLPL, should redirect to playlist?list=PL...',
3404 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3405 'info_dict': {
3406 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3407 'uploader': 'NoCopyrightSounds',
3408 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3409 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3410 'title': 'NCS Releases',
3411 },
3412 'playlist_mincount': 166,
18db7548 3413 }, {
3414 'note': 'Topic, should redirect to playlist?list=UU...',
3415 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3416 'info_dict': {
3417 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3418 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3419 'title': 'Uploads from Royalty Free Music - Topic',
3420 'uploader': 'Royalty Free Music - Topic',
3421 },
3422 'expected_warnings': [
3423 'A channel/user page was given',
3424 'The URL does not have a videos tab',
3425 ],
3426 'playlist_mincount': 101,
3427 }, {
3428 'note': 'Topic without a UU playlist',
3429 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3430 'info_dict': {
3431 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3432 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3433 },
3434 'expected_warnings': [
3435 'A channel/user page was given',
3436 'The URL does not have a videos tab',
3437 'Falling back to channel URL',
3438 ],
3439 'playlist_mincount': 9,
abcdd12b 3440 }, {
3441 'note': 'Youtube music Album',
3442 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3443 'info_dict': {
3444 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3445 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3446 },
3447 'playlist_count': 50,
29f7c58a 3448 }]
3449
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    Any URL accepted by YoutubeIE.suitable is rejected here; every other
    URL is decided by the parent class's suitable() check.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 3454
def _extract_channel_id(self, webpage):
    """Return the channel id found in *webpage*.

    The 'channelId' meta tag is used when present. Otherwise the id is
    parsed out of a channel URL taken from one of the URL-carrying meta
    tags (og:url, al:*:url, twitter:*).

    NOTE(review): the fallback lookups are made without a default, so they
    presumably raise when nothing matches - confirm against
    _html_search_meta / _search_regex.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The quantifier has to be inside the capture group: with `(...)+` the
    # group only keeps its last repetition, i.e. the final character of the id.
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
15f6397c 3467
8bdd16b4 3468 @staticmethod
cd7c66cf 3469 def _extract_basic_item_renderer(item):
3470 # Modified from _extract_grid_item_renderer
201c1459 3471 known_basic_renderers = (
3472 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3473 )
3474 for key, renderer in item.items():
201c1459 3475 if not isinstance(renderer, dict):
cd7c66cf 3476 continue
201c1459 3477 elif key in known_basic_renderers:
3478 return renderer
3479 elif key.startswith('grid') and key.endswith('Renderer'):
3480 return renderer
8bdd16b4 3481
8bdd16b4 3482 def _grid_entries(self, grid_renderer):
3483 for item in grid_renderer['items']:
3484 if not isinstance(item, dict):
39b62db1 3485 continue
cd7c66cf 3486 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 3487 if not isinstance(renderer, dict):
3488 continue
3489 title = try_get(
201c1459 3490 renderer, (lambda x: x['title']['runs'][0]['text'],
3491 lambda x: x['title']['simpleText']), compat_str)
8bdd16b4 3492 # playlist
3493 playlist_id = renderer.get('playlistId')
3494 if playlist_id:
3495 yield self.url_result(
3496 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3497 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3498 video_title=title)
201c1459 3499 continue
8bdd16b4 3500 # video
3501 video_id = renderer.get('videoId')
3502 if video_id:
3503 yield self._extract_video(renderer)
201c1459 3504 continue
8bdd16b4 3505 # channel
3506 channel_id = renderer.get('channelId')
3507 if channel_id:
3508 title = try_get(
3509 renderer, lambda x: x['title']['simpleText'], compat_str)
3510 yield self.url_result(
3511 'https://www.youtube.com/channel/%s' % channel_id,
3512 ie=YoutubeTabIE.ie_key(), video_title=title)
201c1459 3513 continue
3514 # generic endpoint URL support
3515 ep_url = urljoin('https://www.youtube.com/', try_get(
3516 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3517 compat_str))
3518 if ep_url:
3519 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3520 if ie.suitable(ep_url):
3521 yield self.url_result(
3522 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3523 break
8bdd16b4 3524
3d3dddc9 3525 def _shelf_entries_from_content(self, shelf_renderer):
3526 content = shelf_renderer.get('content')
3527 if not isinstance(content, dict):
8bdd16b4 3528 return
cd7c66cf 3529 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3530 if renderer:
3531 # TODO: add support for nested playlists so each shelf is processed
3532 # as separate playlist
3533 # TODO: this includes only first N items
3534 for entry in self._grid_entries(renderer):
3535 yield entry
3536 renderer = content.get('horizontalListRenderer')
3537 if renderer:
3538 # TODO
3539 pass
8bdd16b4 3540
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield the entries of a shelf.

    When the shelf has an endpoint URL, a URL result for it is yielded
    first; the entries embedded in the shelf content follow in either case.

    skip_channels -- when true, a shelf whose URL contains '/channels?'
                     yields nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # The shelf may not carry a URL, so also extract from its content
    for shelf_entry in self._shelf_entries_from_content(shelf_renderer):
        yield shelf_entry
c5e8d7af 3558
8bdd16b4 3559 def _playlist_entries(self, video_list_renderer):
3560 for content in video_list_renderer['contents']:
3561 if not isinstance(content, dict):
3562 continue
3563 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3564 if not isinstance(renderer, dict):
3565 continue
3566 video_id = renderer.get('videoId')
3567 if not video_id:
3568 continue
3569 yield self._extract_video(renderer)
07aeced6 3570
def _rich_entries(self, rich_grid_renderer):
    """Yield the video found at rich_grid_renderer['content']['videoRenderer'].

    Nothing is yielded when that renderer is missing or has no 'videoId'.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3578
8bdd16b4 3579 def _video_entry(self, video_renderer):
3580 video_id = video_renderer.get('videoId')
3581 if video_id:
3582 return self._extract_video(video_renderer)
dacb3a86 3583
def _post_thread_entries(self, post_thread_renderer):
    """Yield the entries referenced by a community post.

    In order: the attached video (if any), the attached playlist (if any),
    then every link in the post text that YoutubeIE accepts, except links
    to the attached video itself.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    attached_video_id = attached_video.get('videoId')
    if attached_video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link_url = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link_url or not YoutubeIE.suitable(link_url):
            continue
        linked_video_id = YoutubeIE._match_id(link_url)
        # The attached video has already been yielded above
        if linked_video_id != attached_video_id:
            yield self.url_result(
                link_url, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
dacb3a86 3619
8bdd16b4 3620 def _post_thread_continuation_entries(self, post_thread_continuation):
3621 contents = post_thread_continuation.get('contents')
3622 if not isinstance(contents, list):
3623 return
3624 for content in contents:
3625 renderer = content.get('backstagePostThreadRenderer')
3626 if not isinstance(renderer, dict):
3627 continue
3628 for entry in self._post_thread_entries(renderer):
3629 yield entry
07aeced6 3630
39ed931e 3631 r''' # unused
3632 def _rich_grid_entries(self, contents):
3633 for content in contents:
3634 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3635 if video_renderer:
3636 entry = self._video_entry(video_renderer)
3637 if entry:
3638 yield entry
3639 '''
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield every entry of *tab*, then keep requesting continuation pages
    and yielding their entries until no continuation or no response is left.

    tab -- renderer dict of the selected tab; its 'content' is expected to
           hold a 'sectionListRenderer' or a 'richGridRenderer'
    item_id -- id used to label the continuation page requests
    identity_token, account_syncid, ytcfg -- forwarded to
           _generate_api_headers / _extract_response for each page request
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        # Yields the entries under parent_renderer['contents'] and records
        # the continuation it finds in continuation_list[0]
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                # Not an item section: only rich items are handled here
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                # Maps a renderer key to the generator producing its entries
                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                # Only the first known renderer of each item is processed
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            # Fall back to the continuation of the item section itself
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        # Last resort: the continuation of the parent renderer
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list used as a mutable cell that extract_entries writes to
    continuation_list = [None]  # Python 2 does not support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

    for page_num in itertools.count(1):
        if not continuation:
            break
        query = {
            'continuation': continuation['continuation'],
            'clickTracking': {'clickTrackingParams': continuation['itct']}
        }
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=query, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Prefer the visitor data of the latest response for the next request
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        # First response shape: entries under 'continuationContents'
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Reset the cell so that only a continuation found while
            # processing this page is taken from it
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response shape: items appended through
        # 'appendContinuationItemsAction'. Each value pairs the entry
        # generator with the key under which it expects the item list.
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        # The type of the first item decides how the whole list is processed
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            # Wrap the raw item list so it looks like a regular renderer
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Neither response shape matched: stop paging
        break
9558dcec 3759
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the first tab whose 'selected' flag is True.

    Both 'tabRenderer' and 'expandableTabRenderer' tabs are considered.
    Raises ExtractorError when no tab is selected.
    """
    for tab in tabs:
        tab_renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    raise ExtractorError('Unable to find selected tab')
b82f815f 3768
@staticmethod
def _extract_uploader(data):
    """Return the uploader fields found in the playlist sidebar of *data*.

    The result may contain 'uploader', 'uploader_id' and 'uploader_url';
    fields whose value is None are left out. An empty dict is returned
    when the sidebar holds no video owner.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    uploader = {}
    for sidebar_item in sidebar_items or []:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info,
            lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        owner_base_url = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'],
            compat_str)
        uploader['uploader'] = owner.get('text')
        uploader['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        uploader['uploader_url'] = urljoin('https://www.youtube.com/', owner_base_url)
    return dict(
        (field, value) for field, value in uploader.items() if value is not None)
8bdd16b4 3791
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for the selected tab of a tabbed page.

    Metadata comes from the 'channelMetadataRenderer' of *data* or, when
    that is absent, from its 'playlistMetadataRenderer'. Uploader fields
    are read from the playlist sidebar when no channel id is known. The
    entries are produced lazily by _entries for the selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    thumbnails_list = []
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        # Stays None for playlist metadata; item_id is substituted below
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        thumbnails_list = (
            try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for thumb in thumbnails_list:
        thumbnail_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumbnail_url:
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        title = (
            try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
            or playlist_id)
    # Append the tab name and the expanded text of the tab, when present
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the final uploader_* fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    identity_token = self._extract_identity_token(webpage, item_id)
    account_syncid = self._extract_account_syncid(data)
    ytcfg = self._extract_ytcfg(item_id, webpage)
    return self.playlist_result(
        self._entries(selected_tab, playlist_id, identity_token, account_syncid, ytcfg),
        **metadata)
73c4ac2c 3864
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a mix playlist, requesting further pages from
    the 'next' endpoint as needed.

    Each page is expected to repeat the previous page's last video;
    extraction resumes right after it. Iteration stops when a page has no
    videos, brings nothing new, returns to the first video of the mix, or
    when the response contains no playlist.
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
        visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume after the last video already yielded; start at 0 when it
        # is not part of this page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item may lack a watch endpoint; fall back to the
        # defaults below instead of failing on `.get`
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query,
            ep='next',
            headers=headers,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        # Stop cleanly when the response carries no usable playlist;
        # _playlist_entries would otherwise fail on the missing 'contents'
        if not try_get(playlist, lambda x: x['contents'], list):
            return
cd7c66cf 3903
79360d99 3904 def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
8bdd16b4 3905 title = playlist.get('title') or try_get(
3906 data, lambda x: x['titleText']['simpleText'], compat_str)
3907 playlist_id = playlist.get('playlistId') or item_id
cd7c66cf 3908
3909 # Delegating everything except mix playlists to regular tab-based playlist URL
29f7c58a 3910 playlist_url = urljoin(url, try_get(
3911 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3912 compat_str))
3913 if playlist_url and playlist_url != url:
3914 return self.url_result(
3915 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3916 video_title=title)
cd7c66cf 3917
8bdd16b4 3918 return self.playlist_result(
79360d99 3919 self._extract_mix_playlist(playlist, playlist_id, data, webpage),
cd7c66cf 3920 playlist_id=playlist_id, playlist_title=title)
c5e8d7af 3921
358de58c 3922 def _reload_with_unavailable_videos(self, item_id, data, webpage):
3923 """
3924 Get playlist with unavailable videos if the 'show unavailable videos' button exists.
3925 """
3926 sidebar_renderer = try_get(
5d342002 3927 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
3928 if not sidebar_renderer:
3929 return
3930 browse_id = params = None
358de58c 3931 for item in sidebar_renderer:
3932 if not isinstance(item, dict):
3933 continue
3934 renderer = item.get('playlistSidebarPrimaryInfoRenderer')
3935 menu_renderer = try_get(
3936 renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
3937 for menu_item in menu_renderer:
3938 if not isinstance(menu_item, dict):
3939 continue
3940 nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
3941 text = try_get(
3942 nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
3943 if not text or text.lower() != 'show unavailable videos':
3944 continue
3945 browse_endpoint = try_get(
3946 nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
3947 browse_id = browse_endpoint.get('browseId')
3948 params = browse_endpoint.get('params')
5d342002 3949 break
3950
3951 ytcfg = self._extract_ytcfg(item_id, webpage)
3952 headers = self._generate_api_headers(
3953 ytcfg, account_syncid=self._extract_account_syncid(ytcfg),
3954 identity_token=self._extract_identity_token(webpage, item_id=item_id),
3955 visitor_data=try_get(
3956 self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
3957 query = {
3958 'params': params or 'wgYCCAA=',
3959 'browseId': browse_id or 'VL%s' % item_id
3960 }
3961 return self._extract_response(
3962 item_id=item_id, headers=headers, query=query,
3963 check_get_keys='contents', fatal=False,
3964 note='Downloading API JSON with unavailable videos')
358de58c 3965
cd7c66cf 3966 def _extract_webpage(self, url, item_id):
a06916d9 3967 retries = self.get_param('extractor_retries', 3)
62bff2c1 3968 count = -1
c705177d 3969 last_error = 'Incomplete yt initial data recieved'
14fdfea9 3970 while count < retries:
62bff2c1 3971 count += 1
14fdfea9 3972 # Sometimes youtube returns a webpage with incomplete ytInitialData
62bff2c1 3973 # See: https://github.com/yt-dlp/yt-dlp/issues/116
3974 if count:
c705177d 3975 self.report_warning('%s. Retrying ...' % last_error)
5ef7d9bd 3976 webpage = self._download_webpage(
3977 url, item_id,
cd7c66cf 3978 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
14fdfea9 3979 data = self._extract_yt_initial_data(item_id, webpage)
14fdfea9 3980 if data.get('contents') or data.get('currentVideoEndpoint'):
3981 break
95c01b6c 3982 # Extract alerts here only when there is error
3983 self._extract_and_report_alerts(data)
c705177d 3984 if count >= retries:
6a39ee13 3985 raise ExtractorError(last_error)
cd7c66cf 3986 return webpage, data
3987
9297939e 3988 @staticmethod
3989 def _smuggle_data(entries, data):
3990 for entry in entries:
3991 if data:
3992 entry['url'] = smuggle_url(entry['url'], data)
3993 yield entry
3994
cd7c66cf 3995 def _real_extract(self, url):
9297939e 3996 url, smuggled_data = unsmuggle_url(url, {})
3997 if self.is_music_url(url):
3998 smuggled_data['is_music_url'] = True
fe03a6cd 3999 info_dict = self.__real_extract(url, smuggled_data)
9297939e 4000 if info_dict.get('entries'):
4001 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4002 return info_dict
4003
    # Splits a URL into <pre> (the part matched by _VALID_URL), <tab> (a "/word" path segment,
    # only attempted when the `channel_type` group of _VALID_URL took part in the match)
    # and <post> (everything that is left)
    _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4005
    def __real_extract(self, url, smuggled_data):
        """Normalize the URL, download the page and dispatch to the matching extraction routine.

        @param url            URL to extract (already unsmuggled by the caller)
        @param smuggled_data  Dict of smuggled data; only 'is_music_url' is read here
        @returns              The result of _extract_from_tabs / _extract_from_playlist,
                              or a url_result pointing to a single video
        @raises ExtractorError  when the page cannot be recognized or a /live tab did not redirect
        """
        item_id = self._match_id(url)
        # Force the host to www.youtube.com, whatever host the URL was given with
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # Match groups of _url_re as a dict, with unmatched groups as '' instead of None
            mobj = self._url_re.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj = get_mobj(url)
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':
                    # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif item_id[:2] == 'MP':
                    # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                    item_id = self._search_regex(
                        r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                        self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                        'playlist id')
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif mobj['channel_type'] == 'browse':
                    # Youtube music /browse/ should be changed to /channel/
                    pre = 'https://www.youtube.com/channel/%s' % item_id
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        # Rebuild the URL from the (possibly rewritten) parts and re-parse it
        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage, data = self._extract_webpage(url, item_id)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                # /videos was requested, but the selected tab has a different title
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                        pl_id = 'UU%s' % item_id[2:]
                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                        try:
                            pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                            for alert_type, alert_message in self._extract_alerts(pl_data):
                                if alert_type == 'error':
                                    raise ExtractorError('Youtube said: %s' % alert_message)
                            # Switch over to the playlist only after it loaded without an error alert
                            item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                        except ExtractorError:
                            self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

        self.write_debug('Final URL: %s' % url)

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            # Keep the current data if the reload returned nothing
            data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
        self._extract_and_report_alerts(data)

        # `data` may have been replaced above, so the tabs are looked up again
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)

        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, webpage)

        # Neither tabs nor a playlist: fall back to the single video, if one is known
        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
c5e8d7af 4121
c5e8d7af 4122
# Accepts bare playlist IDs and playlist URLs and redirects them to YoutubeTabIE
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE accepts or that carry a video ID (`v` parameter)."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return False if has_video_id else super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite the input into a /playlist URL and hand it over to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query if there is one, otherwise pass only the playlist ID
        query = parse_qs(url) or {'list': playlist_id}
        target = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            target = smuggle_url(target, {'is_music_url': True})
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4205
4206
# Handles youtu.be short links that carry a playlist ID and redirects them to YoutubeTabIE
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Turn the short link into the equivalent /watch URL carrying both video and playlist IDs."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4245
4246
# Maps the "ytuser:<name>" keyword to the user's page, which YoutubeTabIE extracts
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect to the /user/ page of the user named after the keyword."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4260
b05654f0 4261
# Maps the ":ytfav" keyword (and its spelling variants) to the "LL" playlist
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect to the "LL" playlist, which YoutubeTabIE extracts."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4279
4280
# Implements the "ytsearch" keyword on top of the `search` API endpoint
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to `n` videos found for `query`, following continuation tokens page by page."""
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS
        yielded = 0
        for page_num in itertools.count(1):
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                return
            # The first lookup is tried first; the second one is the fallback
            sections = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation_token = None
            for section in sections:
                if continuation_token is None:
                    continuation_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                results = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list) or []
                for result in results:
                    video = result.get('videoRenderer') if isinstance(result, dict) else None
                    # Only renderers that are dicts and carry a video ID are extracted
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    yielded += 1
                    if yielded == n:
                        return

            if not continuation_token:
                return
            request['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 4350
c9ae7b95 4351
# Variant of YoutubeSearchIE for the "ytsearchdate" keyword; only the search params differ
class YoutubeSearchDateIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4357
c9ae7b95 4358
# Extracts the results of a youtube.com/results?... search URL
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = '%s_url' % YoutubeSearchIE.IE_NAME
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own _VALID_URL."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the query and the `sp` parameter from the URL and run the search."""
        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        # `search_query` takes precedence over `q`
        terms = qs.get('search_query') or qs.get('q')
        query = terms[0]
        self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4384
4385
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors.
    Each subclass has to provide _FEED_NAME, which is used for both
    the extractor name and the feed URL.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        """Redirect to the /feed/ page of this feed, which YoutubeTabIE extracts."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4402
4403
# Maps the ":ytwatchlater" keyword to the "WL" playlist
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect to the "WL" playlist, which YoutubeTabIE extracts."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4416
4417
25f14e9f
S
# Feed extractor for the "recommended" feed; matches the bare youtube.com URL
# as well as the ":ytrec" keyword and does not require login
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4433
1ed5b5c9 4434
# Feed extractor for the "subscriptions" feed, reachable through the ":ytsubs" keyword
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4446
1ed5b5c9 4447
# Feed extractor for the "history" feed, reachable through the ":ythis" keyword
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4456
4457
15870e90
PH
# Catches watch URLs that end right after a single non-video parameter and
# answers them with a hint about quoting instead of extracting anything
class YoutubeTruncatedURLIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail with an expected error that explains how to quote the URL."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)
772fd5cc
PH
4505
4506
# Catches watch URLs whose video ID is only 1 to 10 characters long
class YoutubeTruncatedIDIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail with an expected error that names the incomplete ID and the URL."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)