]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
Add `only_once` param for `report_warning`
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
2d6659b9 5import base64
d92f5d5a 6import calendar
109dd3b2 7import copy
fe93e2c4 8import datetime
a5c56234 9import hashlib
0ca96d48 10import itertools
c5e8d7af 11import json
c4417ddb 12import os.path
d77ab8e2 13import random
c5e8d7af 14import re
8a784c74 15import time
e0df6211 16import traceback
c5e8d7af 17
b05654f0 18from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 19from ..compat import (
edf3e38e 20 compat_chr,
29f7c58a 21 compat_HTTPError,
c5e8d7af 22 compat_parse_qs,
545cc85d 23 compat_str,
7fd002c0 24 compat_urllib_parse_unquote_plus,
15707c7e 25 compat_urllib_parse_urlencode,
7c80519c 26 compat_urllib_parse_urlparse,
7c61bd36 27 compat_urlparse,
4bb4a188 28)
545cc85d 29from ..jsinterp import JSInterpreter
4bb4a188 30from ..utils import (
c224251a 31 bool_or_none,
2d6659b9 32 bytes_to_intlist,
c5e8d7af 33 clean_html,
26fe8ffe 34 dict_get,
d92f5d5a 35 datetime_from_str,
358de58c 36 error_to_compat_str,
c5e8d7af 37 ExtractorError,
b60419c5 38 format_field,
2d30521a 39 float_or_none,
dd27fd17 40 int_or_none,
2d6659b9 41 intlist_to_bytes,
94278f72 42 mimetype2ext,
6310acf5 43 parse_codecs,
49bd8c66 44 parse_count,
7c80519c 45 parse_duration,
dca3ff4a 46 qualities,
3995d37d 47 remove_start,
cf7e015f 48 smuggle_url,
dbdaaa23 49 str_or_none,
c93d53f5 50 str_to_int,
7c365c21 51 traverse_obj,
556dbe7f 52 try_get,
c5e8d7af
PH
53 unescapeHTML,
54 unified_strdate,
cf7e015f 55 unsmuggle_url,
8bdd16b4 56 update_url_query,
21c340b8 57 url_or_none,
6e6bc8da 58 urlencode_postdata,
fe93e2c4 59 urljoin,
7c365c21 60 variadic,
c5e8d7af
PH
61)
62
5f6a1245 63
def parse_qs(url):
    """Parse the query component of *url* with compat_urlparse.parse_qs and return the result."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
66
67
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _login(self):
        """
        Check the configured authentication; no login request is performed.

        Logging in with username and password is broken, so only cookies
        can be used for authentication.

        If _LOGIN_REQUIRED is set and no cookie file was provided,
        raise_login_required() is called (presumably raising an error).
        If a username is configured, a warning pointing to cookie
        authentication is reported instead.

        Always returns None.
        """
        # username+password login is broken
        if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
            self.raise_login_required(
                'Login details are needed to download this content', method='cookies')
        username, _ = self._get_login_info()
        if username:
            self.report_warning(
                'Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
        # NOTE: the former Google sign-in flow (account lookup, password challenge,
        # TFA, CheckCookie) was already disabled here as a no-op string literal
        # and has been dropped; see version control history if it is needed again.

    def _initialize_consent(self):
        """
        Set an accepted CONSENT cookie for .youtube.com when needed.

        Nothing is changed if a '__Secure-3PSID' cookie is present or the
        existing CONSENT cookie already contains 'YES'. The numeric id of a
        PENDING consent cookie is reused; otherwise a random 3-digit id is used.
        """
        cookies = self._get_cookies('https://www.youtube.com/')
        if cookies.get('__Secure-3PSID'):
            return
        consent_id = None
        consent = cookies.get('CONSENT')
        if consent:
            if 'YES' in consent.value:
                return
            consent_id = self._search_regex(
                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
        if not consent_id:
            consent_id = random.randint(100, 999)
        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

    def _real_initialize(self):
        """Initialize the consent cookie and, if a downloader is attached, run the login check."""
        self._initialize_consent()
        if self._downloader is None:
            return
        self._login()

    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    # Built-in ytcfg values per Innertube client, used when a page's ytcfg is missing or incomplete
    _YT_DEFAULT_YTCFGS = {
        'WEB': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB',
            'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20210622.10.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 1
        },
        'WEB_REMIX': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
            'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_REMIX',
                    'clientVersion': '1.20210621.00.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 67
        },
        'WEB_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_EMBEDDED_PLAYER',
                    'clientVersion': '1.20210620.0.1',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 56
        },
        'ANDROID': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 3
        },
        'ANDROID_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_EMBEDDED_PLAYER',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 55
        },
        'ANDROID_MUSIC': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
            'INNERTUBE_CLIENT_VERSION': '4.32',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_MUSIC',
                    'clientVersion': '4.32',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 21
        }
    }

    # API hostname per client; clients not listed fall back to the 'WEB' host
    _YT_DEFAULT_INNERTUBE_HOSTS = {
        'DIRECT': 'youtubei.googleapis.com',
        'WEB': 'www.youtube.com',
        'WEB_REMIX': 'music.youtube.com',
        'ANDROID_MUSIC': 'music.youtube.com'
    }

    def _get_default_ytcfg(self, client='WEB'):
        """Return a deep copy of the default ytcfg for client, falling back to the WEB client if unknown."""
        if client in self._YT_DEFAULT_YTCFGS:
            return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
        self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
        return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])

    def _get_innertube_host(self, client='WEB'):
        """Return the API hostname configured for client, or the WEB hostname if none is configured."""
        return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, (client, 'WEB'))

    def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
        """try_get on ytcfg, falling back to the default ytcfg of default_client when the result is falsy."""
        return (
            try_get(ytcfg, getter, expected_type)
            or try_get(self._get_default_ytcfg(default_client), getter, expected_type))

    def _extract_client_name(self, ytcfg, default_client='WEB'):
        """Return INNERTUBE_CLIENT_NAME from ytcfg or from the default ytcfg of default_client."""
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str, default_client)

    @staticmethod
    def _extract_session_index(ytcfg):
        """Return ytcfg's SESSION_INDEX passed through int_or_none."""
        return int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))

    def _extract_client_version(self, ytcfg, default_client='WEB'):
        """Return INNERTUBE_CLIENT_VERSION from ytcfg or from the default ytcfg of default_client."""
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str, default_client)

    def _extract_api_key(self, ytcfg=None, default_client='WEB'):
        """Return INNERTUBE_API_KEY from ytcfg or from the default ytcfg of default_client."""
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)

    def _extract_context(self, ytcfg=None, default_client='WEB'):
        """
        Return the Innertube context dict to send with API requests.

        The INNERTUBE_CONTEXT of ytcfg is returned as-is when present.
        Otherwise the default context of default_client is used and, if a
        ytcfg was given, its client name/version and VISITOR_DATA are applied.
        """
        def get_context(cfg):
            return try_get(cfg, lambda x: x['INNERTUBE_CONTEXT'], dict)

        context = get_context(ytcfg)
        if context:
            return context

        context = get_context(self._get_default_ytcfg(default_client))
        if not ytcfg:
            return context

        # Recreate the client context (required)
        context['client'].update({
            'clientVersion': self._extract_client_version(ytcfg, default_client),
            'clientName': self._extract_client_name(ytcfg, default_client),
        })
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            context['client']['visitorData'] = visitor_data
        return context

    def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
        """
        Build the 'SAPISIDHASH <time>_<sha1>' authorization value for origin.

        Returns None when neither a __Secure-3PAPISID nor a SAPISID cookie exists.
        As a side effect, a missing SAPISID cookie is set from the found cookie.
        """
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        yt_cookies = self._get_cookies('https://www.youtube.com')
        sapisid_cookie = dict_get(
            yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
        if sapisid_cookie is None:
            return
        time_now = round(time.time())
        # SAPISID cookie is required if not already present
        if not yt_cookies.get('SAPISID'):
            self._set_cookie(
                '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
        # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
        sapisidhash = hashlib.sha1(
            f'{time_now} {sapisid_cookie.value} {origin}'.encode('utf-8')).hexdigest()
        return f'SAPISIDHASH {time_now}_{sapisidhash}'

    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page',
                  context=None, api_key=None, api_hostname=None, default_client='WEB'):
        """
        POST query (merged with the context) as JSON to the youtubei/v1 endpoint ep.

        context, api_key and api_hostname default to the values configured
        for default_client. Extra headers override the generated ones.
        Returns the result of _download_json.
        """
        data = {'context': context or self._extract_context(default_client=default_client)}
        data.update(query)
        real_headers = self._generate_api_headers(client=default_client)
        real_headers.update({'content-type': 'application/json'})
        if headers:
            real_headers.update(headers)
        return self._download_json(
            'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
            video_id=video_id, fatal=fatal, note=note, errnote=errnote,
            data=json.dumps(data).encode('utf8'), headers=real_headers,
            # The fallback key must belong to the same client as the context and host
            query={'key': api_key or self._extract_api_key(default_client=default_client)})

    def _extract_yt_initial_data(self, video_id, webpage):
        """Find the ytInitialData JSON object in webpage and return it parsed."""
        return self._parse_json(
            self._search_regex(
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_identity_token(self, webpage, item_id):
        """Return ID_TOKEN from the page's ytcfg, else from a regex search of webpage (None if absent)."""
        ytcfg = self._extract_ytcfg(item_id, webpage)
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
                return token
        return self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)

    @staticmethod
    def _extract_account_syncid(*args):
        """
        Extract syncId required to download private playlists of secondary channels
        @params response and/or ytcfg
        """
        for data in args:
            # ytcfg includes channel_syncid if on secondary channel
            delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
            if delegated_sid:
                return delegated_sid
            sync_ids = (try_get(
                data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                       lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
            if len(sync_ids) >= 2 and sync_ids[1]:
                # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
                # and just "user_syncid||" for primary channel. We only want the channel_syncid
                return sync_ids[0]

    def _extract_ytcfg(self, video_id, webpage):
        """Return the dict passed to ytcfg.set(...) in webpage, or {} if missing or unparsable."""
        if not webpage:
            return {}
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False) or {}

    def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None,
                              visitor_data=None, api_hostname=None, client='WEB', session_index=None):
        """
        Build the HTTP headers for an Innertube API request.

        Client name/version come from ytcfg or the defaults of client.
        Identity token, page id, auth user, visitor id and SAPISIDHASH
        authorization headers are added only when the respective value is available.
        """
        origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(client))
        headers = {
            'X-YouTube-Client-Name': compat_str(
                self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=client)),
            'X-YouTube-Client-Version': self._extract_client_version(ytcfg, client),
            'Origin': origin
        }
        if not visitor_data and ytcfg:
            visitor_data = try_get(
                self._extract_context(ytcfg, client), lambda x: x['client']['visitorData'], compat_str)
        if identity_token:
            headers['X-Youtube-Identity-Token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
        if session_index is None and ytcfg:
            session_index = self._extract_session_index(ytcfg)
        if account_syncid or session_index is not None:
            headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
        if visitor_data:
            headers['X-Goog-Visitor-Id'] = visitor_data
        auth = self._generate_sapisidhash_header(origin)
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = origin
        return headers

    @staticmethod
    def _build_api_continuation_query(continuation, ctp=None):
        """Return the query dict for a continuation token, with click tracking params when ctp is given."""
        query = {
            'continuation': continuation
        }
        # TODO: Inconsistency with clickTrackingParams.
        # Currently we have a fixed ctp contained within context (from ytcfg)
        # and a ctp in root query for continuation.
        if ctp:
            query['clickTracking'] = {'clickTrackingParams': ctp}
        return query

    @classmethod
    def _extract_next_continuation_data(cls, renderer):
        """Return a continuation query from renderer's next/reload continuation data, or None."""
        next_continuation = try_get(
            renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                       lambda x: x['continuation']['reloadContinuationData']), dict)
        if not next_continuation:
            return
        continuation = next_continuation.get('continuation')
        if not continuation:
            return
        ctp = next_continuation.get('clickTrackingParams')
        return cls._build_api_continuation_query(continuation, ctp)

    @classmethod
    def _extract_continuation_ep_data(cls, continuation_ep: dict):
        """Return a continuation query from a continuation endpoint dict, or None if it has no token."""
        if isinstance(continuation_ep, dict):
            continuation = try_get(
                continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
            if not continuation:
                return
            ctp = continuation_ep.get('clickTrackingParams')
            return cls._build_api_continuation_query(continuation, ctp)

    @classmethod
    def _extract_continuation(cls, renderer):
        """
        Return the continuation query for renderer, or None if there is none.

        The renderer's own continuation data is preferred; otherwise the
        continuationItemRenderer entries of its 'contents' and 'items' are searched.
        """
        next_continuation = cls._extract_next_continuation_data(renderer)
        if next_continuation:
            return next_continuation

        contents = []
        for key in ('contents', 'items'):
            contents.extend(try_get(renderer, lambda x: x[key], list) or [])

        for content in contents:
            if not isinstance(content, dict):
                continue
            continuation_ep = try_get(
                content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                          lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
                dict)
            continuation = cls._extract_continuation_ep_data(continuation_ep)
            if continuation:
                return continuation

    @classmethod
    def _extract_alerts(cls, data):
        """Yield (alert_type, message) for every alert in data['alerts'] that has both a type and a text."""
        for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert_dict, dict):
                continue
            for alert in alert_dict.values():
                # Skip malformed entries instead of failing on alert.get
                if not isinstance(alert, dict):
                    continue
                alert_type = alert.get('type')
                if not alert_type:
                    continue
                message = cls._get_text(alert.get('text'))
                if message:
                    yield alert_type, message

    def _report_alerts(self, alerts, expected=True):
        """
        Report (alert_type, message) pairs from alerts.

        Non-error alerts and all errors except the last are reported as
        warnings; the last error, if any, is raised as an ExtractorError.
        """
        errors = []
        warnings = []
        for alert_type, alert_message in alerts:
            if alert_type.lower() == 'error':
                errors.append([alert_type, alert_message])
            else:
                warnings.append([alert_type, alert_message])

        for alert_type, alert_message in (warnings + errors[:-1]):
            self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
        if errors:
            raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

    def _extract_and_report_alerts(self, data, *args, **kwargs):
        """Extract the alerts of data and pass them to _report_alerts."""
        return self._report_alerts(self._extract_alerts(data), *args, **kwargs)

    def _extract_badges(self, renderer: dict):
        """Return the set of lower-cased metadata badge labels found in renderer['badges']."""
        badges = set()
        for badge in try_get(renderer, lambda x: x['badges'], list) or []:
            label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
            if label:
                badges.add(label.lower())
        return badges

    @staticmethod
    def _get_text(data, getter=None, max_runs=None):
        """
        Return the first non-empty text found in data, or None.

        Each getter (or data itself when getter is None) is tried in order.
        A 'simpleText' string is preferred; otherwise the 'text' of the
        'runs' entries (or of a plain list) are joined, limited to
        max_runs entries when max_runs is set.
        """
        for get in variadic(getter):
            d = try_get(data, get) if get is not None else data
            text = try_get(d, lambda x: x['simpleText'], compat_str)
            if text:
                return text
            runs = try_get(d, lambda x: x['runs'], list) or []
            if not runs and isinstance(d, list):
                runs = d

            # A falsy max_runs means no limit
            text = ''.join(
                try_get(run, lambda x: x['text'], compat_str) or ''
                for run in runs[:max_runs or None])
            if text:
                return text

    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='WEB'):
        """
        Call the API endpoint ep, retrying on selected failures.

        Retries (up to the 'extractor_retries' param, default 3) happen on
        HTTP 500/503/404 errors and when none of check_get_keys is present
        in the response. Alerts in the response are reported.

        Returns the response, or None after a failure when fatal is False;
        with fatal set, the ExtractorError is raised instead.
        """
        response = None
        last_error = None
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if count < retries:
                        continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response

    @staticmethod
    def is_music_url(url):
        """Return True if url starts with http(s)://music.youtube.com/."""
        return re.match(r'https?://music\.youtube\.com/', url) is not None

    def _extract_video(self, renderer):
        """Build a 'url' type result dict for YoutubeIE from a video renderer."""
        video_id = renderer.get('videoId')
        title = self._get_text(renderer.get('title'))
        description = self._get_text(renderer.get('descriptionSnippet'))
        duration = parse_duration(self._get_text(renderer.get('lengthText')))
        view_count_text = self._get_text(renderer.get('viewCountText')) or ''
        # Only a leading run of digits/commas (whitespace removed) is taken as the count
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))

        uploader = self._get_text(renderer, (lambda x: x['ownerText'], lambda x: x['shortBylineText']))

        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
754
0c148415 755
360e1ca5 756class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 757 IE_DESC = 'YouTube.com'
bc2ca1bb 758 _INVIDIOUS_SITES = (
759 # invidious-redirect websites
760 r'(?:www\.)?redirect\.invidious\.io',
761 r'(?:(?:www|dev)\.)?invidio\.us',
762 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
763 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 764 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 765 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 766 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
bc2ca1bb 767 # youtube-dl invidious instances list
768 r'(?:(?:www|no)\.)?invidiou\.sh',
769 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
770 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 771 r'(?:www\.)?invidious\.mastodon\.host',
772 r'(?:www\.)?invidious\.zapashcanon\.fr',
ed807c18 773 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
201c1459 774 r'(?:www\.)?invidious\.tinfoil-hat\.net',
775 r'(?:www\.)?invidious\.himiko\.cloud',
776 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 777 r'(?:www\.)?invidious\.tube',
778 r'(?:www\.)?invidiou\.site',
779 r'(?:www\.)?invidious\.site',
780 r'(?:www\.)?invidious\.xyz',
781 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 782 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 783 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 784 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 785 r'(?:www\.)?tube\.poal\.co',
786 r'(?:www\.)?tube\.connect\.cafe',
787 r'(?:www\.)?vid\.wxzm\.sx',
788 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 789 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 790 r'(?:www\.)?yewtu\.be',
791 r'(?:www\.)?yt\.elukerio\.org',
792 r'(?:www\.)?yt\.lelux\.fi',
793 r'(?:www\.)?invidious\.ggc-project\.de',
794 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 795 r'(?:www\.)?ytprivate\.com',
796 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 797 r'(?:www\.)?invidious\.toot\.koeln',
798 r'(?:www\.)?invidious\.fdn\.fr',
799 r'(?:www\.)?watch\.nettohikari\.com',
ed807c18 800 r'(?:www\.)?invidious\.namazso\.eu',
801 r'(?:www\.)?invidious\.silkky\.cloud',
802 r'(?:www\.)?invidious\.exonip\.de',
803 r'(?:www\.)?invidious\.riverside\.rocks',
804 r'(?:www\.)?invidious\.blamefran\.net',
805 r'(?:www\.)?invidious\.moomoo\.de',
806 r'(?:www\.)?ytb\.trom\.tf',
807 r'(?:www\.)?yt\.cyberhost\.uk',
bc2ca1bb 808 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
809 r'(?:www\.)?qklhadlycap4cnod\.onion',
810 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
811 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
812 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
813 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
814 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
815 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
ed807c18 816 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
817 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
818 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
819 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
bc2ca1bb 820 )
# Verbose-mode ((?x)) regex that recognises a YouTube video URL or a bare
# 11-character video ID and captures the ID in the named group `id`.
#
# Structure:
#   * Group 1 (optional) is everything that may precede the ID: the scheme
#     (http://, https:// or protocol-relative //), followed by one of
#       - a "long" host (youtube.com with any subdomain and any letter case,
#         the -nocookie/kids variants, several mirror sites, the Invidious
#         instances, youtube.googleapis.com) plus a v/, embed/, e/ path or a
#         ?v= / #v= / #!v= parameter,
#       - a "short" host (youtu.be, vid.plus, zwearz.com/watch, the Invidious
#         instances) followed directly by '/',
#       - the cleanvideosearch.com watch URL.
#   * `(?(1).+)?` is a conditional: arbitrary trailing text is accepted only
#     when group 1 matched; a bare ID must be followed by '#' or end of string.
#   * '#' is written as `\#` wherever it is meant literally, because an
#     unescaped '#' starts a comment in verbose mode.
#
# The two `%(invidious)s` placeholders are filled by %-formatting with the
# entries of _INVIDIOUS_SITES joined by '|', so any literal '%' added to the
# pattern would have to be doubled.
cb7dfeea 821 _VALID_URL = r"""(?x)^
c5e8d7af 822 (
edb53e2d 823 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 824 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
825 (?:www\.)?deturl\.com/www\.youtube\.com|
826 (?:www\.)?pwnyoutube\.com|
827 (?:www\.)?hooktube\.com|
828 (?:www\.)?yourepeat\.com|
829 tube\.majestyc\.net|
830 %(invidious)s|
831 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
832 (?:.*?\#/)? # handle anchor (#/) redirect urls
833 (?: # the various things that can precede the ID:
ac7553d0 834 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 835 |(?: # or the v= param in all its forms
f7000f3a 836 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 837 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 838 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
839 v=
840 )
f4b05232 841 ))
cbaed4bb
S
842 |(?:
843 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
844 vid\.plus| # or vid.plus/xxxx
845 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 846 %(invidious)s
cbaed4bb 847 )/
edb53e2d 848 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 849 )
c5e8d7af 850 )? # all until now is optional -> you can pass the naked ID
201c1459 851 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 852 (?(1).+)? # if we found the ID, everything can follow
9297939e 853 (?:\#|$)""" % {
bc2ca1bb 854 'invidious': '|'.join(_INVIDIOUS_SITES),
855 }
# Regexes that pull a player identifier (named group `id`) out of a player
# script URL/path. Three shapes are covered:
#   1. '/s/player/<id>/player...'            (id is 8+ chars of [a-zA-Z0-9_-])
#   2. '/<id>/player_ias.vflset[/xx_XX]/base.js' or the
#      '-plasma-ias-(phone|tablet)-xx_XX.vflset/base.js' variant
#   3. a 'vfl...' token anywhere before a trailing '.js'
# NOTE(review): the consumer is not visible here; presumably the patterns are
# tried in this order and the first match wins -- confirm at the use site.
e40c758c 856 _PLAYER_INFO_RE = (
cc2db878 857 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
858 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 859 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 860 )
# Static table of known properties per YouTube format code. Keys are itag
# values as strings (plus the special '_rtmp' entry); each value lists only
# the fields that are fixed for that itag: ext, width, height, acodec, vcodec,
# abr, fps, format_note, container, preference or protocol.
# Fields that vary from video to video are left out on purpose (e.g. no height
# for '36' and '138', see the notes next to those entries).
# The 3D entries carry preference -20 and the HLS entries preference -10.
# NOTE(review): presumably a lower preference ranks the format lower during
# format selection, and this table is merged with per-video stream data
# elsewhere -- neither is visible here, confirm at the use site.
2c62dc26 861 _formats = {
c2d3cb4c 862 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
863 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
864 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
865 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
866 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
867 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
868 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
869 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 870 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 871 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
872 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
873 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
874 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
875 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
876 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 877 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 878 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
879 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 880
881
882 # 3D videos
c2d3cb4c 883 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
884 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
885 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
886 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 887 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
888 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
889 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 890
96fb5605 891 # Apple HTTP Live Streaming
11f12195 892 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 893 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
894 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
895 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
896 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
897 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 898 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
899 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
900
901 # DASH mp4 video
d23028a8
S
902 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
903 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
904 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
905 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
906 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 907 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
908 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
909 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
910 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
911 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
912 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
913 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 914
f6f1fc92 915 # Dash mp4 audio
d23028a8
S
916 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
917 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
918 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
919 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
920 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
921 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
922 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
923
924 # Dash webm
d23028a8
S
925 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
926 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
927 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
928 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
929 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
930 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
931 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
932 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
933 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
934 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
935 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
936 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
937 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
938 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
939 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 940 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
941 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
942 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
943 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
944 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
945 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
946 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
947
948 # Dash webm audio
d23028a8
S
949 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
950 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 951
0857baad 952 # Dash webm audio with opus inside
d23028a8
S
953 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
954 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
955 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 956
ce6b9a2d
PH
957 # RTMP (unnamed)
958 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
959
960 # av01 video only formats sometimes served with "unknown" codecs
961 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
962 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
963 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
964 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 965 }
# Names of the subtitle formats this extractor knows about.
# NOTE(review): presumably each name is requested as a caption track format
# when building subtitle URLs -- the use site is not visible here, confirm.
29f7c58a 966 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 967
# Message strings associated with age-restricted videos.
# NOTE(review): presumably compared against the "reason" text returned in a
# player response to detect an age gate; whether the comparison is exact or
# substring-based is not visible here -- confirm before editing the wording
# or punctuation of these strings.
109dd3b2 968 _AGE_GATE_REASONS = (
969 'Sign in to confirm your age',
970 'This video may be inappropriate for some users.',
971 'Sorry, this content is age-restricted.')
972
# Geo-bypass switch, set to False for this extractor.
# NOTE(review): the flag is interpreted by the InfoExtractor base class, which
# is not visible here -- presumably False disables the generic geo-restriction
# bypass for YouTube; confirm against the base class.
fd5c4aab
S
973 _GEO_BYPASS = False
974
# Name of this extractor.
# NOTE(review): presumably the identifier shown in logs and extractor
# listings -- defined by the base-class contract, confirm there.
78caa52a 975 IE_NAME = 'youtube'
2eb88d95
PH
976 _TESTS = [
977 {
2d3d2997 978 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
979 'info_dict': {
980 'id': 'BaW_jenozKc',
981 'ext': 'mp4',
3867038a 982 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
983 'uploader': 'Philipp Hagemeister',
984 'uploader_id': 'phihag',
ec85ded8 985 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
986 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
987 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 988 'upload_date': '20121002',
3867038a 989 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 990 'categories': ['Science & Technology'],
3867038a 991 'tags': ['youtube-dl'],
556dbe7f 992 'duration': 10,
dbdaaa23 993 'view_count': int,
3e7c1224
PH
994 'like_count': int,
995 'dislike_count': int,
7c80519c 996 'start_time': 1,
297a564b 997 'end_time': 9,
2eb88d95 998 }
0e853ca4 999 },
fccd3771 1000 {
4bc3a23e
PH
1001 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1002 'note': 'Embed-only video (#1746)',
1003 'info_dict': {
1004 'id': 'yZIXLfi8CZQ',
1005 'ext': 'mp4',
1006 'upload_date': '20120608',
1007 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1008 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1009 'uploader': 'SET India',
94bfcd23 1010 'uploader_id': 'setindia',
ec85ded8 1011 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 1012 'age_limit': 18,
545cc85d 1013 },
1014 'skip': 'Private video',
fccd3771 1015 },
11b56058 1016 {
8bdd16b4 1017 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
1018 'note': 'Use the first video ID in the URL',
1019 'info_dict': {
1020 'id': 'BaW_jenozKc',
1021 'ext': 'mp4',
3867038a 1022 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
1023 'uploader': 'Philipp Hagemeister',
1024 'uploader_id': 'phihag',
ec85ded8 1025 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 1026 'upload_date': '20121002',
3867038a 1027 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 1028 'categories': ['Science & Technology'],
3867038a 1029 'tags': ['youtube-dl'],
556dbe7f 1030 'duration': 10,
dbdaaa23 1031 'view_count': int,
11b56058
PM
1032 'like_count': int,
1033 'dislike_count': int,
34a7de29
S
1034 },
1035 'params': {
1036 'skip_download': True,
1037 },
11b56058 1038 },
dd27fd17 1039 {
2d3d2997 1040 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
1041 'note': '256k DASH audio (format 141) via DASH manifest',
1042 'info_dict': {
1043 'id': 'a9LDPn-MO4I',
1044 'ext': 'm4a',
1045 'upload_date': '20121002',
1046 'uploader_id': '8KVIDEO',
ec85ded8 1047 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
1048 'description': '',
1049 'uploader': '8KVIDEO',
1050 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 1051 },
4bc3a23e
PH
1052 'params': {
1053 'youtube_include_dash_manifest': True,
1054 'format': '141',
4919603f 1055 },
de3c7fe0 1056 'skip': 'format 141 not served anymore',
dd27fd17 1057 },
8bdd16b4 1058 # DASH manifest with encrypted signature
1059 {
1060 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1061 'info_dict': {
1062 'id': 'IB3lcPjvWLA',
1063 'ext': 'm4a',
1064 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1065 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1066 'duration': 244,
1067 'uploader': 'AfrojackVEVO',
1068 'uploader_id': 'AfrojackVEVO',
1069 'upload_date': '20131011',
cc2db878 1070 'abr': 129.495,
8bdd16b4 1071 },
1072 'params': {
1073 'youtube_include_dash_manifest': True,
1074 'format': '141/bestaudio[ext=m4a]',
1075 },
1076 },
dd2d55f1 1077 # Normal age-gate video (embed allowed)
c522adb1 1078 {
2d3d2997 1079 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
1080 'info_dict': {
1081 'id': 'HtVdAasjOgU',
1082 'ext': 'mp4',
1083 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 1084 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 1085 'duration': 142,
c522adb1
JMF
1086 'uploader': 'The Witcher',
1087 'uploader_id': 'WitcherGame',
ec85ded8 1088 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 1089 'upload_date': '20140605',
34952f09 1090 'age_limit': 18,
c522adb1
JMF
1091 },
1092 },
8bdd16b4 1093 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1094 # YouTube Red ad is not captured for creator
1095 {
1096 'url': '__2ABJjxzNo',
1097 'info_dict': {
1098 'id': '__2ABJjxzNo',
1099 'ext': 'mp4',
1100 'duration': 266,
1101 'upload_date': '20100430',
1102 'uploader_id': 'deadmau5',
1103 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 1104 'creator': 'deadmau5',
1105 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 1106 'uploader': 'deadmau5',
1107 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 1108 'alt_title': 'Some Chords',
8bdd16b4 1109 },
1110 'expected_warnings': [
1111 'DASH manifest missing',
1112 ]
1113 },
067aa17e 1114 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
1115 {
1116 'url': 'lqQg6PlCWgI',
1117 'info_dict': {
1118 'id': 'lqQg6PlCWgI',
1119 'ext': 'mp4',
556dbe7f 1120 'duration': 6085,
90227264 1121 'upload_date': '20150827',
cbe2bd91 1122 'uploader_id': 'olympic',
ec85ded8 1123 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 1124 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 1125 'uploader': 'Olympic',
cbe2bd91
PH
1126 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1127 },
1128 'params': {
1129 'skip_download': 'requires avconv',
e52a40ab 1130 }
cbe2bd91 1131 },
6271f1ca
PH
1132 # Non-square pixels
1133 {
1134 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1135 'info_dict': {
1136 'id': '_b-2C3KPAM0',
1137 'ext': 'mp4',
1138 'stretched_ratio': 16 / 9.,
556dbe7f 1139 'duration': 85,
6271f1ca
PH
1140 'upload_date': '20110310',
1141 'uploader_id': 'AllenMeow',
ec85ded8 1142 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 1143 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 1144 'uploader': '孫ᄋᄅ',
6271f1ca
PH
1145 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1146 },
06b491eb
S
1147 },
1148 # url_encoded_fmt_stream_map is empty string
1149 {
1150 'url': 'qEJwOuvDf7I',
1151 'info_dict': {
1152 'id': 'qEJwOuvDf7I',
f57b7835 1153 'ext': 'webm',
06b491eb
S
1154 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1155 'description': '',
1156 'upload_date': '20150404',
1157 'uploader_id': 'spbelect',
1158 'uploader': 'Наблюдатели Петербурга',
1159 },
1160 'params': {
1161 'skip_download': 'requires avconv',
e323cf3f
S
1162 },
1163 'skip': 'This live event has ended.',
06b491eb 1164 },
067aa17e 1165 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
1166 {
1167 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1168 'info_dict': {
1169 'id': 'FIl7x6_3R5Y',
eb6793ba 1170 'ext': 'webm',
da77d856
S
1171 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1172 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 1173 'duration': 220,
da77d856
S
1174 'upload_date': '20150625',
1175 'uploader_id': 'dorappi2000',
ec85ded8 1176 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 1177 'uploader': 'dorappi2000',
eb6793ba 1178 'formats': 'mincount:31',
da77d856 1179 },
eb6793ba 1180 'skip': 'not actual anymore',
2ee8f5d8 1181 },
8a1a26ce
YCH
1182 # DASH manifest with segment_list
1183 {
1184 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1185 'md5': '8ce563a1d667b599d21064e982ab9e31',
1186 'info_dict': {
1187 'id': 'CsmdDsKjzN8',
1188 'ext': 'mp4',
17ee98e1 1189 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
1190 'uploader': 'Airtek',
1191 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1192 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1193 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1194 },
1195 'params': {
1196 'youtube_include_dash_manifest': True,
1197 'format': '135', # bestvideo
be49068d
S
1198 },
1199 'skip': 'This live event has ended.',
2ee8f5d8 1200 },
cf7e015f
S
1201 {
1202 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 1203 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 1204 'info_dict': {
545cc85d 1205 'id': 'jvGDaLqkpTg',
1206 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1207 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
1208 },
1209 'playlist': [{
1210 'info_dict': {
545cc85d 1211 'id': 'jvGDaLqkpTg',
cf7e015f 1212 'ext': 'mp4',
545cc85d 1213 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1214 'description': 'md5:e03b909557865076822aa169218d6a5d',
1215 'duration': 10643,
1216 'upload_date': '20161111',
1217 'uploader': 'Team PGP',
1218 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1219 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1220 },
1221 }, {
1222 'info_dict': {
545cc85d 1223 'id': '3AKt1R1aDnw',
cf7e015f 1224 'ext': 'mp4',
545cc85d 1225 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1226 'description': 'md5:e03b909557865076822aa169218d6a5d',
1227 'duration': 10991,
1228 'upload_date': '20161111',
1229 'uploader': 'Team PGP',
1230 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1231 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1232 },
1233 }, {
1234 'info_dict': {
545cc85d 1235 'id': 'RtAMM00gpVc',
cf7e015f 1236 'ext': 'mp4',
545cc85d 1237 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1238 'description': 'md5:e03b909557865076822aa169218d6a5d',
1239 'duration': 10995,
1240 'upload_date': '20161111',
1241 'uploader': 'Team PGP',
1242 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1243 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1244 },
1245 }, {
1246 'info_dict': {
545cc85d 1247 'id': '6N2fdlP3C5U',
cf7e015f 1248 'ext': 'mp4',
545cc85d 1249 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1250 'description': 'md5:e03b909557865076822aa169218d6a5d',
1251 'duration': 10990,
1252 'upload_date': '20161111',
1253 'uploader': 'Team PGP',
1254 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1255 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
1256 },
1257 }],
1258 'params': {
1259 'skip_download': True,
1260 },
cbaed4bb 1261 },
f9f49d87 1262 {
067aa17e 1263 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
1264 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1265 'info_dict': {
1266 'id': 'gVfLd0zydlo',
1267 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1268 },
1269 'playlist_count': 2,
be49068d 1270 'skip': 'Not multifeed anymore',
f9f49d87 1271 },
cbaed4bb 1272 {
2d3d2997 1273 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 1274 'only_matching': True,
0e49d9a6 1275 },
6d4fc66b 1276 {
2d3d2997 1277 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
1278 'only_matching': True,
1279 },
0e49d9a6 1280 {
067aa17e 1281 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 1282 # Also tests cut-off URL expansion in video description (see
067aa17e
S
1283 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1284 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
1285 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1286 'info_dict': {
1287 'id': 'lsguqyKfVQg',
1288 'ext': 'mp4',
1289 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 1290 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 1291 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1292 'duration': 133,
0e49d9a6
LL
1293 'upload_date': '20151119',
1294 'uploader_id': 'IronSoulElf',
ec85ded8 1295 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1296 'uploader': 'IronSoulElf',
eb6793ba
S
1297 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1298 'track': 'Dark Walk - Position Music',
1299 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 1300 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1301 },
1302 'params': {
1303 'skip_download': True,
1304 },
1305 },
61f92af1 1306 {
067aa17e 1307 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1308 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1309 'only_matching': True,
1310 },
313dfc45
LL
1311 {
1312 # Video with yt:stretch=17:0
1313 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1314 'info_dict': {
1315 'id': 'Q39EVAstoRM',
1316 'ext': 'mp4',
1317 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1318 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1319 'upload_date': '20151107',
1320 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1321 'uploader': 'CH GAMER DROID',
1322 },
1323 'params': {
1324 'skip_download': True,
1325 },
be49068d 1326 'skip': 'This video does not exist.',
313dfc45 1327 },
201c1459 1328 {
1329 # Video with incomplete 'yt:stretch=16:'
1330 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1331 'only_matching': True,
1332 },
7caf9830
S
1333 {
1334 # Video licensed under Creative Commons
1335 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1336 'info_dict': {
1337 'id': 'M4gD1WSo5mA',
1338 'ext': 'mp4',
1339 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1340 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1341 'duration': 721,
7caf9830
S
1342 'upload_date': '20150127',
1343 'uploader_id': 'BerkmanCenter',
ec85ded8 1344 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1345 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1346 'license': 'Creative Commons Attribution license (reuse allowed)',
1347 },
1348 'params': {
1349 'skip_download': True,
1350 },
1351 },
fd050249
S
1352 {
1353 # Channel-like uploader_url
1354 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1355 'info_dict': {
1356 'id': 'eQcmzGIKrzg',
1357 'ext': 'mp4',
1358 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1359 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1360 'duration': 4060,
fd050249 1361 'upload_date': '20151119',
eb6793ba 1362 'uploader': 'Bernie Sanders',
fd050249 1363 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1364 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1365 'license': 'Creative Commons Attribution license (reuse allowed)',
1366 },
1367 'params': {
1368 'skip_download': True,
1369 },
1370 },
040ac686
S
1371 {
1372 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1373 'only_matching': True,
7f29cf54
S
1374 },
1375 {
067aa17e 1376 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1377 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1378 'only_matching': True,
6496ccb4
S
1379 },
1380 {
1381 # Rental video preview
1382 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1383 'info_dict': {
1384 'id': 'uGpuVWrhIzE',
1385 'ext': 'mp4',
1386 'title': 'Piku - Trailer',
1387 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1388 'upload_date': '20150811',
1389 'uploader': 'FlixMatrix',
1390 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1391 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1392 'license': 'Standard YouTube License',
1393 },
1394 'params': {
1395 'skip_download': True,
1396 },
eb6793ba 1397 'skip': 'This video is not available.',
022a5d66 1398 },
12afdc2a
S
1399 {
1400 # YouTube Red video with episode data
1401 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1402 'info_dict': {
1403 'id': 'iqKdEhx-dD4',
1404 'ext': 'mp4',
1405 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1406 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1407 'duration': 2085,
12afdc2a
S
1408 'upload_date': '20170118',
1409 'uploader': 'Vsauce',
1410 'uploader_id': 'Vsauce',
1411 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1412 'series': 'Mind Field',
1413 'season_number': 1,
1414 'episode_number': 1,
1415 },
1416 'params': {
1417 'skip_download': True,
1418 },
1419 'expected_warnings': [
1420 'Skipping DASH manifest',
1421 ],
1422 },
c7121fa7
S
1423 {
1424 # The following content has been identified by the YouTube community
1425 # as inappropriate or offensive to some audiences.
1426 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1427 'info_dict': {
1428 'id': '6SJNVb0GnPI',
1429 'ext': 'mp4',
1430 'title': 'Race Differences in Intelligence',
1431 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1432 'duration': 965,
1433 'upload_date': '20140124',
1434 'uploader': 'New Century Foundation',
1435 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1436 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1437 },
1438 'params': {
1439 'skip_download': True,
1440 },
545cc85d 1441 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1442 },
022a5d66
S
1443 {
1444 # itag 212
1445 'url': '1t24XAntNCY',
1446 'only_matching': True,
fd5c4aab
S
1447 },
1448 {
1449 # geo restricted to JP
1450 'url': 'sJL6WA-aGkQ',
1451 'only_matching': True,
1452 },
cd5a74a2
S
1453 {
1454 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1455 'only_matching': True,
1456 },
bc2ca1bb 1457 {
1458 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1459 'only_matching': True,
1460 },
1461 {
1462 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1463 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1464 'only_matching': True,
1465 },
825cd268
RA
1466 {
1467 # DRM protected
1468 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1469 'only_matching': True,
4fe54c12
S
1470 },
1471 {
1472 # Video with unsupported adaptive stream type formats
1473 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1474 'info_dict': {
1475 'id': 'Z4Vy8R84T1U',
1476 'ext': 'mp4',
1477 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1478 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1479 'duration': 433,
1480 'upload_date': '20130923',
1481 'uploader': 'Amelia Putri Harwita',
1482 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1483 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1484 'formats': 'maxcount:10',
1485 },
1486 'params': {
1487 'skip_download': True,
1488 'youtube_include_dash_manifest': False,
1489 },
5429d6a9 1490 'skip': 'not actual anymore',
5caabd3c 1491 },
1492 {
822b9d9c 1493 # YouTube Music auto-generated description
5caabd3c 1494 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1495 'info_dict': {
1496 'id': 'MgNrAu2pzNs',
1497 'ext': 'mp4',
1498 'title': 'Voyeur Girl',
1499 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1500 'upload_date': '20190312',
5429d6a9
S
1501 'uploader': 'Stephen - Topic',
1502 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1503 'artist': 'Stephen',
1504 'track': 'Voyeur Girl',
1505 'album': 'it\'s too much love to know my dear',
1506 'release_date': '20190313',
1507 'release_year': 2019,
1508 },
1509 'params': {
1510 'skip_download': True,
1511 },
1512 },
66b48727
RA
1513 {
1514 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1515 'only_matching': True,
1516 },
011e75e6
S
1517 {
1518 # invalid -> valid video id redirection
1519 'url': 'DJztXj2GPfl',
1520 'info_dict': {
1521 'id': 'DJztXj2GPfk',
1522 'ext': 'mp4',
1523 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1524 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1525 'upload_date': '20090125',
1526 'uploader': 'Prochorowka',
1527 'uploader_id': 'Prochorowka',
1528 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1529 'artist': 'Panjabi MC',
1530 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1531 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1532 },
1533 'params': {
1534 'skip_download': True,
1535 },
545cc85d 1536 'skip': 'Video unavailable',
ea74e00b
DP
1537 },
1538 {
1539 # empty description results in an empty string
1540 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1541 'info_dict': {
1542 'id': 'x41yOUIvK2k',
1543 'ext': 'mp4',
1544 'title': 'IMG 3456',
1545 'description': '',
1546 'upload_date': '20170613',
1547 'uploader_id': 'ElevageOrVert',
1548 'uploader': 'ElevageOrVert',
1549 },
1550 'params': {
1551 'skip_download': True,
1552 },
1553 },
a0566bbf 1554 {
29f7c58a 1555 # with '};' inside yt initial data (see [1])
1556 # see [2] for an example with '};' inside ytInitialPlayerResponse
1557 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1558 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1559 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1560 'info_dict': {
1561 'id': 'CHqg6qOn4no',
1562 'ext': 'mp4',
1563 'title': 'Part 77 Sort a list of simple types in c#',
1564 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1565 'upload_date': '20130831',
1566 'uploader_id': 'kudvenkat',
1567 'uploader': 'kudvenkat',
1568 },
1569 'params': {
1570 'skip_download': True,
1571 },
1572 },
29f7c58a 1573 {
1574 # another example of '};' in ytInitialData
1575 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1576 'only_matching': True,
1577 },
1578 {
1579 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1580 'only_matching': True,
1581 },
545cc85d 1582 {
cc2db878 1583 # https://github.com/ytdl-org/youtube-dl/pull/28094
1584 'url': 'OtqTfy26tG0',
1585 'info_dict': {
1586 'id': 'OtqTfy26tG0',
1587 'ext': 'mp4',
1588 'title': 'Burn Out',
1589 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1590 'upload_date': '20141120',
1591 'uploader': 'The Cinematic Orchestra - Topic',
1592 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1593 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1594 'artist': 'The Cinematic Orchestra',
1595 'track': 'Burn Out',
1596 'album': 'Every Day',
1597 'release_data': None,
1598 'release_year': None,
1599 },
1600 'params': {
1601 'skip_download': True,
1602 },
545cc85d 1603 },
bc2ca1bb 1604 {
1605 # controversial video, only works with bpctr when authenticated with cookies
1606 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1607 'only_matching': True,
1608 },
a1a7907b 1609 {
1610 # controversial video, requires bpctr/contentCheckOk
1611 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1612 'info_dict': {
1613 'id': 'SZJvDhaSDnc',
1614 'ext': 'mp4',
1615 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1616 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1617 'uploader': 'CBS This Morning',
1618 'upload_date': '20140716',
1619 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1620 }
1621 },
f7ad7160 1622 {
1623 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1624 'url': 'cBvYw8_A0vQ',
1625 'info_dict': {
1626 'id': 'cBvYw8_A0vQ',
1627 'ext': 'mp4',
1628 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1629 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1630 'upload_date': '20201120',
1631 'uploader': 'Walk around Japan',
1632 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1633 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1634 },
1635 'params': {
1636 'skip_download': True,
1637 },
0fb983f6 1638 }, {
1639 # Has multiple audio streams
1640 'url': 'WaOKSUlf4TM',
1641 'only_matching': True
9297939e 1642 }, {
1643 # Requires Premium: has format 141 when requested using YTM url
1644 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1645 'only_matching': True
1646 }, {
120916da 1647 # multiple subtitles with same lang_code
1648 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1649 'only_matching': True,
109dd3b2 1650 }, {
1651 # Force use android client fallback
1652 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1653 'info_dict': {
1654 'id': 'YOelRv7fMxY',
1655 'title': 'Digging a Secret Tunnel from my Workshop',
1656 'ext': '3gp',
1657 'upload_date': '20210624',
1658 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1659 'uploader': 'colinfurze',
1660 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1661 'description': 'md5:ecb672623246d98c6c562eed6ae798c3'
1662 },
1663 'params': {
1664 'format': '17', # 3gp format available on android
1665 'extractor_args': {'youtube': {'player_client': ['android']}},
1666 },
120916da 1667 },
109dd3b2 1668 {
1669 # Skip download of additional client configs (remix client config in this case)
1670 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1671 'only_matching': True,
1672 'params': {
1673 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1674 },
1675 }
2eb88d95
PH
1676 ]
1677
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    A URL carrying a non-empty ``list`` query parameter is rejected here;
    every other URL is decided by the parent class.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_id = parse_qs(url).get('list', [None])[0]
    return False if list_id else super(YoutubeIE, cls).suitable(url)
1687
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the extractor and start with empty per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # (player_url, signature cache id) -> signature function, see _decrypt_signature
    self._player_cache = dict()
    # player id -> downloaded player code, see _load_player
    self._code_cache = dict()
e0df6211 1692
def _extract_player_url(self, ytcfg=None, webpage=None):
    """Return the absolute URL of the JS player, or None if it cannot be found.

    The URL is taken from ``ytcfg['PLAYER_JS_URL']`` when available, otherwise
    it is searched for in *webpage* (skipped when no webpage is given).
    Protocol-relative and site-relative URLs are made absolute.
    """
    player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
    if not player_url and webpage:
        player_url = self._search_regex(
            r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
            webpage, 'player URL', fatal=False)
    if not player_url:
        # Neither source yielded a URL; the search above is non-fatal, so
        # bail out here instead of calling str methods on a non-string.
        return None
    if player_url.startswith('//'):
        return 'https:' + player_url
    if not re.match(r'https?://', player_url):
        return compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)
    return player_url
1705
60064c53
PH
def _signature_cache_id(self, example_sig):
    """ Return a string representation of a signature

    The result is the length of every dot-separated part of *example_sig*,
    joined with dots.
    """
    part_lengths = [len(chunk) for chunk in example_sig.split('.')]
    return '.'.join(map(compat_str, part_lengths))
60064c53 1709
e40c758c
S
1710 @classmethod
1711 def _extract_player_info(cls, player_url):
1712 for player_re in cls._PLAYER_INFO_RE:
1713 id_m = re.search(player_re, player_url)
1714 if id_m:
1715 break
1716 else:
c081b35c 1717 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 1718 return id_m.group('id')
e40c758c 1719
109dd3b2 1720 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1721 player_id = self._extract_player_info(player_url)
1722 if player_id not in self._code_cache:
1723 self._code_cache[player_id] = self._download_webpage(
1724 player_url, video_id, fatal=fatal,
1725 note='Downloading player ' + player_id,
1726 errnote='Download of %s failed' % player_url)
1727 return player_id in self._code_cache
1728
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms a signature shaped like *example_sig*.

    A previously stored index list from the 'youtube-sigfuncs' cache is used
    when present. Otherwise the function is parsed out of the player code,
    its effect on a probe string is stored in that cache, and the parsed
    function is returned. Returns None if the player could not be loaded.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = 'js_%s_%s' % (player_id, self._signature_cache_id(example_sig))
    assert os.path.basename(func_id) == func_id

    cached_indices = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cached_indices is not None:
        return lambda s: ''.join(s[i] for i in cached_indices)

    if not self._load_player(video_id, player_url):
        return None

    sig_func = self._parse_sig_js(self._code_cache[player_id])

    # Run the function over a string of distinct characters so that the
    # result records which input position ends up at each output position
    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_list = [ord(ch) for ch in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, index_list)
    return sig_func
83799698 1751
def _print_sig_code(self, func, example_sig):
    """Print Python code equivalent to the signature function *func*.

    *func* is applied to a probe string as long as *example_sig*; the
    resulting index sequence is rendered as a sum of ``s[...]`` items and
    slices and written with ``to_screen``.
    """
    def render_slice(first, last, stride):
        head = str(first) if first != 0 else ''
        stop = last + stride
        tail = (':%d' % stop) if stop >= 0 else ':'
        stride_part = (':%d' % stride) if stride != 1 else ''
        return 's[%s%s%s]' % (head, tail, stride_part)

    def gen_sig_code(idxs):
        stride = None
        # Squelch pyflakes warnings - run_start will be set when stride is set
        run_start = '(Never used)'
        for cur, before in zip(idxs[1:], idxs[:-1]):
            delta = cur - before
            if stride is None:
                if delta in (-1, 1):
                    # A run of consecutive indices begins at the previous item
                    stride, run_start = delta, before
                else:
                    yield 's[%d]' % before
            elif delta != stride:
                # The current run ended at the previous item
                yield render_slice(run_start, before, stride)
                stride = None
        if stride is None:
            yield 's[%d]' % cur
        else:
            yield render_slice(run_start, cur, stride)

    probe = ''.join(map(compat_chr, range(len(example_sig))))
    index_list = [ord(ch) for ch in func(probe)]
    expr_code = ' + '.join(gen_sig_code(index_list))
    part_lengths = ', '.join(compat_str(len(p)) for p in example_sig.split('.'))
    signature_id_tuple = '(%s)' % part_lengths
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1790
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Locate the signature function in *jscode* and wrap it as a callable.

    The function name is searched with the patterns below (tried as given to
    ``_search_regex``), the function is extracted with JSInterpreter, and the
    returned callable passes its single string argument to it.
    """
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        sig_name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature(s):
        # The extracted function takes its arguments as a list
        return initial_function([s])

    return run_signature
1814
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature

    Signature functions are memoised in ``self._player_cache`` per
    (player_url, signature cache id). Any failure while obtaining or applying
    the function is re-raised as ExtractorError carrying the traceback text.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        try:
            sig_func = self._player_cache[cache_key]
        except KeyError:
            sig_func = self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        if self.get_param('youtube_print_sig_code'):
            self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=e)
e0df6211 1836
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    ``ytcfg['STS']`` is preferred; otherwise the value is searched for in the
    player code. Without a player_url this raises ExtractorError when *fatal*
    is set and otherwise warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return

    if self._load_player(video_id, player_url, fatal=fatal):
        code = self._code_cache[self._extract_player_info(player_url)]
        sts = int_or_none(self._search_regex(
            r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
            'JS player signature timestamp', group='sts', fatal=fatal))
    return sts
1861
def _mark_watched(self, video_id, player_response):
    """Request the playback tracking URL found in *player_response*.

    Does nothing when the response carries no valid
    ``playbackTracking.videostatsPlaybackUrl.baseUrl``. The request itself is
    non-fatal.
    """
    tracking_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not tracking_url:
        return

    url_parts = compat_urlparse.urlparse(tracking_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

    query['ver'] = ['2']
    query['cpn'] = [cpn]
    new_query = compat_urllib_parse_urlencode(query, True)
    tracking_url = compat_urlparse.urlunparse(url_parts._replace(query=new_query))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1886
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Collect YouTube embeds found in *webpage*.

    The returned list holds, in this order: HTML-unescaped embedded player
    URLs, HTML-unescaped ``data-youtube-id`` values of lazyYT elements, and
    ``data-video_id`` values of "YouTube Video Importer" players.
    """
    # Embedded YouTube player
    embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    entries = [unescapeHTML(found.group('url')) for found in re.finditer(embed_re, webpage)]

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    entries += [unescapeHTML(value) for value in lazy_ids]

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    # findall returns one tuple of groups per hit; the id is the last group
    entries += [groups[-1] for groups in re.findall(importer_re, webpage)]

    return entries
1918
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by ``_extract_urls`` or None if there is none."""
    for first in YoutubeIE._extract_urls(webpage):
        return first
    return None
1923
97665381
PH
1924 @classmethod
1925 def extract_id(cls, url):
1926 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
c5e8d7af 1927 if mobj is None:
69ea8ca4 1928 raise ExtractorError('Invalid URL: %s' % url)
c5e8d7af
PH
1929 video_id = mobj.group(2)
1930 return video_id
1931
def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar found in *data*.

    Start times are read from ``timeRangeStartMillis`` (scaled by 1000) and
    titles from ``title.simpleText`` of each ``chapterRenderer``.
    """
    def start_of(chapter):
        return float_or_none(
            traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000)

    def title_of(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
    )
    raw_chapters = traverse_obj(data, chapters_path, expected_type=list)

    return self._extract_chapters(
        raw_chapters, chapter_time=start_of, chapter_title=title_of, duration=duration)
1946
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build chapters from the macro marker lists of the engagement panels in *data*.

    The marker lists are tried in order and the first non-empty chapter list
    is returned; an empty list is returned when none produces chapters.
    """
    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter.get('timeDescription')))

    def chapter_title(chapter):
        return self._get_text(chapter.get('title'))

    marker_lists = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list, default=[])

    for contents in marker_lists:
        chapters = self._extract_chapters(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
1962
1963 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
84213ea8 1964 chapters = []
7c365c21 1965 last_chapter = {'start_time': 0}
1966 for idx, chapter in enumerate(chapter_list or []):
1967 title = chapter_title(chapter)
84213ea8
S
1968 start_time = chapter_time(chapter)
1969 if start_time is None:
1970 continue
7c365c21 1971 last_chapter['end_time'] = start_time
1972 if start_time < last_chapter['start_time']:
1973 if idx == 1:
1974 chapters.pop()
1975 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
1976 else:
1977 self.report_warning(f'Invalid start time for chapter "{title}"')
1978 continue
1979 last_chapter = {'start_time': start_time, 'title': title}
1980 chapters.append(last_chapter)
1981 last_chapter['end_time'] = duration
84213ea8
S
1982 return chapters
1983
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Search *webpage* for the JSON matched by *regex* and parse it.

    The regex followed by ``_YT_INITIAL_BOUNDARY_RE`` is tried before the bare
    regex; '{}' is parsed when nothing matches. Parsing is non-fatal.
    """
    patterns = (
        r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
        regex,
    )
    raw_json = self._search_regex(patterns, webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 1988
d92f5d5a 1989 @staticmethod
1990 def parse_time_text(time_text):
1991 """
1992 Parse the comment time text
1993 time_text is in the format 'X units ago (edited)'
1994 """
1995 time_text_split = time_text.split(' ')
1996 if len(time_text_split) >= 3:
da503b7a 1997 try:
1998 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1999 except ValueError:
2000 return None
d92f5d5a 2001
a1c5d2ca
M
def _extract_comment(self, comment_renderer, parent=None):
    """Build a comment dict from *comment_renderer*.

    Returns None when the renderer has no 'commentId'. The 'timestamp' value
    is None when the published time text cannot be parsed. 'parent' is the
    given parent id or 'root'.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return

    text = self._get_text(comment_renderer.get('contentText'))

    # note: timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer.get('publishedTimeText')) or ''
    time_text_dt = self.parse_time_text(time_text)
    # Default to None: the name must be bound even if the time text is unparsable,
    # otherwise building the result below raises UnboundLocalError
    timestamp = None
    if isinstance(time_text_dt, datetime.datetime):
        timestamp = calendar.timegm(time_text_dt.timetuple())
    author = self._get_text(comment_renderer.get('authorText'))
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)

    votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                   lambda x: x['likeCount']), compat_str)) or 0
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_favorited = 'creatorHeart' in (try_get(
        comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_favorited,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
2039
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, video_id, parent=None, comment_counts=None):
    """Generate the comments reachable from *root_continuation_data*.

    Yields comment dicts (see _extract_comment) and, once a comments header
    with a count has been read, an int with the estimated total number of
    comments. Replies are produced by recursing with *parent* set to the id
    of the comment they belong to.

    *comment_counts* is a shared, mutated list:
    [comments so far, estimated total comments, current comment thread #].
    """

    def extract_header(contents):
        # Reads the estimated comment count and the continuation of the
        # requested sort order from the comments header
        _total_comments = 0
        _continuation = None
        for content in contents:
            comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
            expected_comment_count = parse_count(self._get_text(
                comments_header_renderer, (lambda x: x['countText'], lambda x: x['commentsCount']), max_runs=1))

            if expected_comment_count:
                comment_counts[1] = expected_comment_count
                self.to_screen('Downloading ~%d comments' % expected_comment_count)
                _total_comments = comment_counts[1]
            sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
            comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = sort_menu_item.get('title')
            if isinstance(sort_text, compat_str):
                sort_text = sort_text.lower()
            else:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen('Sorting comments by %s' % sort_text)
            break
        return _total_comments, _continuation

    def extract_thread(contents):
        # Yields every comment of the given items, each followed by its replies
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # `dict` is the expected type of the result, not a second getter:
            # only a real commentRenderer dict is accepted, and a thread
            # without one falls back to a bare commentRenderer item
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    video_id, parent=comment.get('id'), comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    # YouTube comments have a max depth of 2
    max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
    if max_depth == 1 and parent:
        return
    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    continuation = self._extract_continuation(root_continuation_data)
    if continuation and len(continuation['continuation']) < 27:
        self.write_debug('Detected old API continuation token. Generating new API compatible token.')
        continuation_token = self._generate_comment_continuation(video_id)
        continuation = self._build_api_continuation_query(continuation_token, None)

    visitor_data = None
    is_first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    comment_counts[2], comment_prog_str)
        else:
            note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        response = self._extract_response(
            item_id=None, query=continuation,
            ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
            check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
        if not response:
            break
        # Keep the previous visitor data when the response carries none
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

        continuation = None
        if isinstance(continuation_contents, list):
            for continuation_section in continuation_contents:
                if not isinstance(continuation_section, dict):
                    continue
                continuation_items = try_get(
                    continuation_section,
                    (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                     lambda x: x['appendContinuationItemsAction']['continuationItems']),
                    list) or []
                if is_first_continuation:
                    total_comments, continuation = extract_header(continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break
                    continue
                count = 0
                for count, entry in enumerate(extract_thread(continuation_items)):
                    yield entry
                continuation = self._extract_continuation({'contents': continuation_items})
                if continuation:
                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break

        # Deprecated response structure
        elif isinstance(continuation_contents, dict):
            known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
            for key, continuation_renderer in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                if not isinstance(continuation_renderer, dict):
                    continue
                if is_first_continuation:
                    header_continuation_items = [continuation_renderer.get('header') or {}]
                    total_comments, continuation = extract_header(header_continuation_items)
                    if total_comments:
                        yield total_comments
                    is_first_continuation = False
                    if continuation:
                        break

                # Sometimes YouTube provides a continuation without any comments
                # In most cases we end up just downloading these with very little comments to come.
                count = 0
                for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                    yield entry
                continuation = self._extract_continuation(continuation_renderer)
                if count == 0:
                    if not parent:
                        self.report_warning('No comments received - assuming end of comments')
                    continuation = None
                break
a1c5d2ca 2210
@staticmethod
def _generate_comment_continuation(video_id):
    """
    Generates initial comment section continuation token from given video id

    The token is the base64 encoding of fixed byte segments with the UTF-8
    bytes of the video id inserted twice between them.
    """
    encoded_id = base64.b64encode(video_id.encode('utf-8'))
    segments = ('Eg0SCw==', encoded_id, 'GAYyJyIRIgs=', encoded_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
    token_ints = []
    for segment in segments:
        token_ints.extend(bytes_to_intlist(base64.b64decode(segment)))
    return base64.b64encode(intlist_to_bytes(token_ints)).decode('utf-8')
2221
def _extract_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Collects comments until the 'max_comments' configuration argument is
    reached or the user interrupts, and returns a dict with the 'comments'
    list and its length as 'comment_count'.
    """
    known_entry_comment_renderers = ('itemSectionRenderer',)

    def _real_comment_extract(contents):
        if not isinstance(contents, list):
            return
        for entry in contents:
            for key, renderer in entry.items():
                if key in known_entry_comment_renderers:
                    yield from self._comment_entries(
                        renderer, video_id=video_id, ytcfg=ytcfg,
                        identity_token=self._extract_identity_token(webpage, item_id=video_id),
                        account_syncid=self._extract_account_syncid(ytcfg))
                    # Only the first known renderer of an entry is used
                    break

    max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
    collected = []
    estimated_total = 0

    try:
        for item in _real_comment_extract(contents):
            if len(collected) >= max_comments:
                break
            if isinstance(item, int):
                # The generator reports the estimated total as a plain int
                estimated_total = item
            else:
                collected.append(item)
    except KeyboardInterrupt:
        self.to_screen('Interrupted by user')
    self.to_screen('Downloaded %d/%d comments' % (len(collected), estimated_total))
    return {
        'comments': collected,
        'comment_count': len(collected),
    }
2255
109dd3b2 2256 @staticmethod
2257 def _generate_player_context(sts=None):
2258 context = {
2259 'html5Preference': 'HTML5_PREF_WANTS',
2260 }
2261 if sts is not None:
2262 context['signatureTimestamp'] = sts
2263 return {
2264 'playbackContext': {
2265 'contentPlaybackContext': context
a1a7907b 2266 },
2267 'contentCheckOk': True
109dd3b2 2268 }
2269
4e6767b5 2270 @staticmethod
c888ffb9 2271 def _get_video_info_params(video_id, client='TVHTML5'):
2272 GVI_CLIENTS = {
2273 'ANDROID': {
2274 'c': 'ANDROID',
2275 'cver': '16.20',
2276 },
2277 'TVHTML5': {
2278 'c': 'TVHTML5',
2279 'cver': '6.20180913',
2280 }
2281 }
2282 query = {
4e6767b5 2283 'video_id': video_id,
2284 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
c888ffb9 2285 'html5': '1'
4e6767b5 2286 }
c888ffb9 2287 query.update(GVI_CLIENTS.get(client))
2288 return query
4e6767b5 2289
def _real_extract(self, url):
    """Extract the info dict for a single YouTube video URL.

    Downloads the watch page (non-fatally), obtains a player response from
    the page or the player API (optionally via the android / music / embedded
    clients), applies the age-gate workarounds, and then builds formats,
    thumbnails, subtitles and metadata from the player response, the
    microformat, the page's meta tags and the "initial data" JSON.

    Returns either the info dict, a url_result (when the playability status
    points to a trailer video) or a playlist_result (multifeed videos when
    --no-playlist is not given).
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id
    webpage = self._download_webpage(
        webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

    ytcfg = self._extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
    identity_token = self._extract_identity_token(webpage, video_id)
    session_index = self._extract_session_index(ytcfg)
    player_url = self._extract_player_url(ytcfg, webpage)

    player_client = self._configuration_arg('player_client', [''])[0]
    if player_client not in ('web', 'android', ''):
        self.report_warning(f'Invalid player_client {player_client} given. Falling back to android client.')
    # Anything other than an explicit 'web' (including the default '') uses the android client
    force_mobile_client = player_client != 'web'
    player_skip = self._configuration_arg('player_skip')
    player_response = None
    if webpage:
        player_response = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
            video_id, 'initial player response')

    syncid = self._extract_account_syncid(ytcfg, player_response)
    headers = self._generate_api_headers(ytcfg, identity_token, syncid, session_index=session_index)

    ytm_streaming_data = {}
    if is_music_url:
        ytm_webpage = None
        sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
        if sts and not force_mobile_client and 'configs' not in player_skip:
            ytm_webpage = self._download_webpage(
                'https://music.youtube.com',
                video_id, fatal=False, note='Downloading remix client config')

        ytm_cfg = self._extract_ytcfg(video_id, ytm_webpage) or {}
        ytm_client = 'WEB_REMIX'
        if not sts or force_mobile_client:
            # Android client already has signature descrambled
            # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
            if not sts:
                self.report_warning('Falling back to android remix client for player API.')
            ytm_client = 'ANDROID_MUSIC'
            ytm_cfg = {}

        ytm_headers = self._generate_api_headers(
            ytm_cfg, identity_token, syncid,
            client=ytm_client, session_index=session_index)
        ytm_query = {'videoId': video_id}
        ytm_query.update(self._generate_player_context(sts))

        ytm_player_response = self._extract_response(
            item_id=video_id, ep='player', query=ytm_query,
            ytcfg=ytm_cfg, headers=ytm_headers, fatal=False,
            default_client=ytm_client,
            note='Downloading %sremix player API JSON' % ('android ' if force_mobile_client else ''))
        ytm_streaming_data = try_get(ytm_player_response, lambda x: x['streamingData'], dict) or {}

    if not player_response or force_mobile_client:
        sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
        yt_client = 'WEB'
        ytpcfg = ytcfg
        ytp_headers = headers
        if not sts or force_mobile_client:
            # Android client already has signature descrambled
            # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
            if not sts:
                self.report_warning('Falling back to android client for player API.')
            yt_client = 'ANDROID'
            ytpcfg = {}
            ytp_headers = self._generate_api_headers(
                ytpcfg, identity_token, syncid,
                client=yt_client, session_index=session_index)

        yt_query = {'videoId': video_id}
        yt_query.update(self._generate_player_context(sts))
        player_response = self._extract_response(
            item_id=video_id, ep='player', query=yt_query,
            ytcfg=ytpcfg, headers=ytp_headers, fatal=False,
            default_client=yt_client,
            note='Downloading %splayer API JSON' % ('android ' if force_mobile_client else '')
        ) or player_response

    # Both the webpage download and the player API request above are
    # non-fatal, so there may be no player response at all. Normalise to a
    # dict so the lookups below do not fail with AttributeError on None.
    player_response = player_response or {}

    # Age-gate workarounds
    playability_status = player_response.get('playabilityStatus') or {}
    if playability_status.get('reason') in self._AGE_GATE_REASONS:
        gvi_clients = ('ANDROID', 'TVHTML5') if force_mobile_client else ('TVHTML5', 'ANDROID')
        for gvi_client in gvi_clients:
            pr = self._parse_json(try_get(compat_parse_qs(
                self._download_webpage(
                    base_url + 'get_video_info', video_id,
                    'Refetching age-gated %s info webpage' % gvi_client.lower(),
                    'unable to download video info webpage', fatal=False,
                    query=self._get_video_info_params(video_id, client=gvi_client))),
                lambda x: x['player_response'][0],
                compat_str) or '{}', video_id)
            if pr:
                break
        if not pr:
            self.report_warning('Falling back to embedded-only age-gate workaround.')
            embed_webpage = None
            sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
            if sts and not force_mobile_client and 'configs' not in player_skip:
                embed_webpage = self._download_webpage(
                    'https://www.youtube.com/embed/%s?html5=1' % video_id,
                    video_id=video_id, note='Downloading age-gated embed config')

            ytcfg_age = self._extract_ytcfg(video_id, embed_webpage) or {}
            # If we extracted the embed webpage, it'll tell us if we can view the video
            embedded_pr = self._parse_json(
                try_get(ytcfg_age, lambda x: x['PLAYER_VARS']['embedded_player_response'], str) or '{}',
                video_id=video_id)
            embedded_ps_reason = try_get(embedded_pr, lambda x: x['playabilityStatus']['reason'], str) or ''
            if embedded_ps_reason not in self._AGE_GATE_REASONS:
                yt_client = 'WEB_EMBEDDED_PLAYER'
                if not sts or force_mobile_client:
                    # Android client already has signature descrambled
                    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
                    if not sts:
                        self.report_warning(
                            'Falling back to android embedded client for player API (note: some formats may be missing).')
                    yt_client = 'ANDROID_EMBEDDED_PLAYER'
                    ytcfg_age = {}

                ytage_headers = self._generate_api_headers(
                    ytcfg_age, identity_token, syncid,
                    client=yt_client, session_index=session_index)
                yt_age_query = {'videoId': video_id}
                yt_age_query.update(self._generate_player_context(sts))
                pr = self._extract_response(
                    item_id=video_id, ep='player', query=yt_age_query,
                    ytcfg=ytcfg_age, headers=ytage_headers, fatal=False,
                    default_client=yt_client,
                    note='Downloading %sage-gated player API JSON' % ('android ' if force_mobile_client else '')
                ) or {}

        if pr:
            player_response = pr

    trailer_video_id = try_get(
        playability_status,
        lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
        compat_str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Meta-tag lookup that degrades to "always None" when there is no webpage
    search_meta = (
        lambda x: self._html_search_meta(x, webpage, default=None)) \
        if webpage else lambda x: None

    video_details = player_response.get('videoDetails') or {}
    microformat = try_get(
        player_response,
        lambda x: x['microformat']['playerMicroformatRenderer'],
        dict) or {}
    video_title = video_details.get('title') \
        or self._get_text(microformat.get('title')) \
        or search_meta(['og:title', 'twitter:title', 'title'])
    video_description = video_details.get('shortDescription')

    if not smuggled_data.get('force_singlefeed', False):
        if not self.get_param('noplaylist'):
            multifeed_metadata_list = try_get(
                player_response,
                lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
                compat_str)
            if multifeed_metadata_list:
                entries = []
                feed_ids = []
                for feed in multifeed_metadata_list.split(','):
                    # Unquote should take place before split on comma (,) since textual
                    # fields may contain comma as well (see
                    # https://github.com/ytdl-org/youtube-dl/issues/8536)
                    feed_data = compat_parse_qs(
                        compat_urllib_parse_unquote_plus(feed))

                    def feed_entry(name):
                        return try_get(
                            feed_data, lambda x: x[name][0], compat_str)

                    feed_id = feed_entry('id')
                    if not feed_id:
                        continue
                    feed_title = feed_entry('title')
                    title = video_title
                    if feed_title:
                        title += ' (%s)' % feed_title
                    entries.append({
                        '_type': 'url_transparent',
                        'ie_key': 'Youtube',
                        'url': smuggle_url(
                            base_url + 'watch?v=' + feed_data['id'][0],
                            {'force_singlefeed': True}),
                        'title': title,
                    })
                    feed_ids.append(feed_id)
                self.to_screen(
                    'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                    % (', '.join(feed_ids), video_id))
                return self.playlist_result(
                    entries, video_id, video_title, video_description)
        else:
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

    formats, itags, stream_ids = [], [], []
    itag_qualities = {}
    q = qualities([
        # "tiny" is the smallest video-only format. But some audio-only formats
        # were also labeled "tiny". It is not clear if such formats still exist
        'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
    ])

    streaming_data = player_response.get('streamingData') or {}
    streaming_formats = streaming_data.get('formats') or []
    streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
    streaming_formats.extend(ytm_streaming_data.get('formats') or [])
    streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        # The same itag may appear once per audio track; dedupe on the pair
        stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
        if stream_id in stream_ids:
            continue

        quality = fmt.get('quality')
        if quality == 'tiny' or not quality:
            quality = fmt.get('audioQuality', '').lower() or quality
        if itag and quality:
            itag_qualities[itag] = quality
        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragments that would subsequently be requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            sc = compat_parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not (sc and fmt_url and encrypted_sig):
                continue
            if not player_url:
                continue
            signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
            sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
            fmt_url += '&' + sp + '=' + signature

        if itag:
            itags.append(itag)
            stream_ids.append(stream_id)

        tbr = float_or_none(
            fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': itag,
            'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
            'fps': int_or_none(fmt.get('fps')),
            'height': int_or_none(fmt.get('height')),
            'quality': q(quality),
            'tbr': tbr,
            'url': fmt_url,
            # Coerced like 'height' so both dimensions have a consistent type
            'width': int_or_none(fmt.get('width')),
            'language': audio_track.get('id', '').split('.')[0],
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
            # The 3gp format in android client has a quality of "small",
            # but is actually worse than all other formats
            if dct['ext'] == '3gp':
                dct['quality'] = q('tiny')
        no_audio = dct.get('acodec') == 'none'
        no_video = dct.get('vcodec') == 'none'
        if no_audio:
            dct['vbr'] = tbr
        if no_video:
            dct['abr'] = tbr
        if no_audio or no_video:
            dct['downloader_options'] = {
                # Youtube throttles chunks >~10M
                'http_chunk_size': 10485760,
            }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
        formats.append(dct)

    skip_manifests = self._configuration_arg('skip')
    get_dash = 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
    get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

    for sd in (streaming_data, ytm_streaming_data):
        hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            for f in self._extract_m3u8_formats(
                    hls_manifest_url, video_id, 'mp4', fatal=False):
                itag = self._search_regex(
                    r'/itag/(\d+)', f['url'], 'itag', default=None)
                if itag:
                    f['format_id'] = itag
                formats.append(f)

        dash_manifest_url = get_dash and sd.get('dashManifestUrl')
        if dash_manifest_url:
            for f in self._extract_mpd_formats(
                    dash_manifest_url, video_id, fatal=False):
                itag = f['format_id']
                # Already collected from streamingData above
                if itag in itags:
                    continue
                if itag in itag_qualities:
                    f['quality'] = q(itag_qualities[itag])
                filesize = int_or_none(self._search_regex(
                    r'/clen/(\d+)', f.get('fragment_base_url')
                    or f['url'], 'file size', default=None))
                if filesize:
                    f['filesize'] = filesize
                formats.append(f)

    if not formats:
        if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
            self.raise_no_formats(
                'This video is DRM protected.', expected=True)
        pemr = try_get(
            playability_status,
            lambda x: x['errorScreen']['playerErrorMessageRenderer'],
            dict) or {}
        reason = self._get_text(pemr.get('reason')) or playability_status.get('reason')
        subreason = pemr.get('subreason')
        if subreason:
            subreason = clean_html(self._get_text(subreason))
            if subreason == 'The uploader has not made this video available in your country.':
                countries = microformat.get('availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # Either part may be missing (no reason text, or a subreason that
            # cleans to nothing); only concatenate what is actually there
            if subreason:
                reason = (reason + '\n' + subreason) if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    self._sort_formats(formats)

    keywords = video_details.get('keywords') or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break

    category = microformat.get('category') or search_meta('genre')
    channel_id = video_details.get('channelId') \
        or microformat.get('externalChannelId') \
        or search_meta('channelId')
    duration = int_or_none(
        video_details.get('lengthSeconds')
        or microformat.get('lengthSeconds')) \
        or parse_duration(search_meta('duration'))
    is_live = video_details.get('isLive')
    is_upcoming = video_details.get('isUpcoming')
    owner_profile_url = microformat.get('ownerProfileUrl')

    thumbnails = []
    for container in (video_details, microformat):
        for thumbnail in (try_get(
                container,
                lambda x: x['thumbnail']['thumbnails'], list) or []):
            thumbnail_url = thumbnail.get('url')
            if not thumbnail_url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in thumbnail_url:
                thumbnail_url = thumbnail_url.split('?')[0]
            thumbnails.append({
                'url': thumbnail_url,
                'height': int_or_none(thumbnail.get('height')),
                'width': int_or_none(thumbnail.get('width')),
            })
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    # The best resolution thumbnails sometimes do not appear in the webpage
    # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
    guaranteed_thumbnail_names = [
        'hqdefault', 'hq1', 'hq2', 'hq3', '0',
        'mqdefault', 'mq1', 'mq2', 'mq3',
        'default', '1', '2', '3'
    ]
    thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
    n_thumbnail_names = len(thumbnail_names)

    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
        '_test_url': name in hq_thumbnail_names,
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Earlier names in thumbnail_names rank higher; webp beats jpg of the same name
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)

    info = {
        'id': video_id,
        'title': self._live_title(video_title) if is_live else video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        'description': video_description,
        'upload_date': unified_strdate(
            microformat.get('uploadDate')
            or search_meta('uploadDate')),
        # .get(): video_details may be empty (it defaults to {}), and every
        # other field read from it is optional as well
        'uploader': video_details.get('author'),
        'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
        'uploader_url': owner_profile_url,
        'channel_id': channel_id,
        'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
        'duration': duration,
        'view_count': int_or_none(
            video_details.get('viewCount')
            or microformat.get('viewCount')
            or search_meta('interactionCount')),
        'average_rating': float_or_none(video_details.get('averageRating')),
        'age_limit': 18 if (
            microformat.get('isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'is_live': is_live,
        'playable_in_embed': playability_status.get('playableInEmbed'),
        'was_live': video_details.get('isLiveContent'),
    }

    pctr = try_get(
        player_response,
        lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
    subtitles = {}
    if pctr:
        def process_language(container, base_url, lang_code, sub_name, query):
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': update_url_query(base_url, query),
                    'name': sub_name,
                })

        for caption_track in (pctr.get('captionTracks') or []):
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            if caption_track.get('kind') != 'asr':
                lang_code = (
                    remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
                    or caption_track.get('languageCode'))
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code,
                    try_get(caption_track, lambda x: x['name']['simpleText']),
                    {})
                continue
            # 'asr' track: offer it in every available translation language
            automatic_captions = {}
            for translation_language in (pctr.get('translationLanguages') or []):
                translation_language_code = translation_language.get('languageCode')
                if not translation_language_code:
                    continue
                process_language(
                    automatic_captions, base_url, translation_language_code,
                    self._get_text(translation_language.get('languageName'), max_runs=1),
                    {'tlang': translation_language_code})
            info['automatic_captions'] = automatic_captions
    info['subtitles'] = subtitles

    parsed_url = compat_urllib_parse_urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = compat_parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(query[k][0])

    # Youtube Music Auto-generated description
    if video_description:
        mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                # strip() belongs on the matched text, not on the group name
                'album': mobj.group('album').strip(),
                'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self._extract_yt_initial_variable(
            webpage, self._YT_INITIAL_DATA_RE, video_id,
            'yt initial data')
    if not initial_data:
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=ytcfg, headers=headers, query={'videoId': video_id},
            note='Downloading initial data API JSON')

    try:
        # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
        info['subtitles']['live_chat'] = [{
            'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
            'video_id': video_id,
            'ext': 'json',
            'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
        }]
    except (KeyError, IndexError, TypeError):
        pass

    # Bound up front: the comments post-extractor at the end of this method
    # closes over `contents`, which was otherwise only assigned when
    # initial_data is available
    contents = []
    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or None)

        contents = try_get(
            initial_data,
            lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
            list) or []
        for content in contents:
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = self._get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    like_count, dislike_count = sbr_tooltip.split(' / ')
                    info.update({
                        'like_count': str_to_int(like_count),
                        'dislike_count': str_to_int(dislike_count),
                    })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                info['channel'] = self._get_text(try_get(
                    vsir,
                    lambda x: x['owner']['videoOwnerRenderer']['title'],
                    dict))
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                # A divider line between rows marks more than one song, in
                # which case the per-song fields below are not filled in
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = self._get_text(mrr['title'])
                    mrr_contents_text = self._get_text(mrr['contents'][0])
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

    fallbacks = {
        'channel': 'uploader',
        'channel_id': 'uploader_id',
        'channel_url': 'uploader_url',
    }
    for to, frm in fallbacks.items():
        if not info.get(to):
            info[to] = info.get(frm)

    for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    is_private = bool_or_none(video_details.get('isPrivate'))
    is_unlisted = bool_or_none(microformat.get('isUnlisted'))
    is_membersonly = None
    is_premium = None
    if initial_data and is_private is not None:
        is_membersonly = False
        is_premium = False
        contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
        badge_labels = set()
        for content in contents:
            if not isinstance(content, dict):
                continue
            badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
        for badge_label in badge_labels:
            if badge_label.lower() == 'members only':
                is_membersonly = True
            elif badge_label.lower() == 'premium':
                is_premium = True
            elif badge_label.lower() == 'unlisted':
                is_unlisted = True

    info['availability'] = self._availability(
        is_private=is_private,
        needs_premium=is_premium,
        needs_subscription=is_membersonly,
        needs_auth=info['age_limit'] >= 18,
        is_unlisted=None if is_private is None else is_unlisted)

    # get xsrf for annotations or comments
    get_annotations = self.get_param('writeannotations', False)
    get_comments = self.get_param('getcomments', False)
    if get_annotations or get_comments:
        xsrf_token = None
        ytcfg = self._extract_ytcfg(video_id, webpage)
        if ytcfg:
            xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
        if not xsrf_token:
            xsrf_token = self._search_regex(
                r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
                webpage, 'xsrf token', group='xsrf_token', fatal=False)

    # annotations
    if get_annotations:
        invideo_url = try_get(
            player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
        if xsrf_token and invideo_url:
            xsrf_field_name = None
            if ytcfg:
                xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
            if not xsrf_field_name:
                xsrf_field_name = self._search_regex(
                    r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
                    webpage, 'xsrf field name',
                    group='xsrf_field_name', default='session_token')
            info['annotations'] = self._download_webpage(
                self._proto_relative_url(invideo_url),
                video_id, note='Downloading annotations',
                errnote='Unable to download video annotations', fatal=False,
                data=urlencode_postdata({xsrf_field_name: xsrf_token}))

    if get_comments:
        # Deferred: comments are only fetched when the post-extractor is invoked
        info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_response)

    return info
c5e8d7af 3012
5f6a1245 3013
8bdd16b4 3014class YoutubeTabIE(YoutubeBaseInfoExtractor):
3015 IE_DESC = 'YouTube.com tab'
70d5c17b 3016 _VALID_URL = r'''(?x)
3017 https?://
3018 (?:\w+\.)?
3019 (?:
3020 youtube(?:kids)?\.com|
3021 invidio\.us
3022 )/
3023 (?:
fe03a6cd 3024 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 3025 (?P<not_channel>
9ba5705a 3026 feed/|hashtag/|
70d5c17b 3027 (?:playlist|watch)\?.*?\blist=
3028 )|
29f7c58a 3029 (?!(?:%s)\b) # Direct URLs
70d5c17b 3030 )
3031 (?P<id>[^/?\#&]+)
3032 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 3033 IE_NAME = 'youtube:tab'
3034
81127aa5 3035 _TESTS = [{
da692b79 3036 'note': 'playlists, multipage',
8bdd16b4 3037 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3038 'playlist_mincount': 94,
3039 'info_dict': {
3040 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3041 'title': 'Игорь Клейнер - Playlists',
3042 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3043 'uploader': 'Игорь Клейнер',
3044 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 3045 },
3046 }, {
da692b79 3047 'note': 'playlists, multipage, different order',
8bdd16b4 3048 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3049 'playlist_mincount': 94,
3050 'info_dict': {
3051 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3052 'title': 'Игорь Клейнер - Playlists',
3053 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 3054 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3055 'uploader': 'Игорь Клейнер',
8bdd16b4 3056 },
201c1459 3057 }, {
da692b79 3058 'note': 'playlists, series',
201c1459 3059 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3060 'playlist_mincount': 5,
3061 'info_dict': {
3062 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3063 'title': '3Blue1Brown - Playlists',
3064 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 3065 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3066 'uploader': '3Blue1Brown',
201c1459 3067 },
8bdd16b4 3068 }, {
da692b79 3069 'note': 'playlists, singlepage',
8bdd16b4 3070 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3071 'playlist_mincount': 4,
3072 'info_dict': {
3073 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3074 'title': 'ThirstForScience - Playlists',
3075 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 3076 'uploader': 'ThirstForScience',
3077 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 3078 }
3079 }, {
3080 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3081 'only_matching': True,
3082 }, {
da692b79 3083 'note': 'basic, single video playlist',
0e30a7b9 3084 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 3085 'info_dict': {
0e30a7b9 3086 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3087 'uploader': 'Sergey M.',
3088 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 3089 'title': 'youtube-dl public playlist',
81127aa5 3090 },
0e30a7b9 3091 'playlist_count': 1,
9291475f 3092 }, {
da692b79 3093 'note': 'empty playlist',
0e30a7b9 3094 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 3095 'info_dict': {
0e30a7b9 3096 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3097 'uploader': 'Sergey M.',
3098 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 3099 'title': 'youtube-dl empty playlist',
9291475f
PH
3100 },
3101 'playlist_count': 0,
3102 }, {
da692b79 3103 'note': 'Home tab',
8bdd16b4 3104 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 3105 'info_dict': {
8bdd16b4 3106 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3107 'title': 'lex will - Home',
3108 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3109 'uploader': 'lex will',
3110 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3111 },
8bdd16b4 3112 'playlist_mincount': 2,
9291475f 3113 }, {
da692b79 3114 'note': 'Videos tab',
8bdd16b4 3115 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 3116 'info_dict': {
8bdd16b4 3117 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3118 'title': 'lex will - Videos',
3119 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3120 'uploader': 'lex will',
3121 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3122 },
8bdd16b4 3123 'playlist_mincount': 975,
9291475f 3124 }, {
da692b79 3125 'note': 'Videos tab, sorted by popular',
8bdd16b4 3126 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 3127 'info_dict': {
8bdd16b4 3128 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3129 'title': 'lex will - Videos',
3130 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3131 'uploader': 'lex will',
3132 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3133 },
8bdd16b4 3134 'playlist_mincount': 199,
9291475f 3135 }, {
da692b79 3136 'note': 'Playlists tab',
8bdd16b4 3137 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 3138 'info_dict': {
8bdd16b4 3139 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3140 'title': 'lex will - Playlists',
3141 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3142 'uploader': 'lex will',
3143 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 3144 },
8bdd16b4 3145 'playlist_mincount': 17,
ac7553d0 3146 }, {
da692b79 3147 'note': 'Community tab',
8bdd16b4 3148 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 3149 'info_dict': {
8bdd16b4 3150 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3151 'title': 'lex will - Community',
3152 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3153 'uploader': 'lex will',
3154 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3155 },
3156 'playlist_mincount': 18,
87dadd45 3157 }, {
da692b79 3158 'note': 'Channels tab',
8bdd16b4 3159 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 3160 'info_dict': {
8bdd16b4 3161 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3162 'title': 'lex will - Channels',
3163 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 3164 'uploader': 'lex will',
3165 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 3166 },
deaec5af 3167 'playlist_mincount': 12,
cd684175 3168 }, {
3169 'note': 'Search tab',
3170 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3171 'playlist_mincount': 40,
3172 'info_dict': {
3173 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3174 'title': '3Blue1Brown - Search - linear algebra',
3175 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3176 'uploader': '3Blue1Brown',
3177 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3178 },
6b08cdf6 3179 }, {
a0566bbf 3180 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3181 'only_matching': True,
3182 }, {
a0566bbf 3183 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3184 'only_matching': True,
3185 }, {
a0566bbf 3186 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 3187 'only_matching': True,
3188 }, {
3189 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3190 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3191 'info_dict': {
3192 'title': '29C3: Not my department',
3193 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3194 'uploader': 'Christiaan008',
3195 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 3196 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 3197 },
3198 'playlist_count': 96,
3199 }, {
3200 'note': 'Large playlist',
3201 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 3202 'info_dict': {
8bdd16b4 3203 'title': 'Uploads from Cauchemar',
3204 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3205 'uploader': 'Cauchemar',
3206 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 3207 },
8bdd16b4 3208 'playlist_mincount': 1123,
3209 }, {
da692b79 3210 'note': 'even larger playlist, 8832 videos',
8bdd16b4 3211 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3212 'only_matching': True,
4b7df0d3
JMF
3213 }, {
3214 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3215 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3216 'info_dict': {
acf757f4
PH
3217 'title': 'Uploads from Interstellar Movie',
3218 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 3219 'uploader': 'Interstellar Movie',
8bdd16b4 3220 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 3221 },
481cc733 3222 'playlist_mincount': 21,
358de58c 3223 }, {
3224 'note': 'Playlist with "show unavailable videos" button',
3225 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3226 'info_dict': {
3227 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3228 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3229 'uploader': 'Phim Siêu Nhân Nhật Bản',
3230 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3231 },
da692b79 3232 'playlist_mincount': 200,
5d342002 3233 }, {
da692b79 3234 'note': 'Playlist with unavailable videos in page 7',
5d342002 3235 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3236 'info_dict': {
3237 'title': 'Uploads from BlankTV',
3238 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3239 'uploader': 'BlankTV',
3240 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3241 },
da692b79 3242 'playlist_mincount': 1000,
8bdd16b4 3243 }, {
da692b79 3244 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 3245 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3246 'info_dict': {
3247 'title': 'Data Analysis with Dr Mike Pound',
3248 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3249 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3250 'uploader': 'Computerphile',
deaec5af 3251 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 3252 },
3253 'playlist_mincount': 11,
3254 }, {
a0566bbf 3255 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 3256 'only_matching': True,
dacb3a86 3257 }, {
da692b79 3258 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
3259 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3260 'info_dict': {
3261 'id': 'FqZTN594JQw',
3262 'ext': 'webm',
3263 'title': "Smiley's People 01 detective, Adventure Series, Action",
3264 'uploader': 'STREEM',
3265 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 3266 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
3267 'upload_date': '20150526',
3268 'license': 'Standard YouTube License',
3269 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3270 'categories': ['People & Blogs'],
3271 'tags': list,
dbdaaa23 3272 'view_count': int,
dacb3a86
S
3273 'like_count': int,
3274 'dislike_count': int,
3275 },
3276 'params': {
3277 'skip_download': True,
3278 },
13a75688 3279 'skip': 'This video is not available.',
dacb3a86 3280 'add_ie': [YoutubeIE.ie_key()],
481cc733 3281 }, {
8bdd16b4 3282 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 3283 'only_matching': True,
66b48727 3284 }, {
8bdd16b4 3285 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 3286 'only_matching': True,
a0566bbf 3287 }, {
3288 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3289 'info_dict': {
da692b79 3290 'id': 'X1whbWASnNQ', # This will keep changing
a0566bbf 3291 'ext': 'mp4',
deaec5af 3292 'title': compat_str,
a0566bbf 3293 'uploader': 'Sky News',
3294 'uploader_id': 'skynews',
3295 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 3296 'upload_date': r're:\d{8}',
3297 'description': compat_str,
a0566bbf 3298 'categories': ['News & Politics'],
3299 'tags': list,
3300 'like_count': int,
3301 'dislike_count': int,
3302 },
3303 'params': {
3304 'skip_download': True,
3305 },
da692b79 3306 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 3307 }, {
3308 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3309 'info_dict': {
3310 'id': 'a48o2S1cPoo',
3311 'ext': 'mp4',
3312 'title': 'The Young Turks - Live Main Show',
3313 'uploader': 'The Young Turks',
3314 'uploader_id': 'TheYoungTurks',
3315 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3316 'upload_date': '20150715',
3317 'license': 'Standard YouTube License',
3318 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3319 'categories': ['News & Politics'],
3320 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3321 'like_count': int,
3322 'dislike_count': int,
3323 },
3324 'params': {
3325 'skip_download': True,
3326 },
3327 'only_matching': True,
3328 }, {
3329 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3330 'only_matching': True,
3331 }, {
3332 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3333 'only_matching': True,
09f1580e 3334 }, {
3335 'note': 'A channel that is not live. Should raise error',
3336 'url': 'https://www.youtube.com/user/numberphile/live',
3337 'only_matching': True,
3d3dddc9 3338 }, {
3339 'url': 'https://www.youtube.com/feed/trending',
3340 'only_matching': True,
3341 }, {
3d3dddc9 3342 'url': 'https://www.youtube.com/feed/library',
3343 'only_matching': True,
3344 }, {
3d3dddc9 3345 'url': 'https://www.youtube.com/feed/history',
3346 'only_matching': True,
3347 }, {
3d3dddc9 3348 'url': 'https://www.youtube.com/feed/subscriptions',
3349 'only_matching': True,
3350 }, {
3d3dddc9 3351 'url': 'https://www.youtube.com/feed/watch_later',
3352 'only_matching': True,
3353 }, {
da692b79 3354 'note': 'Recommended - redirects to home page',
3d3dddc9 3355 'url': 'https://www.youtube.com/feed/recommended',
3356 'only_matching': True,
29f7c58a 3357 }, {
da692b79 3358 'note': 'inline playlist with not always working continuations',
29f7c58a 3359 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3360 'only_matching': True,
3361 }, {
3362 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3363 'only_matching': True,
3364 }, {
3365 'url': 'https://www.youtube.com/course',
3366 'only_matching': True,
3367 }, {
3368 'url': 'https://www.youtube.com/zsecurity',
3369 'only_matching': True,
3370 }, {
3371 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3372 'only_matching': True,
3373 }, {
3374 'url': 'https://www.youtube.com/TheYoungTurks/live',
3375 'only_matching': True,
39ed931e 3376 }, {
3377 'url': 'https://www.youtube.com/hashtag/cctv9',
3378 'info_dict': {
3379 'id': 'cctv9',
3380 'title': '#cctv9',
3381 },
3382 'playlist_mincount': 350,
201c1459 3383 }, {
3384 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3385 'only_matching': True,
9297939e 3386 }, {
da692b79 3387 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 3388 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3389 'only_matching': True
fe03a6cd 3390 }, {
3391 'note': '/browse/ should redirect to /channel/',
3392 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3393 'only_matching': True
3394 }, {
3395 'note': 'VLPL, should redirect to playlist?list=PL...',
3396 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3397 'info_dict': {
3398 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3399 'uploader': 'NoCopyrightSounds',
3400 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3401 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3402 'title': 'NCS Releases',
3403 },
3404 'playlist_mincount': 166,
18db7548 3405 }, {
3406 'note': 'Topic, should redirect to playlist?list=UU...',
3407 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3408 'info_dict': {
3409 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3410 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3411 'title': 'Uploads from Royalty Free Music - Topic',
3412 'uploader': 'Royalty Free Music - Topic',
3413 },
3414 'expected_warnings': [
3415 'A channel/user page was given',
3416 'The URL does not have a videos tab',
3417 ],
3418 'playlist_mincount': 101,
3419 }, {
3420 'note': 'Topic without a UU playlist',
3421 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3422 'info_dict': {
3423 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3424 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3425 },
3426 'expected_warnings': [
3427 'A channel/user page was given',
3428 'The URL does not have a videos tab',
3429 'Falling back to channel URL',
3430 ],
3431 'playlist_mincount': 9,
abcdd12b 3432 }, {
3433 'note': 'Youtube music Album',
3434 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3435 'info_dict': {
3436 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3437 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3438 },
3439 'playlist_count': 50,
47193e02 3440 }, {
3441 'note': 'unlisted single video playlist',
3442 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3443 'info_dict': {
3444 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3445 'uploader': 'colethedj',
3446 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3447 'title': 'yt-dlp unlisted playlist test',
3448 'availability': 'unlisted'
3449 },
3450 'playlist_count': 1,
29f7c58a 3451 }]
3452
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    URLs accepted by ``YoutubeIE`` are always rejected here; every other URL
    is judged by the inherited ``suitable`` implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 3457
3458 def _extract_channel_id(self, webpage):
3459 channel_id = self._html_search_meta(
3460 'channelId', webpage, 'channel id', default=None)
3461 if channel_id:
3462 return channel_id
3463 channel_url = self._html_search_meta(
3464 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3465 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3466 'twitter:app:url:googleplay'), webpage, 'channel url')
3467 return self._search_regex(
3468 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3469 channel_url, 'channel id')
15f6397c 3470
8bdd16b4 3471 @staticmethod
cd7c66cf 3472 def _extract_basic_item_renderer(item):
3473 # Modified from _extract_grid_item_renderer
201c1459 3474 known_basic_renderers = (
3475 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3476 )
3477 for key, renderer in item.items():
201c1459 3478 if not isinstance(renderer, dict):
cd7c66cf 3479 continue
201c1459 3480 elif key in known_basic_renderers:
3481 return renderer
3482 elif key.startswith('grid') and key.endswith('Renderer'):
3483 return renderer
8bdd16b4 3484
def _grid_entries(self, grid_renderer):
    """Yield one result per recognised element of ``grid_renderer['items']``.

    Playlists and channels become ``url_result`` entries for ``YoutubeTabIE``,
    videos are passed to ``_extract_video``, and anything else is resolved
    through its navigation endpoint URL when one of the known extractors
    accepts that URL. Elements that are not dicts, or that carry no usable
    renderer, are skipped.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer.get('title'))

        playlist_id = renderer.get('playlistId')
        if playlist_id:
            # playlist
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif renderer.get('videoId'):
            # video
            yield self._extract_video(renderer)
        elif renderer.get('channelId'):
            # channel
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % renderer.get('channelId'),
                ie=YoutubeTabIE.ie_key(), video_title=title)
        else:
            # generic endpoint URL support
            endpoint_path = try_get(
                renderer,
                lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str)
            ep_url = urljoin('https://www.youtube.com/', endpoint_path)
            if not ep_url:
                continue
            # Only the first extractor that accepts the URL is used
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(ep_url)),
                None)
            if matching_ie is not None:
                yield self.url_result(
                    ep_url, ie=matching_ie.ie_key(),
                    video_id=matching_ie._match_id(ep_url), video_title=title)
8bdd16b4 3524
3d3dddc9 3525 def _shelf_entries_from_content(self, shelf_renderer):
3526 content = shelf_renderer.get('content')
3527 if not isinstance(content, dict):
8bdd16b4 3528 return
cd7c66cf 3529 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3530 if renderer:
3531 # TODO: add support for nested playlists so each shelf is processed
3532 # as separate playlist
3533 # TODO: this includes only first N items
3534 for entry in self._grid_entries(renderer):
3535 yield entry
3536 renderer = content.get('horizontalListRenderer')
3537 if renderer:
3538 # TODO
3539 pass
8bdd16b4 3540
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield the entries of a shelf.

    When the shelf exposes an endpoint URL, a ``url_result`` for that URL is
    yielded first. With ``skip_channels`` set, a shelf whose URL contains
    ``/channels?`` yields nothing at all. The entries embedded in the shelf
    content are yielded afterwards.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Links to other channels are skipped by inspecting the URL itself;
        # checking endpoint.commandMetadata.webCommandMetadata.webPageType
        # against WEB_PAGE_TYPE_CHANNEL will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = self._get_text(shelf_renderer, lambda x: x['title'])
        yield self.url_result(shelf_url, video_title=shelf_title)
    # The shelf may lack a URL, so the embedded content is always walked too
    for shelf_entry in self._shelf_entries_from_content(shelf_renderer):
        yield shelf_entry
c5e8d7af 3557
8bdd16b4 3558 def _playlist_entries(self, video_list_renderer):
3559 for content in video_list_renderer['contents']:
3560 if not isinstance(content, dict):
3561 continue
3562 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3563 if not isinstance(renderer, dict):
3564 continue
3565 video_id = renderer.get('videoId')
3566 if not video_id:
3567 continue
3568 yield self._extract_video(renderer)
07aeced6 3569
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held in ``content.videoRenderer``, if it has a ``videoId``."""
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3577
8bdd16b4 3578 def _video_entry(self, video_renderer):
3579 video_id = video_renderer.get('videoId')
3580 if video_id:
3581 return self._extract_video(video_renderer)
dacb3a86 3582
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by one community post.

    In order: the attached video, the attached playlist, and every link in
    the post text that ``YoutubeIE`` accepts. A text link pointing at the
    attached video is not yielded a second time. Nothing is yielded when the
    post has no ``backstagePostRenderer``.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        attached_entry = self._extract_video(attached_video)
        if attached_entry:
            yield attached_entry

    # playlist attachment
    playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        ep_url = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
dacb3a86 3618
8bdd16b4 3619 def _post_thread_continuation_entries(self, post_thread_continuation):
3620 contents = post_thread_continuation.get('contents')
3621 if not isinstance(contents, list):
3622 return
3623 for content in contents:
3624 renderer = content.get('backstagePostThreadRenderer')
3625 if not isinstance(renderer, dict):
3626 continue
3627 for entry in self._post_thread_entries(renderer):
3628 yield entry
07aeced6 3629
39ed931e 3630 r''' # unused
3631 def _rich_grid_entries(self, contents):
3632 for content in contents:
3633 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3634 if video_renderer:
3635 entry = self._video_entry(video_renderer)
3636 if entry:
3637 yield entry
3638 '''
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield every entry of ``tab``, following continuations page by page.

    The entries embedded in ``tab['content']`` come first. After that, each
    continuation response is fetched with ``_extract_response`` and decoded
    either from ``continuationContents`` or from the first
    ``appendContinuationItemsAction``. Iteration ends when there is no further
    continuation, when a response is empty, or when a response holds no
    recognised renderer.
    """
    # One-element list shared with the nested generator, which stores the
    # continuation it discovers here (Python 2 has no ``nonlocal``)
    pending_continuation = [None]

    def extract_entries(parent_renderer):
        # Also registered as a continuation handler below; feeds rely on that
        section_handlers = {
            'playlistVideoListRenderer': self._playlist_entries,
            'gridRenderer': self._grid_entries,
            'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
            'backstagePostThreadRenderer': self._post_thread_entries,
            'videoRenderer': lambda x: [self._video_entry(x)],
        }
        for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
            if not isinstance(content, dict):
                continue
            section = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not section:
                rich_item = content.get('richItemRenderer')
                if rich_item:
                    for entry in self._rich_entries(rich_item):
                        yield entry
                    pending_continuation[0] = self._extract_continuation(parent_renderer)
                continue
            for section_item in try_get(section, lambda x: x['contents'], list) or []:
                if not isinstance(section_item, dict):
                    continue
                # Only the first recognised renderer of each item is used
                for key, renderer in section_item.items():
                    if key in section_handlers:
                        for entry in section_handlers[key](renderer):
                            if entry:
                                yield entry
                        pending_continuation[0] = self._extract_continuation(renderer)
                        break

            if not pending_continuation[0]:
                pending_continuation[0] = self._extract_continuation(section)

        if not pending_continuation[0]:
            pending_continuation[0] = self._extract_continuation(parent_renderer)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = pending_continuation[0]
    visitor_data = None

    # Handlers for responses carrying ``continuationContents``
    contents_handlers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Handlers for appended continuation items, keyed by the renderer of the
    # first item; the second element names the key the handler reads from
    item_handlers = {
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep the previous visitor data when the response does not carry any
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        contents_key = next(
            (key for key in continuation_contents if key in contents_handlers), None)
        if contents_key is not None:
            continuation_renderer = continuation_contents[contents_key]
            pending_continuation[0] = None
            for entry in contents_handlers[contents_key](continuation_renderer):
                yield entry
            continuation = (
                pending_continuation[0]
                or self._extract_continuation(continuation_renderer))
        if continuation_renderer:
            continue

        on_response_received = dict_get(
            response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received,
            lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        first_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        item_key = next((key for key in first_item if key in item_handlers), None)
        if item_key is None:
            break
        handler, container_key = item_handlers[item_key]
        # Wrap the whole item list under the key the handler reads from
        video_items_renderer = {container_key: continuation_items}
        pending_continuation[0] = None
        for entry in handler(video_items_renderer):
            yield entry
        continuation = (
            pending_continuation[0]
            or self._extract_continuation(video_items_renderer))
9558dcec 3754
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the tab whose ``selected`` flag is ``True``.

    Both ``tabRenderer`` and ``expandableTabRenderer`` are considered.
    Raises ExtractorError when no tab is selected.
    """
    for tab_item in tabs:
        tab_renderer = dict_get(tab_item, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    raise ExtractorError('Unable to find selected tab')
b82f815f 3763
@classmethod
def _extract_uploader(cls, data):
    """Return the playlist owner's ``uploader``, ``uploader_id`` and ``uploader_url``.

    The values are read from the video owner listed in the
    ``playlistSidebarSecondaryInfoRenderer``. Fields that are ``None`` are
    left out; an empty dict is returned when no owner is found.
    """
    sidebar = cls._extract_sidebar_info_renderer(
        data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}
    uploader = {
        'uploader': owner.get('text'),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(
                owner,
                lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'],
                compat_str)),
    }
    return {field: value for field, value in uploader.items() if value is not None}
8bdd16b4 3778
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a tabbed page (channel, playlist, hashtag, ...).

    Metadata comes from the ``channelMetadataRenderer`` when present and from
    the ``playlistMetadataRenderer`` otherwise. The playlist ID falls back to
    ``item_id`` and the title to the hashtag header or the playlist ID. The
    selected tab's ``title`` and ``expandedText`` are appended to the title.
    Entries are produced lazily by ``_entries`` for the selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags, thumbnails_list = [], []
    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        # Channel avatar first, then the playlist thumbnail from the sidebar
        thumbnails_list = (
            try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for thumb in thumbnails_list:
        thumbnail_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumbnail_url:
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        title = (
            try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
            or playlist_id)
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        # No channel metadata: take the uploader from the playlist sidebar
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror whatever uploader_* values ended up set
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    ytcfg = self._extract_ytcfg(item_id, webpage)
    identity_token = self._extract_identity_token(webpage, item_id)
    account_syncid = self._extract_account_syncid(ytcfg, data)
    return self.playlist_result(
        self._entries(selected_tab, playlist_id, identity_token, account_syncid, ytcfg),
        **metadata)
73c4ac2c 3853
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the entries of a Mix playlist, fetching further pages on demand.

    @param playlist:    playlist renderer dict of the first page
    @param playlist_id: id of the Mix playlist
    @param data:        initial data of the page (used for the account sync id)
    @param webpage:     downloaded webpage (used for ytcfg / identity token)

    Each page is requested from the ``next`` endpoint, seeded with the last
    video of the previous page. Extraction stops when a page yields nothing
    new, when the first video shows up again, or when no further playlist
    can be fetched.
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to (and including) the last video already yielded
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item may lack a watch endpoint; fall back to an empty
        # dict so the defaults below apply instead of crashing on None
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # No further page could be obtained; nothing more to yield
            return
cd7c66cf 3889
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Build the result for a watch-page playlist panel.

    If the panel points to a playlist URL different from *url*, extraction is
    delegated to that URL; otherwise the panel is extracted as a Mix.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_path)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, webpage),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3907
def _extract_availability(self, data):
    """
    Determine the availability of a playlist/tab from its badges.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    labels = self._extract_badges(sidebar)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    dropdown_entries = try_get(
        sidebar, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for entry in dropdown_entries:
        selected = try_get(
            entry, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
        if selected:
            selected_label = self._get_text(
                try_get(entry, lambda x: x['privacyDropdownItemRenderer']['label'], dict) or [])
            if selected_label:
                labels.add(selected_label.lower())
                break

    is_private = is_unlisted = None
    for label in labels:
        if label == 'unlisted':
            is_unlisted = True
        elif label == 'private':
            is_private = True
        elif label == 'public':
            is_unlisted = is_private = False
    return self._availability(is_private, False, False, False, is_unlisted)
3940
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first truthy *info_renderer* entry of the playlist sidebar.

    Only values matching *expected_type* are considered; None is returned
    when the sidebar has no such entry.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next((found for found in candidates if found), None)
3949
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Re-request the playlist including unavailable videos.
    Returns None when the page has no primary sidebar info renderer;
    otherwise returns the (non-fatal) API response, using the endpoint of
    the 'show unavailable videos' menu item when one is present.
    """
    sidebar = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not sidebar:
        return

    browse_id = params = None
    menu_items = try_get(
        sidebar, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for entry in menu_items:
        if not isinstance(entry, dict):
            continue
        nav_renderer = entry.get('menuNavigationItemRenderer')
        label = try_get(
            nav_renderer, lambda x: x['text']['simpleText'], compat_str)
        if label and label.lower() == 'show unavailable videos':
            endpoint = try_get(
                nav_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id = endpoint.get('browseId')
            params = endpoint.get('params')
            break

    ytcfg = self._extract_ytcfg(item_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=try_get(
            self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    # Fall back to fixed defaults when the menu item was not found
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
358de58c 3988
def _extract_webpage(self, url, item_id):
    """Download *url* and parse its initial data, retrying incomplete pages.

    The page is accepted once the parsed data has a 'contents' or a
    'currentVideoEndpoint' key. Otherwise alerts are reported and the
    download is repeated, up to the 'extractor_retries' param (default 3).

    @returns (webpage, data)
    @raises  ExtractorError when every attempt returned incomplete data
    """
    retries = self.get_param('extractor_retries', 3)
    count = -1
    last_error = 'Incomplete yt initial data received'
    while count < retries:
        count += 1
        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if count:
            self.report_warning('%s. Retrying ...' % last_error)
        webpage = self._download_webpage(
            url, item_id,
            'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
        data = self._extract_yt_initial_data(item_id, webpage)
        if data.get('contents') or data.get('currentVideoEndpoint'):
            break
        # Extract alerts here only when there is error
        self._extract_and_report_alerts(data)
        if count >= retries:
            raise ExtractorError(last_error)
    return webpage, data
4010
9297939e 4011 @staticmethod
4012 def _smuggle_data(entries, data):
4013 for entry in entries:
4014 if data:
4015 entry['url'] = smuggle_url(entry['url'], data)
4016 yield entry
4017
def _real_extract(self, url):
    """Extract *url*, carrying smuggled data over to every resulting entry.

    Music URLs are flagged in the smuggled data before the actual
    extraction so that the flag reaches the entries too.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True
    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
4026
# Splits a URL into the part matched by _VALID_URL ('pre'), an optional
# '/tab' component (only tried when 'channel_type' matched) and the rest
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)

def __real_extract(self, url, smuggled_data):
    """Resolve a tab/playlist/watch URL to a playlist or video result.

    The URL is normalized first (host, tab name, music and home-page
    redirects). The downloaded page is then dispatched, in order, to tab
    extraction, watch-page playlist extraction, or a single video result.
    Raises ExtractorError if none of these can be recognized.
    """
    item_id = self._match_id(url)
    parsed_url = compat_urlparse.urlparse(url)
    url = compat_urlparse.urlunparse(parsed_url._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(target_url):
        # Named groups of the URL, with unmatched groups as empty strings
        groups = self._url_re.match(target_url).groupdict()
        return {key: '' if value is None else value for key, value in groups.items()}

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel and smuggled_data.get('is_music_url'):
        id_prefix = item_id[:2]
        if id_prefix == 'VL':
            # Youtube music VL channels have an equivalent playlist
            item_id = item_id[2:]
            pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
        elif id_prefix == 'MP':
            # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
            item_id = self._search_regex(
                r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                'playlist id')
            pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
        elif mobj['channel_type'] == 'browse':
            # Youtube music /browse/ should be changed to /channel/
            pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    query_params = parse_qs(url)
    video_id = query_params.get('v', [None])[0]
    playlist_id = query_params.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            if alert_type == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        # Only switch over once the playlist page proved usable
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if not video_id:
        raise ExtractorError('Unable to recognize tab page')

    if mobj['tab'] != '/live':  # live tab is expected to redirect to video
        self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
    return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
c5e8d7af 4143
c5e8d7af 4144
class YoutubePlaylistIE(InfoExtractor):
    """Match bare playlist ids and ``list=`` URLs and hand them to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE handles or that carry a video id."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        if has_video_id:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite *url* to the canonical playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query when there is one; otherwise use the matched id
        query = parse_qs(url) or {'list': playlist_id}
        url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 4227
4228
class YoutubeYtBeIE(InfoExtractor):
    """Turn youtu.be short links that carry a playlist into watch URLs for YoutubeTabIE."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the equivalent watch URL (video + playlist) and delegate it."""
        match = re.match(self._VALID_URL, url)
        video_id, playlist_id = match.group('id'), match.group('playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 4267
4268
class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` shorthand to the user's channel URL."""

    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /user/ URL of the matched name."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 4282
b05654f0 4283
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the ``:ytfav`` keyword family to the ``LL`` playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed playlist URL."""
        playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key())
4301
4302
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor for the ``ytsearch`` keyword, paging via the search endpoint."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* video results for *query*, following continuations."""
        payload = {'query': query}
        if self._SEARCH_PARAMS:
            payload['params'] = self._SEARCH_PARAMS
        yielded = 0
        continuation = {}
        for page_num in itertools.count(1):
            payload.update(continuation)
            response = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=payload,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not response:
                return
            sections = try_get(
                response,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for section in sections:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [section]})

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not items:
                    continue
                for item in items:
                    video = item.get('videoRenderer') if isinstance(item, dict) else None
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    yielded += 1
                    if yielded == n:
                        return

            if not continuation:
                return

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
75dff0ee 4370
c9ae7b95 4371
class YoutubeSearchDateIE(YoutubeSearchIE):
    """``ytsearchdate`` variant of the search extractor (newest videos first)."""

    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the 'params' value of every search request
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4377
c9ae7b95 4378
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Extract search results from a ``youtube.com/results`` URL."""

    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return the plain URL pattern declared on the class."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the search terms and the 'sp' filter from the URL, then run the search."""
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = params.get('search_query') or params.get('q')
        query = terms[0]
        # 'sp' becomes the search params of this instance ('' when absent)
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4404
4405
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors.
    Each subclass has to provide the _FEED_NAME property.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        # The extractor name is derived from the feed name
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /feed/ URL of this feed."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4422
4423
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ``:ytwatchlater`` keyword to the ``WL`` playlist."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed playlist URL."""
        playlist_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4436
4437
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the bare youtube.com URL and the ``:ytrec`` keyword."""

    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    # Overrides the base class value of True
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4453
1ed5b5c9 4454
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``:ytsubs`` keyword family."""

    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4466
1ed5b5c9 4467
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``:ythis`` / ``:ythistory`` keywords."""

    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4476
4477
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs whose video id is missing and explain the likely cause.

    Every matched URL raises an expected ExtractorError with a hint about
    quoting the URL in the shell.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise: the URL carries no video id to extract."""
        # The hint names this project's executable (yt-dlp), not youtube-dl
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4525
4526
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose video id is 1 to 10 characters long and report them as truncated."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)