]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[youtube_live_chat] use `clickTrackingParams` (#449)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
a5c56234 6import hashlib
0ca96d48 7import itertools
c5e8d7af 8import json
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
8a784c74 12import time
e0df6211 13import traceback
c5e8d7af 14
b05654f0 15from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 16from ..compat import (
edf3e38e 17 compat_chr,
29f7c58a 18 compat_HTTPError,
c5e8d7af 19 compat_parse_qs,
545cc85d 20 compat_str,
7fd002c0 21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
4bb4a188 25)
545cc85d 26from ..jsinterp import JSInterpreter
4bb4a188 27from ..utils import (
c224251a 28 bool_or_none,
c5e8d7af 29 clean_html,
26fe8ffe 30 dict_get,
d92f5d5a 31 datetime_from_str,
358de58c 32 error_to_compat_str,
c5e8d7af 33 ExtractorError,
b60419c5 34 format_field,
2d30521a 35 float_or_none,
dd27fd17 36 int_or_none,
94278f72 37 mimetype2ext,
6310acf5 38 parse_codecs,
7c80519c 39 parse_duration,
dca3ff4a 40 qualities,
3995d37d 41 remove_start,
cf7e015f 42 smuggle_url,
dbdaaa23 43 str_or_none,
c93d53f5 44 str_to_int,
556dbe7f 45 try_get,
c5e8d7af
PH
46 unescapeHTML,
47 unified_strdate,
cf7e015f 48 unsmuggle_url,
8bdd16b4 49 update_url_query,
21c340b8 50 url_or_none,
6e6bc8da 51 urlencode_postdata,
d92f5d5a 52 urljoin
c5e8d7af
PH
53)
54
5f6a1245 55
def parse_qs(url):
    """Parse the query component of *url* with compat_urlparse.parse_qs."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
58
59
de7f3446 60class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b
JMF
61 """Provide base functions for Youtube extractors"""
62 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
9303ce3e 63 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
e00eb564
S
64
65 _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
3995d37d
S
66 _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
67 _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
e00eb564 68
3462ffa8 69 _RESERVED_NAMES = (
bea74222 70 r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
46953e7e 71 r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
cd7c66cf 72 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
3462ffa8 73
b2e8bc1b
JMF
74 _NETRC_MACHINE = 'youtube'
75 # If True it will raise an error if no login info is provided
76 _LOGIN_REQUIRED = False
77
70d5c17b 78 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 79
b2e8bc1b 80 def _login(self):
83317f69 81 """
82 Attempt to log in to YouTube.
83 True is returned if successful or skipped.
84 False is returned if login failed.
85
86 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
87 """
9d5d4d64 88
89 def warn(message):
90 self.report_warning(message)
91
92 # username+password login is broken
93 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
94 self.raise_login_required(
95 'Login details are needed to download this content', method='cookies')
68217024 96 username, password = self._get_login_info()
9d5d4d64 97 if username:
98 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
99 return
100 # Everything below this is broken!
101
b2e8bc1b
JMF
102 # No authentication to be performed
103 if username is None:
a06916d9 104 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
69ea8ca4 105 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
a06916d9 106 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
545cc85d 107 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
83317f69 108 return True
b2e8bc1b 109
7cc3570e
PH
110 login_page = self._download_webpage(
111 self._LOGIN_URL, None,
69ea8ca4
PH
112 note='Downloading login page',
113 errnote='unable to fetch login page', fatal=False)
7cc3570e
PH
114 if login_page is False:
115 return
b2e8bc1b 116
1212e997 117 login_form = self._hidden_inputs(login_page)
c5e8d7af 118
e00eb564
S
119 def req(url, f_req, note, errnote):
120 data = login_form.copy()
121 data.update({
122 'pstMsg': 1,
123 'checkConnection': 'youtube',
124 'checkedDomains': 'youtube',
125 'hl': 'en',
126 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
3995d37d 127 'f.req': json.dumps(f_req),
e00eb564
S
128 'flowName': 'GlifWebSignIn',
129 'flowEntry': 'ServiceLogin',
baf67a60
S
130 # TODO: reverse actual botguard identifier generation algo
131 'bgRequest': '["identifier",""]',
041bc3ad 132 })
e00eb564
S
133 return self._download_json(
134 url, None, note=note, errnote=errnote,
135 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
136 fatal=False,
137 data=urlencode_postdata(data), headers={
138 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
139 'Google-Accounts-XSRF': 1,
140 })
141
3995d37d
S
142 lookup_req = [
143 username,
144 None, [], None, 'US', None, None, 2, False, True,
145 [
146 None, None,
147 [2, 1, None, 1,
148 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
149 None, [], 4],
150 1, [None, None, []], None, None, None, True
151 ],
152 username,
153 ]
154
e00eb564 155 lookup_results = req(
3995d37d 156 self._LOOKUP_URL, lookup_req,
e00eb564
S
157 'Looking up account info', 'Unable to look up account info')
158
159 if lookup_results is False:
160 return False
041bc3ad 161
3995d37d
S
162 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
163 if not user_hash:
164 warn('Unable to extract user hash')
165 return False
166
167 challenge_req = [
168 user_hash,
169 None, 1, None, [1, None, None, None, [password, None, True]],
170 [
171 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
172 1, [None, None, []], None, None, None, True
173 ]]
83317f69 174
3995d37d
S
175 challenge_results = req(
176 self._CHALLENGE_URL, challenge_req,
177 'Logging in', 'Unable to log in')
83317f69 178
3995d37d 179 if challenge_results is False:
e00eb564 180 return
83317f69 181
3995d37d
S
182 login_res = try_get(challenge_results, lambda x: x[0][5], list)
183 if login_res:
184 login_msg = try_get(login_res, lambda x: x[5], compat_str)
185 warn(
186 'Unable to login: %s' % 'Invalid password'
187 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
188 return False
189
190 res = try_get(challenge_results, lambda x: x[0][-1], list)
191 if not res:
192 warn('Unable to extract result entry')
193 return False
194
9a6628aa
S
195 login_challenge = try_get(res, lambda x: x[0][0], list)
196 if login_challenge:
197 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
198 if challenge_str == 'TWO_STEP_VERIFICATION':
3995d37d
S
199 # SEND_SUCCESS - TFA code has been successfully sent to phone
200 # QUOTA_EXCEEDED - reached the limit of TFA codes
9a6628aa 201 status = try_get(login_challenge, lambda x: x[5], compat_str)
3995d37d
S
202 if status == 'QUOTA_EXCEEDED':
203 warn('Exceeded the limit of TFA codes, try later')
204 return False
205
206 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
207 if not tl:
208 warn('Unable to extract TL')
209 return False
210
211 tfa_code = self._get_tfa_info('2-step verification code')
212
213 if not tfa_code:
214 warn(
215 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
216 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
217 return False
218
219 tfa_code = remove_start(tfa_code, 'G-')
220
221 tfa_req = [
222 user_hash, None, 2, None,
223 [
224 9, None, None, None, None, None, None, None,
225 [None, tfa_code, True, 2]
226 ]]
227
228 tfa_results = req(
229 self._TFA_URL.format(tl), tfa_req,
230 'Submitting TFA code', 'Unable to submit TFA code')
231
232 if tfa_results is False:
233 return False
234
235 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
236 if tfa_res:
237 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
238 warn(
239 'Unable to finish TFA: %s' % 'Invalid TFA code'
240 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
241 return False
242
243 check_cookie_url = try_get(
244 tfa_results, lambda x: x[0][-1][2], compat_str)
9a6628aa
S
245 else:
246 CHALLENGES = {
247 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
248 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
249 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
250 }
251 challenge = CHALLENGES.get(
252 challenge_str,
253 '%s returned error %s.' % (self.IE_NAME, challenge_str))
254 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
255 return False
3995d37d
S
256 else:
257 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
258
259 if not check_cookie_url:
260 warn('Unable to extract CheckCookie URL')
261 return False
e00eb564
S
262
263 check_cookie_results = self._download_webpage(
3995d37d
S
264 check_cookie_url, None, 'Checking cookie', fatal=False)
265
266 if check_cookie_results is False:
267 return False
e00eb564 268
3995d37d
S
269 if 'https://myaccount.google.com/' not in check_cookie_results:
270 warn('Unable to log in')
b2e8bc1b 271 return False
e00eb564 272
b2e8bc1b
JMF
273 return True
274
cce889b9 275 def _initialize_consent(self):
276 cookies = self._get_cookies('https://www.youtube.com/')
277 if cookies.get('__Secure-3PSID'):
278 return
279 consent_id = None
280 consent = cookies.get('CONSENT')
281 if consent:
282 if 'YES' in consent.value:
283 return
284 consent_id = self._search_regex(
285 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
286 if not consent_id:
287 consent_id = random.randint(100, 999)
288 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 289
b2e8bc1b 290 def _real_initialize(self):
cce889b9 291 self._initialize_consent()
b2e8bc1b
JMF
292 if self._downloader is None:
293 return
b2e8bc1b
JMF
294 if not self._login():
295 return
c5e8d7af 296
f4f751af 297 _YT_WEB_CLIENT_VERSION = '2.20210407.08.00'
298 _YT_INNERTUBE_API_KEY = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
a0566bbf 299 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
29f7c58a 300 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
301 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 302
a5c56234 303 def _generate_sapisidhash_header(self):
1974e99f 304 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
305 # See: https://github.com/yt-dlp/yt-dlp/issues/393
306 yt_cookies = self._get_cookies('https://www.youtube.com')
307 sapisid_cookie = dict_get(
308 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
a5c56234
M
309 if sapisid_cookie is None:
310 return
311 time_now = round(time.time())
1974e99f 312 # SAPISID cookie is required if not already present
313 if not yt_cookies.get('SAPISID'):
314 self._set_cookie(
315 '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
316 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
317 sapisidhash = hashlib.sha1(
318 f'{time_now} {sapisid_cookie.value} https://www.youtube.com'.encode('utf-8')).hexdigest()
319 return f'SAPISIDHASH {time_now}_{sapisidhash}'
a5c56234
M
320
321 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 322 note='Downloading API JSON', errnote='Unable to download API page',
323 context=None, api_key=None):
324
325 data = {'context': context} if context else {'context': self._extract_context()}
8bdd16b4 326 data.update(query)
f4f751af 327 real_headers = self._generate_api_headers()
328 real_headers.update({'content-type': 'application/json'})
329 if headers:
330 real_headers.update(headers)
545cc85d 331 return self._download_json(
a5c56234
M
332 'https://www.youtube.com/youtubei/v1/%s' % ep,
333 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 334 data=json.dumps(data).encode('utf8'), headers=real_headers,
335 query={'key': api_key or self._extract_api_key()})
336
337 def _extract_api_key(self, ytcfg=None):
338 return try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str) or self._YT_INNERTUBE_API_KEY
c54f4aad 339
8bdd16b4 340 def _extract_yt_initial_data(self, video_id, webpage):
341 return self._parse_json(
342 self._search_regex(
29f7c58a 343 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
a0566bbf 344 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
8bdd16b4 345 video_id)
0c148415 346
a1c5d2ca
M
347 def _extract_identity_token(self, webpage, item_id):
348 ytcfg = self._extract_ytcfg(item_id, webpage)
349 if ytcfg:
350 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
351 if token:
352 return token
353 return self._search_regex(
354 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
355 'identity token', default=None)
356
357 @staticmethod
358 def _extract_account_syncid(data):
8ea3f7b9 359 """
360 Extract syncId required to download private playlists of secondary channels
361 @param data Either response or ytcfg
362 """
363 sync_ids = (try_get(
364 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
365 lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
a1c5d2ca
M
366 if len(sync_ids) >= 2 and sync_ids[1]:
367 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
368 # and just "user_syncid||" for primary channel. We only want the channel_syncid
369 return sync_ids[0]
8ea3f7b9 370 # ytcfg includes channel_syncid if on secondary channel
371 return data.get('DELEGATED_SESSION_ID')
a1c5d2ca 372
29f7c58a 373 def _extract_ytcfg(self, video_id, webpage):
8c54a305 374 if not webpage:
375 return {}
29f7c58a 376 return self._parse_json(
377 self._search_regex(
378 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
f4f751af 379 default='{}'), video_id, fatal=False) or {}
380
381 def __extract_client_version(self, ytcfg):
382 return try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) or self._YT_WEB_CLIENT_VERSION
383
384 def _extract_context(self, ytcfg=None):
385 context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict)
386 if context:
387 return context
388
389 # Recreate the client context (required)
390 client_version = self.__extract_client_version(ytcfg)
391 client_name = try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str) or 'WEB'
392 context = {
393 'client': {
394 'clientName': client_name,
395 'clientVersion': client_version,
396 }
397 }
398 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
399 if visitor_data:
400 context['client']['visitorData'] = visitor_data
401 return context
402
403 def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None, visitor_data=None):
404 headers = {
405 'X-YouTube-Client-Name': '1',
406 'X-YouTube-Client-Version': self.__extract_client_version(ytcfg),
407 }
408 if identity_token:
409 headers['x-youtube-identity-token'] = identity_token
410 if account_syncid:
411 headers['X-Goog-PageId'] = account_syncid
412 headers['X-Goog-AuthUser'] = 0
413 if visitor_data:
414 headers['x-goog-visitor-id'] = visitor_data
415 auth = self._generate_sapisidhash_header()
416 if auth is not None:
417 headers['Authorization'] = auth
418 headers['X-Origin'] = 'https://www.youtube.com'
419 return headers
29f7c58a 420
9297939e 421 @staticmethod
422 def is_music_url(url):
423 return re.match(r'https?://music\.youtube\.com/', url) is not None
424
30a074c2 425 def _extract_video(self, renderer):
426 video_id = renderer.get('videoId')
427 title = try_get(
428 renderer,
429 (lambda x: x['title']['runs'][0]['text'],
430 lambda x: x['title']['simpleText']), compat_str)
431 description = try_get(
432 renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
433 compat_str)
434 duration = parse_duration(try_get(
435 renderer, lambda x: x['lengthText']['simpleText'], compat_str))
436 view_count_text = try_get(
437 renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
438 view_count = str_to_int(self._search_regex(
439 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
440 'view count', default=None))
441 uploader = try_get(
bc2ca1bb 442 renderer,
443 (lambda x: x['ownerText']['runs'][0]['text'],
444 lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
30a074c2 445 return {
39ed931e 446 '_type': 'url',
30a074c2 447 'ie_key': YoutubeIE.ie_key(),
448 'id': video_id,
449 'url': video_id,
450 'title': title,
451 'description': description,
452 'duration': duration,
453 'view_count': view_count,
454 'uploader': uploader,
455 }
456
0c148415 457
360e1ca5 458class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 459 IE_DESC = 'YouTube.com'
bc2ca1bb 460 _INVIDIOUS_SITES = (
461 # invidious-redirect websites
462 r'(?:www\.)?redirect\.invidious\.io',
463 r'(?:(?:www|dev)\.)?invidio\.us',
464 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
465 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 466 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 467 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 468 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
bc2ca1bb 469 # youtube-dl invidious instances list
470 r'(?:(?:www|no)\.)?invidiou\.sh',
471 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
472 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 473 r'(?:www\.)?invidious\.mastodon\.host',
474 r'(?:www\.)?invidious\.zapashcanon\.fr',
ed807c18 475 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
201c1459 476 r'(?:www\.)?invidious\.tinfoil-hat\.net',
477 r'(?:www\.)?invidious\.himiko\.cloud',
478 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 479 r'(?:www\.)?invidious\.tube',
480 r'(?:www\.)?invidiou\.site',
481 r'(?:www\.)?invidious\.site',
482 r'(?:www\.)?invidious\.xyz',
483 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 484 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 485 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 486 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 487 r'(?:www\.)?tube\.poal\.co',
488 r'(?:www\.)?tube\.connect\.cafe',
489 r'(?:www\.)?vid\.wxzm\.sx',
490 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 491 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 492 r'(?:www\.)?yewtu\.be',
493 r'(?:www\.)?yt\.elukerio\.org',
494 r'(?:www\.)?yt\.lelux\.fi',
495 r'(?:www\.)?invidious\.ggc-project\.de',
496 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 497 r'(?:www\.)?ytprivate\.com',
498 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 499 r'(?:www\.)?invidious\.toot\.koeln',
500 r'(?:www\.)?invidious\.fdn\.fr',
501 r'(?:www\.)?watch\.nettohikari\.com',
ed807c18 502 r'(?:www\.)?invidious\.namazso\.eu',
503 r'(?:www\.)?invidious\.silkky\.cloud',
504 r'(?:www\.)?invidious\.exonip\.de',
505 r'(?:www\.)?invidious\.riverside\.rocks',
506 r'(?:www\.)?invidious\.blamefran\.net',
507 r'(?:www\.)?invidious\.moomoo\.de',
508 r'(?:www\.)?ytb\.trom\.tf',
509 r'(?:www\.)?yt\.cyberhost\.uk',
bc2ca1bb 510 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
511 r'(?:www\.)?qklhadlycap4cnod\.onion',
512 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
513 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
514 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
515 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
516 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
517 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
ed807c18 518 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
519 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
520 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
521 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
bc2ca1bb 522 )
cb7dfeea 523 _VALID_URL = r"""(?x)^
c5e8d7af 524 (
edb53e2d 525 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 526 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
527 (?:www\.)?deturl\.com/www\.youtube\.com|
528 (?:www\.)?pwnyoutube\.com|
529 (?:www\.)?hooktube\.com|
530 (?:www\.)?yourepeat\.com|
531 tube\.majestyc\.net|
532 %(invidious)s|
533 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
534 (?:.*?\#/)? # handle anchor (#/) redirect urls
535 (?: # the various things that can precede the ID:
ac7553d0 536 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 537 |(?: # or the v= param in all its forms
f7000f3a 538 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 539 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 540 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
541 v=
542 )
f4b05232 543 ))
cbaed4bb
S
544 |(?:
545 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
546 vid\.plus| # or vid.plus/xxxx
547 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 548 %(invidious)s
cbaed4bb 549 )/
edb53e2d 550 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 551 )
c5e8d7af 552 )? # all until now is optional -> you can pass the naked ID
201c1459 553 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 554 (?(1).+)? # if we found the ID, everything can follow
9297939e 555 (?:\#|$)""" % {
bc2ca1bb 556 'invidious': '|'.join(_INVIDIOUS_SITES),
557 }
e40c758c 558 _PLAYER_INFO_RE = (
cc2db878 559 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
560 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 561 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 562 )
2c62dc26 563 _formats = {
c2d3cb4c 564 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
565 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
566 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
567 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
568 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
569 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
570 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
571 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 572 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 573 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
574 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
575 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
576 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
577 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
578 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 579 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 580 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
581 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 582
583
584 # 3D videos
c2d3cb4c 585 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
586 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
587 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
588 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 589 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
590 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
591 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 592
96fb5605 593 # Apple HTTP Live Streaming
11f12195 594 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 595 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
596 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
597 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
598 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
599 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 600 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
601 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
602
603 # DASH mp4 video
d23028a8
S
604 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
605 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
606 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
607 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
608 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 609 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
610 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
611 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
612 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
613 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
614 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
615 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 616
f6f1fc92 617 # Dash mp4 audio
d23028a8
S
618 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
619 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
620 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
621 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
622 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
623 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
624 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
625
626 # Dash webm
d23028a8
S
627 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
628 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
629 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
630 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
631 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
632 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
633 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
634 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
635 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
636 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
637 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
638 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
639 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
640 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
641 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 642 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
643 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
644 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
645 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
646 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
647 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
648 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
649
650 # Dash webm audio
d23028a8
S
651 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
652 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 653
0857baad 654 # Dash webm audio with opus inside
d23028a8
S
655 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
656 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
657 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 658
ce6b9a2d
PH
659 # RTMP (unnamed)
660 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
661
662 # av01 video only formats sometimes served with "unknown" codecs
663 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
664 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
665 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
666 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 667 }
# Subtitle format identifiers known to this extractor.
# NOTE(review): presumably the formats requested for each caption track;
# the code that consumes this tuple is outside this excerpt - confirm.
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

# NOTE(review): presumably switches off the base extractor's automatic
# geo-restriction bypass for YouTube - confirm against InfoExtractor.
_GEO_BYPASS = False

# Name under which this extractor is identified.
IE_NAME = 'youtube'
2eb88d95
PH
673 _TESTS = [
674 {
2d3d2997 675 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
676 'info_dict': {
677 'id': 'BaW_jenozKc',
678 'ext': 'mp4',
3867038a 679 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
680 'uploader': 'Philipp Hagemeister',
681 'uploader_id': 'phihag',
ec85ded8 682 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
683 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
684 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 685 'upload_date': '20121002',
3867038a 686 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 687 'categories': ['Science & Technology'],
3867038a 688 'tags': ['youtube-dl'],
556dbe7f 689 'duration': 10,
dbdaaa23 690 'view_count': int,
3e7c1224
PH
691 'like_count': int,
692 'dislike_count': int,
7c80519c 693 'start_time': 1,
297a564b 694 'end_time': 9,
2eb88d95 695 }
0e853ca4 696 },
fccd3771 697 {
4bc3a23e
PH
698 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
699 'note': 'Embed-only video (#1746)',
700 'info_dict': {
701 'id': 'yZIXLfi8CZQ',
702 'ext': 'mp4',
703 'upload_date': '20120608',
704 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
705 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
706 'uploader': 'SET India',
94bfcd23 707 'uploader_id': 'setindia',
ec85ded8 708 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 709 'age_limit': 18,
545cc85d 710 },
711 'skip': 'Private video',
fccd3771 712 },
11b56058 713 {
8bdd16b4 714 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
715 'note': 'Use the first video ID in the URL',
716 'info_dict': {
717 'id': 'BaW_jenozKc',
718 'ext': 'mp4',
3867038a 719 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
720 'uploader': 'Philipp Hagemeister',
721 'uploader_id': 'phihag',
ec85ded8 722 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 723 'upload_date': '20121002',
3867038a 724 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 725 'categories': ['Science & Technology'],
3867038a 726 'tags': ['youtube-dl'],
556dbe7f 727 'duration': 10,
dbdaaa23 728 'view_count': int,
11b56058
PM
729 'like_count': int,
730 'dislike_count': int,
34a7de29
S
731 },
732 'params': {
733 'skip_download': True,
734 },
11b56058 735 },
dd27fd17 736 {
2d3d2997 737 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
738 'note': '256k DASH audio (format 141) via DASH manifest',
739 'info_dict': {
740 'id': 'a9LDPn-MO4I',
741 'ext': 'm4a',
742 'upload_date': '20121002',
743 'uploader_id': '8KVIDEO',
ec85ded8 744 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
745 'description': '',
746 'uploader': '8KVIDEO',
747 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 748 },
4bc3a23e
PH
749 'params': {
750 'youtube_include_dash_manifest': True,
751 'format': '141',
4919603f 752 },
de3c7fe0 753 'skip': 'format 141 not served anymore',
dd27fd17 754 },
8bdd16b4 755 # DASH manifest with encrypted signature
756 {
757 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
758 'info_dict': {
759 'id': 'IB3lcPjvWLA',
760 'ext': 'm4a',
761 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
762 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
763 'duration': 244,
764 'uploader': 'AfrojackVEVO',
765 'uploader_id': 'AfrojackVEVO',
766 'upload_date': '20131011',
cc2db878 767 'abr': 129.495,
8bdd16b4 768 },
769 'params': {
770 'youtube_include_dash_manifest': True,
771 'format': '141/bestaudio[ext=m4a]',
772 },
773 },
aa79ac0c
PH
774 # Controversy video
775 {
776 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
777 'info_dict': {
778 'id': 'T4XJQO3qol8',
779 'ext': 'mp4',
556dbe7f 780 'duration': 219,
aa79ac0c 781 'upload_date': '20100909',
4fe54c12 782 'uploader': 'Amazing Atheist',
aa79ac0c 783 'uploader_id': 'TheAmazingAtheist',
ec85ded8 784 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 785 'title': 'Burning Everyone\'s Koran',
545cc85d 786 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 787 }
c522adb1 788 },
dd2d55f1 789 # Normal age-gate video (embed allowed)
c522adb1 790 {
2d3d2997 791 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
792 'info_dict': {
793 'id': 'HtVdAasjOgU',
794 'ext': 'mp4',
795 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 796 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 797 'duration': 142,
c522adb1
JMF
798 'uploader': 'The Witcher',
799 'uploader_id': 'WitcherGame',
ec85ded8 800 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 801 'upload_date': '20140605',
34952f09 802 'age_limit': 18,
c522adb1
JMF
803 },
804 },
8bdd16b4 805 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
806 # YouTube Red ad is not captured for creator
807 {
808 'url': '__2ABJjxzNo',
809 'info_dict': {
810 'id': '__2ABJjxzNo',
811 'ext': 'mp4',
812 'duration': 266,
813 'upload_date': '20100430',
814 'uploader_id': 'deadmau5',
815 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 816 'creator': 'deadmau5',
817 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 818 'uploader': 'deadmau5',
819 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 820 'alt_title': 'Some Chords',
8bdd16b4 821 },
822 'expected_warnings': [
823 'DASH manifest missing',
824 ]
825 },
067aa17e 826 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
827 {
828 'url': 'lqQg6PlCWgI',
829 'info_dict': {
830 'id': 'lqQg6PlCWgI',
831 'ext': 'mp4',
556dbe7f 832 'duration': 6085,
90227264 833 'upload_date': '20150827',
cbe2bd91 834 'uploader_id': 'olympic',
ec85ded8 835 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 836 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 837 'uploader': 'Olympic',
cbe2bd91
PH
838 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
839 },
840 'params': {
841 'skip_download': 'requires avconv',
e52a40ab 842 }
cbe2bd91 843 },
6271f1ca
PH
844 # Non-square pixels
845 {
846 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
847 'info_dict': {
848 'id': '_b-2C3KPAM0',
849 'ext': 'mp4',
850 'stretched_ratio': 16 / 9.,
556dbe7f 851 'duration': 85,
6271f1ca
PH
852 'upload_date': '20110310',
853 'uploader_id': 'AllenMeow',
ec85ded8 854 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 855 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 856 'uploader': '孫ᄋᄅ',
6271f1ca
PH
857 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
858 },
06b491eb
S
859 },
860 # url_encoded_fmt_stream_map is empty string
861 {
862 'url': 'qEJwOuvDf7I',
863 'info_dict': {
864 'id': 'qEJwOuvDf7I',
f57b7835 865 'ext': 'webm',
06b491eb
S
866 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
867 'description': '',
868 'upload_date': '20150404',
869 'uploader_id': 'spbelect',
870 'uploader': 'Наблюдатели Петербурга',
871 },
872 'params': {
873 'skip_download': 'requires avconv',
e323cf3f
S
874 },
875 'skip': 'This live event has ended.',
06b491eb 876 },
067aa17e 877 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
878 {
879 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
880 'info_dict': {
881 'id': 'FIl7x6_3R5Y',
eb6793ba 882 'ext': 'webm',
da77d856
S
883 'title': 'md5:7b81415841e02ecd4313668cde88737a',
884 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 885 'duration': 220,
da77d856
S
886 'upload_date': '20150625',
887 'uploader_id': 'dorappi2000',
ec85ded8 888 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 889 'uploader': 'dorappi2000',
eb6793ba 890 'formats': 'mincount:31',
da77d856 891 },
eb6793ba 892 'skip': 'not actual anymore',
2ee8f5d8 893 },
8a1a26ce
YCH
894 # DASH manifest with segment_list
895 {
896 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
897 'md5': '8ce563a1d667b599d21064e982ab9e31',
898 'info_dict': {
899 'id': 'CsmdDsKjzN8',
900 'ext': 'mp4',
17ee98e1 901 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
902 'uploader': 'Airtek',
903 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
904 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
905 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
906 },
907 'params': {
908 'youtube_include_dash_manifest': True,
909 'format': '135', # bestvideo
be49068d
S
910 },
911 'skip': 'This live event has ended.',
2ee8f5d8 912 },
cf7e015f
S
913 {
914 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 915 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 916 'info_dict': {
545cc85d 917 'id': 'jvGDaLqkpTg',
918 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
919 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
920 },
921 'playlist': [{
922 'info_dict': {
545cc85d 923 'id': 'jvGDaLqkpTg',
cf7e015f 924 'ext': 'mp4',
545cc85d 925 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
926 'description': 'md5:e03b909557865076822aa169218d6a5d',
927 'duration': 10643,
928 'upload_date': '20161111',
929 'uploader': 'Team PGP',
930 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
931 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
932 },
933 }, {
934 'info_dict': {
545cc85d 935 'id': '3AKt1R1aDnw',
cf7e015f 936 'ext': 'mp4',
545cc85d 937 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
938 'description': 'md5:e03b909557865076822aa169218d6a5d',
939 'duration': 10991,
940 'upload_date': '20161111',
941 'uploader': 'Team PGP',
942 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
943 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
944 },
945 }, {
946 'info_dict': {
545cc85d 947 'id': 'RtAMM00gpVc',
cf7e015f 948 'ext': 'mp4',
545cc85d 949 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
950 'description': 'md5:e03b909557865076822aa169218d6a5d',
951 'duration': 10995,
952 'upload_date': '20161111',
953 'uploader': 'Team PGP',
954 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
955 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
956 },
957 }, {
958 'info_dict': {
545cc85d 959 'id': '6N2fdlP3C5U',
cf7e015f 960 'ext': 'mp4',
545cc85d 961 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
962 'description': 'md5:e03b909557865076822aa169218d6a5d',
963 'duration': 10990,
964 'upload_date': '20161111',
965 'uploader': 'Team PGP',
966 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
967 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
968 },
969 }],
970 'params': {
971 'skip_download': True,
972 },
cbaed4bb 973 },
f9f49d87 974 {
067aa17e 975 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
976 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
977 'info_dict': {
978 'id': 'gVfLd0zydlo',
979 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
980 },
981 'playlist_count': 2,
be49068d 982 'skip': 'Not multifeed anymore',
f9f49d87 983 },
cbaed4bb 984 {
2d3d2997 985 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 986 'only_matching': True,
0e49d9a6 987 },
6d4fc66b 988 {
2d3d2997 989 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
990 'only_matching': True,
991 },
0e49d9a6 992 {
067aa17e 993 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 994 # Also tests cut-off URL expansion in video description (see
067aa17e
S
995 # https://github.com/ytdl-org/youtube-dl/issues/1892,
996 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
997 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
998 'info_dict': {
999 'id': 'lsguqyKfVQg',
1000 'ext': 'mp4',
1001 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 1002 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 1003 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 1004 'duration': 133,
0e49d9a6
LL
1005 'upload_date': '20151119',
1006 'uploader_id': 'IronSoulElf',
ec85ded8 1007 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1008 'uploader': 'IronSoulElf',
eb6793ba
S
1009 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1010 'track': 'Dark Walk - Position Music',
1011 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 1012 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1013 },
1014 'params': {
1015 'skip_download': True,
1016 },
1017 },
61f92af1 1018 {
067aa17e 1019 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1020 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1021 'only_matching': True,
1022 },
313dfc45
LL
1023 {
1024 # Video with yt:stretch=17:0
1025 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1026 'info_dict': {
1027 'id': 'Q39EVAstoRM',
1028 'ext': 'mp4',
1029 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1030 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1031 'upload_date': '20151107',
1032 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1033 'uploader': 'CH GAMER DROID',
1034 },
1035 'params': {
1036 'skip_download': True,
1037 },
be49068d 1038 'skip': 'This video does not exist.',
313dfc45 1039 },
201c1459 1040 {
1041 # Video with incomplete 'yt:stretch=16:'
1042 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1043 'only_matching': True,
1044 },
7caf9830
S
1045 {
1046 # Video licensed under Creative Commons
1047 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1048 'info_dict': {
1049 'id': 'M4gD1WSo5mA',
1050 'ext': 'mp4',
1051 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1052 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1053 'duration': 721,
7caf9830
S
1054 'upload_date': '20150127',
1055 'uploader_id': 'BerkmanCenter',
ec85ded8 1056 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1057 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1058 'license': 'Creative Commons Attribution license (reuse allowed)',
1059 },
1060 'params': {
1061 'skip_download': True,
1062 },
1063 },
fd050249
S
1064 {
1065 # Channel-like uploader_url
1066 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1067 'info_dict': {
1068 'id': 'eQcmzGIKrzg',
1069 'ext': 'mp4',
1070 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1071 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1072 'duration': 4060,
fd050249 1073 'upload_date': '20151119',
eb6793ba 1074 'uploader': 'Bernie Sanders',
fd050249 1075 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1076 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1077 'license': 'Creative Commons Attribution license (reuse allowed)',
1078 },
1079 'params': {
1080 'skip_download': True,
1081 },
1082 },
040ac686
S
1083 {
1084 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1085 'only_matching': True,
7f29cf54
S
1086 },
1087 {
067aa17e 1088 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1089 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1090 'only_matching': True,
6496ccb4
S
1091 },
1092 {
1093 # Rental video preview
1094 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1095 'info_dict': {
1096 'id': 'uGpuVWrhIzE',
1097 'ext': 'mp4',
1098 'title': 'Piku - Trailer',
1099 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1100 'upload_date': '20150811',
1101 'uploader': 'FlixMatrix',
1102 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1103 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1104 'license': 'Standard YouTube License',
1105 },
1106 'params': {
1107 'skip_download': True,
1108 },
eb6793ba 1109 'skip': 'This video is not available.',
022a5d66 1110 },
12afdc2a
S
1111 {
1112 # YouTube Red video with episode data
1113 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1114 'info_dict': {
1115 'id': 'iqKdEhx-dD4',
1116 'ext': 'mp4',
1117 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1118 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1119 'duration': 2085,
12afdc2a
S
1120 'upload_date': '20170118',
1121 'uploader': 'Vsauce',
1122 'uploader_id': 'Vsauce',
1123 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1124 'series': 'Mind Field',
1125 'season_number': 1,
1126 'episode_number': 1,
1127 },
1128 'params': {
1129 'skip_download': True,
1130 },
1131 'expected_warnings': [
1132 'Skipping DASH manifest',
1133 ],
1134 },
c7121fa7
S
1135 {
1136 # The following content has been identified by the YouTube community
1137 # as inappropriate or offensive to some audiences.
1138 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1139 'info_dict': {
1140 'id': '6SJNVb0GnPI',
1141 'ext': 'mp4',
1142 'title': 'Race Differences in Intelligence',
1143 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1144 'duration': 965,
1145 'upload_date': '20140124',
1146 'uploader': 'New Century Foundation',
1147 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1148 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1149 },
1150 'params': {
1151 'skip_download': True,
1152 },
545cc85d 1153 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1154 },
022a5d66
S
1155 {
1156 # itag 212
1157 'url': '1t24XAntNCY',
1158 'only_matching': True,
fd5c4aab
S
1159 },
1160 {
1161 # geo restricted to JP
1162 'url': 'sJL6WA-aGkQ',
1163 'only_matching': True,
1164 },
cd5a74a2
S
1165 {
1166 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1167 'only_matching': True,
1168 },
bc2ca1bb 1169 {
1170 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1171 'only_matching': True,
1172 },
1173 {
1174 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1175 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1176 'only_matching': True,
1177 },
825cd268
RA
1178 {
1179 # DRM protected
1180 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1181 'only_matching': True,
4fe54c12
S
1182 },
1183 {
1184 # Video with unsupported adaptive stream type formats
1185 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1186 'info_dict': {
1187 'id': 'Z4Vy8R84T1U',
1188 'ext': 'mp4',
1189 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1190 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1191 'duration': 433,
1192 'upload_date': '20130923',
1193 'uploader': 'Amelia Putri Harwita',
1194 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1195 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1196 'formats': 'maxcount:10',
1197 },
1198 'params': {
1199 'skip_download': True,
1200 'youtube_include_dash_manifest': False,
1201 },
5429d6a9 1202 'skip': 'not actual anymore',
5caabd3c 1203 },
1204 {
822b9d9c 1205 # Youtube Music Auto-generated description
5caabd3c 1206 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1207 'info_dict': {
1208 'id': 'MgNrAu2pzNs',
1209 'ext': 'mp4',
1210 'title': 'Voyeur Girl',
1211 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1212 'upload_date': '20190312',
5429d6a9
S
1213 'uploader': 'Stephen - Topic',
1214 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1215 'artist': 'Stephen',
1216 'track': 'Voyeur Girl',
1217 'album': 'it\'s too much love to know my dear',
1218 'release_date': '20190313',
1219 'release_year': 2019,
1220 },
1221 'params': {
1222 'skip_download': True,
1223 },
1224 },
66b48727
RA
1225 {
1226 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1227 'only_matching': True,
1228 },
011e75e6
S
1229 {
1230 # invalid -> valid video id redirection
1231 'url': 'DJztXj2GPfl',
1232 'info_dict': {
1233 'id': 'DJztXj2GPfk',
1234 'ext': 'mp4',
1235 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1236 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1237 'upload_date': '20090125',
1238 'uploader': 'Prochorowka',
1239 'uploader_id': 'Prochorowka',
1240 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1241 'artist': 'Panjabi MC',
1242 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1243 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1244 },
1245 'params': {
1246 'skip_download': True,
1247 },
545cc85d 1248 'skip': 'Video unavailable',
ea74e00b
DP
1249 },
1250 {
1251 # empty description results in an empty string
1252 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1253 'info_dict': {
1254 'id': 'x41yOUIvK2k',
1255 'ext': 'mp4',
1256 'title': 'IMG 3456',
1257 'description': '',
1258 'upload_date': '20170613',
1259 'uploader_id': 'ElevageOrVert',
1260 'uploader': 'ElevageOrVert',
1261 },
1262 'params': {
1263 'skip_download': True,
1264 },
1265 },
a0566bbf 1266 {
29f7c58a 1267 # with '};' inside yt initial data (see [1])
1268 # see [2] for an example with '};' inside ytInitialPlayerResponse
1269 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1270 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1271 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1272 'info_dict': {
1273 'id': 'CHqg6qOn4no',
1274 'ext': 'mp4',
1275 'title': 'Part 77 Sort a list of simple types in c#',
1276 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1277 'upload_date': '20130831',
1278 'uploader_id': 'kudvenkat',
1279 'uploader': 'kudvenkat',
1280 },
1281 'params': {
1282 'skip_download': True,
1283 },
1284 },
29f7c58a 1285 {
1286 # another example of '};' in ytInitialData
1287 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1288 'only_matching': True,
1289 },
1290 {
1291 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1292 'only_matching': True,
1293 },
{
    # https://github.com/ytdl-org/youtube-dl/pull/28094
    # Auto-generated music description without a release date: both
    # release fields are expected to be absent.
    'url': 'OtqTfy26tG0',
    'info_dict': {
        'id': 'OtqTfy26tG0',
        'ext': 'mp4',
        'title': 'Burn Out',
        'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
        'upload_date': '20141120',
        'uploader': 'The Cinematic Orchestra - Topic',
        'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
        'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
        'artist': 'The Cinematic Orchestra',
        'track': 'Burn Out',
        'album': 'Every Day',
        # Was misspelled 'release_data', which no extractor ever emits, so
        # the expectation was never checked against the real field.
        'release_date': None,
        'release_year': None,
    },
    'params': {
        'skip_download': True,
    },
},
bc2ca1bb 1316 {
1317 # controversial video, only works with bpctr when authenticated with cookies
1318 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1319 'only_matching': True,
1320 },
f7ad7160 1321 {
1322 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1323 'url': 'cBvYw8_A0vQ',
1324 'info_dict': {
1325 'id': 'cBvYw8_A0vQ',
1326 'ext': 'mp4',
1327 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1328 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1329 'upload_date': '20201120',
1330 'uploader': 'Walk around Japan',
1331 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1332 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1333 },
1334 'params': {
1335 'skip_download': True,
1336 },
0fb983f6 1337 }, {
1338 # Has multiple audio streams
1339 'url': 'WaOKSUlf4TM',
1340 'only_matching': True
9297939e 1341 }, {
1342 # Requires Premium: has format 141 when requested using YTM url
1343 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1344 'only_matching': True
1345 }, {
120916da 1346 # multiple subtitles with same lang_code
1347 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1348 'only_matching': True,
1349 },
2eb88d95
PH
1350 ]
1351
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected here; every other URL is decided by the parent class.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if not list_values[0]:
        return super(YoutubeIE, cls).suitable(url)
    return False
1361
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Set up the parent extractor, then create the two empty caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # _player_cache: (player URL, signature shape) -> signature function,
    #                filled by _decrypt_signature
    # _code_cache:   player id -> downloaded player code,
    #                filled by _extract_signature_function
    self._player_cache, self._code_cache = {}, {}
e0df6211 1366
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Describe the shape of a signature as dot-joined part lengths.

    The signature is split on '.', and the length of every part is
    rendered as text and joined with '.' again.
    """
    part_lengths = [compat_str(len(part)) for part in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1370
e40c758c
S
@classmethod
def _extract_player_info(cls, player_url):
    """Return the player id embedded in *player_url*.

    The patterns in ``cls._PLAYER_INFO_RE`` are tried in order and the
    ``id`` group of the first one that matches is returned.

    Raises ExtractorError when none of the patterns matches.
    """
    for pattern in cls._PLAYER_INFO_RE:
        found = re.search(pattern, player_url)
        if found is not None:
            return found.group('id')
    raise ExtractorError('Cannot identify player %r' % player_url)
e40c758c
S
1380
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that descrambles signatures shaped like *example_sig*.

    The downloader's 'youtube-sigfuncs' cache is consulted first.  On a
    miss, the player code behind *player_url* is downloaded (at most once
    per player id for this instance), parsed with _parse_sig_js, and the
    character permutation it performs is written back to the cache.

    *video_id* is only handed to the download call.
    """
    player_id = self._extract_player_info(player_url)

    # Cache entry name: player id plus the shape of the signature
    sig_shape = self._signature_cache_id(example_sig)
    func_id = 'js_%s_%s' % (player_id, sig_shape)
    # The entry name must not contain any directory component
    assert os.path.basename(func_id) == func_id

    stored_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if stored_spec is not None:
        def permute(s):
            return ''.join(s[i] for i in stored_spec)
        return permute

    if player_id not in self._code_cache:
        player_code = self._download_webpage(
            player_url, video_id,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
        self._code_cache[player_id] = player_code
    sig_func = self._parse_sig_js(self._code_cache[player_id])

    # Feed in a string whose n-th character has code point n; the code
    # points of the output then spell out the permutation as indices.
    probe = ''.join(compat_chr(n) for n in range(len(example_sig)))
    index_spec = [ord(ch) for ch in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, index_spec)
    return sig_func
1407
def _print_sig_code(self, func, example_sig):
    """Print Python source equivalent to the signature function *func*.

    *func* is applied to a probe string whose n-th character has code
    point n (one character per character of *example_sig*); the code points
    of the result are the source indices of every output character.  Those
    indices are rendered as a sum of index/slice expressions on ``s``,
    guarded by a check on the part lengths of the signature, and the
    resulting snippet is written out with self.to_screen.
    """
    def gen_sig_code(idxs):
        # Yield 's[...]' expressions covering idxs in order; consecutive
        # indices that differ by a constant +1 or -1 become one slice.
        def _genslice(start, end, step):
            # Text of the slice running from start to end inclusive.
            # The start is left out when it is 0, the stop when end + step
            # is negative, and the step when it is 1.
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        # step is None while no run is open, otherwise the run's +1/-1 step
        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    # Still inside the current run
                    continue
                # The run ended at prev; emit it and start afresh
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # prev opens a new run
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Emit whatever is still pending after the last pair.
        # NOTE(review): when idxs has fewer than two elements the loop body
        # never runs and `i` is unbound here.
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    # NOTE(review): the leading whitespace inside the second literal is
    # reproduced as it appears in the reviewed text - confirm it matches
    # the intended indentation of the generated snippet.
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1446
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Build a Python callable from the signature function in *jscode*.

    The name of the signature function is located with the regular
    expressions below (tried in order; the ``sig`` group holds the name),
    the function is extracted with JSInterpreter, and a wrapper taking the
    scrambled signature string is returned.
    """
    funcname_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        # This pattern used to be listed twice in a row; the second copy
        # could never match anything the first one had not, so it is gone.
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        funcname_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes its arguments as a list
    return lambda s: initial_function([s])
1470
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature.

    *player_url* is first made absolute (protocol-relative URLs get
    'https:', anything that is not http(s) is joined onto
    https://www.youtube.com).  The signature function is then taken from
    the per-instance cache, or extracted and cached, and applied to *s*.
    When the 'youtube_print_sig_code' parameter is set, the function is
    also printed via _print_sig_code.

    Raises ExtractorError when *player_url* is None, or - with the
    formatted traceback in the message - when anything fails while
    obtaining or applying the signature function.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Normalise the player URL to an absolute one
    is_protocol_relative = player_url.startswith('//')
    if is_protocol_relative:
        player_url = 'https:' + player_url
    elif re.match(r'https?://', player_url) is None:
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        if self.get_param('youtube_print_sig_code'):
            self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(),
            cause=e)
e0df6211 1497
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL found in *player_response*.

    The URL is read from
    player_response['playbackTracking']['videostatsPlaybackUrl']['baseUrl'];
    when it is missing or not a URL, nothing happens.  Otherwise the
    ``ver`` and ``cpn`` query parameters are set and the URL is fetched
    non-fatally.
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # 16 characters drawn uniformly from the 64-character alphabet.  The
    # previous `randint(0, 256) & 63` had an inclusive upper bound, so 257
    # values were folded onto 64 slots and the first character was favoured.
    cpn = ''.join(random.choice(CPN_ALPHABET) for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    # parse_qs yields lists of values, hence doseq=True when re-encoding
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1522
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return every YouTube embed found in webpage, in discovery order.

    Player embeds contribute their (HTML-unescaped) URL; lazyYT and the
    Wordpress "YouTube Video Importer" plugin contribute the raw value
    of their video id attribute.
    """
    # Embedded YouTube player
    player_embed_re = r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
        (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1'''
    found = []
    for match in re.finditer(player_embed_re, webpage):
        found.append(unescapeHTML(match.group('url')))

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    found.extend([unescapeHTML(lazy_id) for lazy_id in lazy_ids])

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    for groups in re.findall(importer_re, webpage):
        # The video id is the last captured group
        found.append(groups[-1])

    return found
1554
@staticmethod
def _extract_url(webpage):
    """Return the first entry of YoutubeIE._extract_urls(webpage), or None."""
    candidates = YoutubeIE._extract_urls(webpage)
    if candidates:
        return candidates[0]
    return None
1559
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return group 2 of cls._VALID_URL matched (verbosely) against url.

    Raises ExtractorError when url does not match.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1567
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build the chapter list from the chaptered player bar in data.

    Each chapter ends where the next one starts; the last one ends at
    duration. Chapters without a usable start or end time are dropped.
    Returns None when data carries no (non-empty) chapter list.
    video_id is not used here.
    """
    raw_chapters = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not raw_chapters:
        return

    def start_seconds(raw_chapter):
        # timeRangeStartMillis is in milliseconds
        millis = try_get(
            raw_chapter,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    starts = [start_seconds(raw_chapter) for raw_chapter in raw_chapters]
    # The end of a chapter is the start of its successor
    ends = starts[1:] + [duration]

    result = []
    for raw_chapter, begin, finish in zip(raw_chapters, starts, ends):
        if begin is None or finish is None:
            continue
        result.append({
            'start_time': begin,
            'end_time': finish,
            'title': try_get(
                raw_chapter,
                lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return result
1607
545cc85d 1608 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1609 return self._parse_json(self._search_regex(
1610 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1611 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 1612
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns the result of datetime_from_str('now-<X><units>') or None
    when time_text is empty/None or has fewer than three
    space-separated words.
    """
    # Callers may pass None when the renderer carries no time text
    if not time_text:
        return None
    time_text_split = time_text.split(' ')
    if len(time_text_split) < 3:
        return None
    return datetime_from_str(
        'now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1622
a1c5d2ca
M
1623 @staticmethod
1624 def _join_text_entries(runs):
1625 text = None
1626 for run in runs:
1627 if not isinstance(run, dict):
1628 continue
1629 sub_text = try_get(run, lambda x: x['text'], compat_str)
1630 if sub_text:
1631 if not text:
1632 text = sub_text
1633 continue
1634 text += sub_text
1635 return text
1636
1637 def _extract_comment(self, comment_renderer, parent=None):
1638 comment_id = comment_renderer.get('commentId')
1639 if not comment_id:
1640 return
1641 comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
1642 text = self._join_text_entries(comment_text_runs) or ''
1643 comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
1644 time_text = self._join_text_entries(comment_time_text)
d92f5d5a 1645 timestamp = calendar.timegm(self.parse_time_text(time_text).timetuple())
a1c5d2ca
M
1646 author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
1647 author_id = try_get(comment_renderer,
1648 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
1649 votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
1650 lambda x: x['likeCount']), compat_str)) or 0
1651 author_thumbnail = try_get(comment_renderer,
1652 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
1653
1654 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
1655 is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
a1c5d2ca
M
1656 return {
1657 'id': comment_id,
1658 'text': text,
d92f5d5a 1659 'timestamp': timestamp,
a1c5d2ca
M
1660 'time_text': time_text,
1661 'like_count': votes,
1662 'is_favorited': is_liked,
1663 'author': author,
1664 'author_id': author_id,
1665 'author_thumbnail': author_thumbnail,
1666 'author_is_uploader': author_is_uploader,
1667 'parent': parent or 'root'
1668 }
1669
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, session_token_list, parent=None, comment_counts=None):
    """Generate the comments reachable from root_continuation_data.

    Yields comment dicts (see _extract_comment). On the first page of a
    root thread (parent is None) the estimated total number of comments
    is additionally yielded once as an int, when it could be extracted.

    session_token_list is a one-element list holding the session token;
    it is updated in place whenever a response carries a new
    'xsrf_token'. parent is the id of the comment whose replies are being
    fetched, or None for the root thread. comment_counts is the shared
    mutable list [comments so far, est. total comments, current comment
    thread #]; it is created here when not given and handed on to the
    recursive calls for reply threads.

    Each page is requested up to 'extractor_retries' additional times on
    HTTP 500/503/404, on an external error message or on incomplete
    data. HTTP 413 is taken as the end of the comments.
    """

    def extract_thread(parent_renderer):
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # dict is the expected type here, not an additional getter
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    # TODO: Generalize the download code with TabIE
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    first_continuation = False
    if parent is None:
        first_continuation = True

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        retries = self.get_param('extractor_retries', 3)
        count = -1
        last_error = None

        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if 'itct' in continuation:
                    query['itct'] = continuation['itct']
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = 'Downloading initial comment continuation page'
                    else:
                        note_prefix = ' Downloading comment reply thread %d %s' % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = '%sDownloading comment%s page %d %s' % (
                        ' ' if parent else '',
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        self.report_warning('Assumed end of comments (received HTTP Error 413)')
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + ' (this API is probably deprecated)'
                    if count < retries:
                        continue
                # Any other error, or retries exhausted
                raise
            else:
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response']), dict) or {}

                if response.get('continuationContents'):
                    break

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if isinstance(browse, dict):
                    if browse.get('reload'):
                        raise ExtractorError('Invalid or missing params in continuation request', expected=False)

                    # TODO: not tested, merged from old extractor
                    err_msg = browse.get('externalErrorMessage')
                    if err_msg:
                        last_error = err_msg
                        continue

                response_error = try_get(response, lambda x: x['responseContext']['errors']['error'][0], dict) or {}
                err_msg = response_error.get('externalErrorMessage')
                if err_msg:
                    last_error = err_msg
                    continue

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    raise ExtractorError(last_error)

        if not response:
            break
        # Prefer the visitor data of the latest response for the next request
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        known_continuation_renderers = {
            'itemSectionContinuation': extract_thread,
            'commentRepliesContinuation': extract_thread
        }

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value

            if first_continuation:
                first_continuation = False
                expected_comment_count = try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str)

                if expected_comment_count:
                    comment_counts[1] = str_to_int(expected_comment_count)
                    self.to_screen('Downloading ~%d comments' % str_to_int(expected_comment_count))
                    # The estimated total is reported to the caller as a bare int
                    yield comment_counts[1]

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
                    [comment_sort_index]['continuation']['reloadContinuationData'], dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen('Sorting comments by %s' % ('popular' if comment_sort_index == 0 else 'newest'))
                    # Restart from the sorted continuation without
                    # extracting this page's comments
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            break
1864
1865 def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
1866 """Entry for comment extraction"""
1867 comments = []
1868 known_entry_comment_renderers = (
1869 'itemSectionRenderer',
1870 )
1871 estimated_total = 0
1872 for entry in contents:
1873 for key, renderer in entry.items():
1874 if key not in known_entry_comment_renderers:
1875 continue
1876
1877 comment_iter = self._comment_entries(
1878 renderer,
1879 identity_token=self._extract_identity_token(webpage, item_id=video_id),
1880 account_syncid=self._extract_account_syncid(ytcfg),
f4f751af 1881 ytcfg=ytcfg,
a1c5d2ca
M
1882 session_token_list=[xsrf_token])
1883
1884 for comment in comment_iter:
1885 if isinstance(comment, int):
1886 estimated_total = comment
1887 continue
1888 comments.append(comment)
1889 break
d92f5d5a 1890 self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
a1c5d2ca
M
1891 return {
1892 'comments': comments,
1893 'comment_count': len(comments),
1894 }
1895
4e6767b5 1896 @staticmethod
1897 def _get_video_info_params(video_id):
1898 return {
1899 'video_id': video_id,
1900 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1901 'html5': '1',
1902 'c': 'TVHTML5',
1903 'cver': '6.20180913',
1904 }
1905
c5e8d7af 1906 def _real_extract(self, url):
cf7e015f 1907 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1908 video_id = self._match_id(url)
9297939e 1909
1910 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
1911
545cc85d 1912 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 1913 webpage_url = base_url + 'watch?v=' + video_id
1914 webpage = self._download_webpage(
cce889b9 1915 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 1916
9297939e 1917 def get_text(x):
1918 if not x:
1919 return
1920 text = x.get('simpleText')
1921 if text and isinstance(text, compat_str):
1922 return text
1923 runs = x.get('runs')
1924 if not isinstance(runs, list):
1925 return
1926 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
1927
1928 ytm_streaming_data = {}
1929 if is_music_url:
1930 # we are forcing to use parse_json because 141 only appeared in get_video_info.
1931 # el, c, cver, cplayer field required for 141(aac 256kbps) codec
1932 # maybe paramter of youtube music player?
1933 ytm_player_response = self._parse_json(try_get(compat_parse_qs(
1934 self._download_webpage(
1935 base_url + 'get_video_info', video_id,
fe03a6cd 1936 'Fetching youtube music info webpage',
1937 'unable to download youtube music info webpage', query={
4e6767b5 1938 **self._get_video_info_params(video_id),
9297939e 1939 'el': 'detailpage',
1940 'c': 'WEB_REMIX',
1941 'cver': '0.1',
00ae2769 1942 'cplayer': 'UNIPLAYER',
ed807c18 1943 }, fatal=False) or ''),
9297939e 1944 lambda x: x['player_response'][0],
ed807c18 1945 compat_str) or '{}', video_id, fatal=False)
9297939e 1946 ytm_streaming_data = ytm_player_response.get('streamingData') or {}
1947
545cc85d 1948 player_response = None
1949 if webpage:
1950 player_response = self._extract_yt_initial_variable(
1951 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1952 video_id, 'initial player response')
f4f751af 1953
1954 ytcfg = self._extract_ytcfg(video_id, webpage)
545cc85d 1955 if not player_response:
1956 player_response = self._call_api(
f4f751af 1957 'player', {'videoId': video_id}, video_id, api_key=self._extract_api_key(ytcfg))
545cc85d 1958
1959 playability_status = player_response.get('playabilityStatus') or {}
1960 if playability_status.get('reason') == 'Sign in to confirm your age':
1961 pr = self._parse_json(try_get(compat_parse_qs(
1962 self._download_webpage(
1963 base_url + 'get_video_info', video_id,
4e6767b5 1964 'Refetching age-gated info webpage', 'unable to download video info webpage',
1965 query=self._get_video_info_params(video_id), fatal=False)),
545cc85d 1966 lambda x: x['player_response'][0],
1967 compat_str) or '{}', video_id)
1968 if pr:
1969 player_response = pr
1970
1971 trailer_video_id = try_get(
1972 playability_status,
1973 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1974 compat_str)
1975 if trailer_video_id:
1976 return self.url_result(
1977 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1978
545cc85d 1979 search_meta = (
1980 lambda x: self._html_search_meta(x, webpage, default=None)) \
1981 if webpage else lambda x: None
dbdaaa23 1982
545cc85d 1983 video_details = player_response.get('videoDetails') or {}
37357d21 1984 microformat = try_get(
545cc85d 1985 player_response,
1986 lambda x: x['microformat']['playerMicroformatRenderer'],
1987 dict) or {}
1988 video_title = video_details.get('title') \
1989 or get_text(microformat.get('title')) \
1990 or search_meta(['og:title', 'twitter:title', 'title'])
1991 video_description = video_details.get('shortDescription')
cf7e015f 1992
8fe10494 1993 if not smuggled_data.get('force_singlefeed', False):
a06916d9 1994 if not self.get_param('noplaylist'):
8fe10494
S
1995 multifeed_metadata_list = try_get(
1996 player_response,
1997 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1998 compat_str)
8fe10494
S
1999 if multifeed_metadata_list:
2000 entries = []
2001 feed_ids = []
2002 for feed in multifeed_metadata_list.split(','):
2003 # Unquote should take place before split on comma (,) since textual
2004 # fields may contain comma as well (see
067aa17e 2005 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 2006 feed_data = compat_parse_qs(
2007 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
2008
2009 def feed_entry(name):
545cc85d 2010 return try_get(
2011 feed_data, lambda x: x[name][0], compat_str)
6b09401b
S
2012
2013 feed_id = feed_entry('id')
2014 if not feed_id:
2015 continue
2016 feed_title = feed_entry('title')
2017 title = video_title
2018 if feed_title:
2019 title += ' (%s)' % feed_title
8fe10494
S
2020 entries.append({
2021 '_type': 'url_transparent',
2022 'ie_key': 'Youtube',
2023 'url': smuggle_url(
545cc85d 2024 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 2025 {'force_singlefeed': True}),
6b09401b 2026 'title': title,
8fe10494 2027 })
6b09401b 2028 feed_ids.append(feed_id)
8fe10494
S
2029 self.to_screen(
2030 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2031 % (', '.join(feed_ids), video_id))
545cc85d 2032 return self.playlist_result(
2033 entries, video_id, video_title, video_description)
8fe10494
S
2034 else:
2035 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 2036
9297939e 2037 formats, itags, stream_ids = [], [], []
cc2db878 2038 itag_qualities = {}
545cc85d 2039 player_url = None
d3fc8074 2040 q = qualities([
2041 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2042 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2043 ])
9297939e 2044
545cc85d 2045 streaming_data = player_response.get('streamingData') or {}
2046 streaming_formats = streaming_data.get('formats') or []
2047 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
9297939e 2048 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2049 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2050
545cc85d 2051 for fmt in streaming_formats:
2052 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2053 continue
321bf820 2054
cc2db878 2055 itag = str_or_none(fmt.get('itag'))
9297939e 2056 audio_track = fmt.get('audioTrack') or {}
2057 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2058 if stream_id in stream_ids:
2059 continue
2060
cc2db878 2061 quality = fmt.get('quality')
d3fc8074 2062 if quality == 'tiny' or not quality:
2063 quality = fmt.get('audioQuality', '').lower() or quality
cc2db878 2064 if itag and quality:
2065 itag_qualities[itag] = quality
2066 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2067 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2068 # number of fragment that would subsequently requested with (`&sq=N`)
2069 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2070 continue
2071
545cc85d 2072 fmt_url = fmt.get('url')
2073 if not fmt_url:
2074 sc = compat_parse_qs(fmt.get('signatureCipher'))
2075 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2076 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2077 if not (sc and fmt_url and encrypted_sig):
2078 continue
2079 if not player_url:
2080 if not webpage:
2081 continue
2082 player_url = self._search_regex(
2083 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
2084 webpage, 'player URL', fatal=False)
2085 if not player_url:
201e9eaa 2086 continue
545cc85d 2087 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2088 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2089 fmt_url += '&' + sp + '=' + signature
2090
545cc85d 2091 if itag:
2092 itags.append(itag)
9297939e 2093 stream_ids.append(stream_id)
2094
cc2db878 2095 tbr = float_or_none(
2096 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 2097 dct = {
2098 'asr': int_or_none(fmt.get('audioSampleRate')),
2099 'filesize': int_or_none(fmt.get('contentLength')),
2100 'format_id': itag,
0fb983f6 2101 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
545cc85d 2102 'fps': int_or_none(fmt.get('fps')),
2103 'height': int_or_none(fmt.get('height')),
dca3ff4a 2104 'quality': q(quality),
cc2db878 2105 'tbr': tbr,
545cc85d 2106 'url': fmt_url,
2107 'width': fmt.get('width'),
0fb983f6 2108 'language': audio_track.get('id', '').split('.')[0],
545cc85d 2109 }
2110 mimetype = fmt.get('mimeType')
2111 if mimetype:
2112 mobj = re.match(
2113 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
2114 if mobj:
2115 dct['ext'] = mimetype2ext(mobj.group(1))
2116 dct.update(parse_codecs(mobj.group(2)))
cc2db878 2117 no_audio = dct.get('acodec') == 'none'
2118 no_video = dct.get('vcodec') == 'none'
2119 if no_audio:
2120 dct['vbr'] = tbr
2121 if no_video:
2122 dct['abr'] = tbr
2123 if no_audio or no_video:
545cc85d 2124 dct['downloader_options'] = {
2125 # Youtube throttles chunks >~10M
2126 'http_chunk_size': 10485760,
bf1317d2 2127 }
7c60c33e 2128 if dct.get('ext'):
2129 dct['container'] = dct['ext'] + '_dash'
545cc85d 2130 formats.append(dct)
2131
5d3a0e79 2132 skip_manifests = self._configuration_arg('skip') or []
2133 get_dash = 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
2134 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2135
9297939e 2136 for sd in (streaming_data, ytm_streaming_data):
5d3a0e79 2137 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
9297939e 2138 if hls_manifest_url:
2139 for f in self._extract_m3u8_formats(
2140 hls_manifest_url, video_id, 'mp4', fatal=False):
2141 itag = self._search_regex(
2142 r'/itag/(\d+)', f['url'], 'itag', default=None)
2143 if itag:
2144 f['format_id'] = itag
8d68ab98 2145 formats.append(f)
545cc85d 2146
5d3a0e79 2147 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2148 if dash_manifest_url:
2149 for f in self._extract_mpd_formats(
2150 dash_manifest_url, video_id, fatal=False):
2151 itag = f['format_id']
2152 if itag in itags:
2153 continue
2154 if itag in itag_qualities:
2155 f['quality'] = q(itag_qualities[itag])
2156 filesize = int_or_none(self._search_regex(
2157 r'/clen/(\d+)', f.get('fragment_base_url')
2158 or f['url'], 'file size', default=None))
2159 if filesize:
2160 f['filesize'] = filesize
2161 formats.append(f)
bf1317d2 2162
545cc85d 2163 if not formats:
a06916d9 2164 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
b7da73eb 2165 self.raise_no_formats(
545cc85d 2166 'This video is DRM protected.', expected=True)
2167 pemr = try_get(
2168 playability_status,
2169 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2170 dict) or {}
2171 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2172 subreason = pemr.get('subreason')
2173 if subreason:
2174 subreason = clean_html(get_text(subreason))
2175 if subreason == 'The uploader has not made this video available in your country.':
2176 countries = microformat.get('availableCountries')
2177 if not countries:
2178 regions_allowed = search_meta('regionsAllowed')
2179 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2180 self.raise_geo_restricted(subreason, countries, metadata_available=True)
545cc85d 2181 reason += '\n' + subreason
2182 if reason:
b7da73eb 2183 self.raise_no_formats(reason, expected=True)
bf1317d2 2184
545cc85d 2185 self._sort_formats(formats)
bf1317d2 2186
545cc85d 2187 keywords = video_details.get('keywords') or []
2188 if not keywords and webpage:
2189 keywords = [
2190 unescapeHTML(m.group('content'))
2191 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2192 for keyword in keywords:
2193 if keyword.startswith('yt:stretch='):
201c1459 2194 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2195 if mobj:
2196 # NB: float is intentional for forcing float division
2197 w, h = (float(v) for v in mobj.groups())
2198 if w > 0 and h > 0:
2199 ratio = w / h
2200 for f in formats:
2201 if f.get('vcodec') != 'none':
2202 f['stretched_ratio'] = ratio
2203 break
6449cd80 2204
545cc85d 2205 thumbnails = []
2206 for container in (video_details, microformat):
2207 for thumbnail in (try_get(
2208 container,
2209 lambda x: x['thumbnail']['thumbnails'], list) or []):
2210 thumbnail_url = thumbnail.get('url')
2211 if not thumbnail_url:
bf1317d2 2212 continue
1988fab7 2213 # Sometimes youtube gives a wrong thumbnail URL. See:
2214 # https://github.com/yt-dlp/yt-dlp/issues/233
2215 # https://github.com/ytdl-org/youtube-dl/issues/28023
2216 if 'maxresdefault' in thumbnail_url:
2217 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2218 thumbnails.append({
545cc85d 2219 'url': thumbnail_url,
ff2751ac 2220 'height': int_or_none(thumbnail.get('height')),
545cc85d 2221 'width': int_or_none(thumbnail.get('width')),
ff2751ac 2222 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
545cc85d 2223 })
ff2751ac 2224 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2225 if thumbnail_url:
2226 thumbnails.append({
2227 'url': thumbnail_url,
2228 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2229 })
2230 # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
2231 # See: https://github.com/ytdl-org/youtube-dl/issues/29049
2232 thumbnails.append({
2233 'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
2234 'preference': 1,
2235 })
2236 self._remove_duplicate_formats(thumbnails)
545cc85d 2237
2238 category = microformat.get('category') or search_meta('genre')
2239 channel_id = video_details.get('channelId') \
2240 or microformat.get('externalChannelId') \
2241 or search_meta('channelId')
2242 duration = int_or_none(
2243 video_details.get('lengthSeconds')
2244 or microformat.get('lengthSeconds')) \
2245 or parse_duration(search_meta('duration'))
2246 is_live = video_details.get('isLive')
2247 owner_profile_url = microformat.get('ownerProfileUrl')
2248
2249 info = {
2250 'id': video_id,
2251 'title': self._live_title(video_title) if is_live else video_title,
2252 'formats': formats,
2253 'thumbnails': thumbnails,
2254 'description': video_description,
2255 'upload_date': unified_strdate(
2256 microformat.get('uploadDate')
2257 or search_meta('uploadDate')),
2258 'uploader': video_details['author'],
2259 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2260 'uploader_url': owner_profile_url,
2261 'channel_id': channel_id,
2262 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2263 'duration': duration,
2264 'view_count': int_or_none(
2265 video_details.get('viewCount')
2266 or microformat.get('viewCount')
2267 or search_meta('interactionCount')),
2268 'average_rating': float_or_none(video_details.get('averageRating')),
2269 'age_limit': 18 if (
2270 microformat.get('isFamilySafe') is False
2271 or search_meta('isFamilyFriendly') == 'false'
2272 or search_meta('og:restrictions:age') == '18+') else 0,
2273 'webpage_url': webpage_url,
2274 'categories': [category] if category else None,
2275 'tags': keywords,
2276 'is_live': is_live,
2277 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2278 'was_live': video_details.get('isLiveContent'),
545cc85d 2279 }
b477fc13 2280
545cc85d 2281 pctr = try_get(
2282 player_response,
2283 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2284 subtitles = {}
2285 if pctr:
774d79cc 2286 def process_language(container, base_url, lang_code, sub_name, query):
120916da 2287 lang_subs = container.setdefault(lang_code, [])
545cc85d 2288 for fmt in self._SUBTITLE_FORMATS:
2289 query.update({
2290 'fmt': fmt,
2291 })
2292 lang_subs.append({
2293 'ext': fmt,
2294 'url': update_url_query(base_url, query),
774d79cc 2295 'name': sub_name,
545cc85d 2296 })
7e72694b 2297
545cc85d 2298 for caption_track in (pctr.get('captionTracks') or []):
2299 base_url = caption_track.get('baseUrl')
2300 if not base_url:
2301 continue
2302 if caption_track.get('kind') != 'asr':
120916da 2303 lang_code = (
2304 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2305 or caption_track.get('languageCode'))
545cc85d 2306 if not lang_code:
2307 continue
2308 process_language(
774d79cc 2309 subtitles, base_url, lang_code,
2310 try_get(caption_track, lambda x: x.get('name').get('simpleText')),
2311 {})
545cc85d 2312 continue
2313 automatic_captions = {}
2314 for translation_language in (pctr.get('translationLanguages') or []):
2315 translation_language_code = translation_language.get('languageCode')
2316 if not translation_language_code:
2317 continue
2318 process_language(
2319 automatic_captions, base_url, translation_language_code,
49c258e1 2320 try_get(translation_language, (
2321 lambda x: x['languageName']['simpleText'],
2322 lambda x: x['languageName']['runs'][0]['text'])),
545cc85d 2323 {'tlang': translation_language_code})
2324 info['automatic_captions'] = automatic_captions
2325 info['subtitles'] = subtitles
7e72694b 2326
545cc85d 2327 parsed_url = compat_urllib_parse_urlparse(url)
2328 for component in [parsed_url.fragment, parsed_url.query]:
2329 query = compat_parse_qs(component)
2330 for k, v in query.items():
2331 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2332 d_k += '_time'
2333 if d_k not in info and k in s_ks:
2334 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2335
2336 # Youtube Music Auto-generated description
822b9d9c 2337 if video_description:
38d70284 2338 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2339 if mobj:
822b9d9c
RA
2340 release_year = mobj.group('release_year')
2341 release_date = mobj.group('release_date')
2342 if release_date:
2343 release_date = release_date.replace('-', '')
2344 if not release_year:
545cc85d 2345 release_year = release_date[:4]
2346 info.update({
2347 'album': mobj.group('album'.strip()),
2348 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2349 'track': mobj.group('track').strip(),
2350 'release_date': release_date,
cc2db878 2351 'release_year': int_or_none(release_year),
545cc85d 2352 })
7e72694b 2353
545cc85d 2354 initial_data = None
2355 if webpage:
2356 initial_data = self._extract_yt_initial_variable(
2357 webpage, self._YT_INITIAL_DATA_RE, video_id,
2358 'yt initial data')
2359 if not initial_data:
2360 initial_data = self._call_api(
f4f751af 2361 'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg))
545cc85d 2362
c60ee3a2 2363 try:
2364 # This will error if there is no livechat
2365 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2366 info['subtitles']['live_chat'] = [{
2367 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2368 'video_id': video_id,
2369 'ext': 'json',
2370 'protocol': 'youtube_live_chat' if is_live else 'youtube_live_chat_replay',
2371 }]
2372 except (KeyError, IndexError, TypeError):
2373 pass
545cc85d 2374
2375 if initial_data:
2376 chapters = self._extract_chapters_from_json(
2377 initial_data, video_id, duration)
2378 if not chapters:
2379 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2380 contents = try_get(
2381 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2382 list)
2383 if not contents:
2384 continue
2385
2386 def chapter_time(mmlir):
2387 return parse_duration(
2388 get_text(mmlir.get('timeDescription')))
2389
2390 chapters = []
2391 for next_num, content in enumerate(contents, start=1):
2392 mmlir = content.get('macroMarkersListItemRenderer') or {}
2393 start_time = chapter_time(mmlir)
2394 end_time = chapter_time(try_get(
2395 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2396 if next_num < len(contents) else duration
2397 if start_time is None or end_time is None:
2398 continue
2399 chapters.append({
2400 'start_time': start_time,
2401 'end_time': end_time,
2402 'title': get_text(mmlir.get('title')),
2403 })
2404 if chapters:
2405 break
2406 if chapters:
2407 info['chapters'] = chapters
2408
2409 contents = try_get(
2410 initial_data,
2411 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2412 list) or []
2413 for content in contents:
2414 vpir = content.get('videoPrimaryInfoRenderer')
2415 if vpir:
2416 stl = vpir.get('superTitleLink')
2417 if stl:
2418 stl = get_text(stl)
2419 if try_get(
2420 vpir,
2421 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2422 info['location'] = stl
2423 else:
2424 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2425 if mobj:
2426 info.update({
2427 'series': mobj.group(1),
2428 'season_number': int(mobj.group(2)),
2429 'episode_number': int(mobj.group(3)),
2430 })
2431 for tlb in (try_get(
2432 vpir,
2433 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2434 list) or []):
2435 tbr = tlb.get('toggleButtonRenderer') or {}
2436 for getter, regex in [(
2437 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2438 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2439 lambda x: x['accessibility'],
2440 lambda x: x['accessibilityData']['accessibilityData'],
2441 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2442 label = (try_get(tbr, getter, dict) or {}).get('label')
2443 if label:
2444 mobj = re.match(regex, label)
2445 if mobj:
2446 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2447 break
2448 sbr_tooltip = try_get(
2449 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2450 if sbr_tooltip:
2451 like_count, dislike_count = sbr_tooltip.split(' / ')
2452 info.update({
2453 'like_count': str_to_int(like_count),
2454 'dislike_count': str_to_int(dislike_count),
2455 })
2456 vsir = content.get('videoSecondaryInfoRenderer')
2457 if vsir:
2458 info['channel'] = get_text(try_get(
2459 vsir,
2460 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2461 dict))
545cc85d 2462 rows = try_get(
2463 vsir,
2464 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2465 list) or []
2466 multiple_songs = False
2467 for row in rows:
2468 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2469 multiple_songs = True
2470 break
2471 for row in rows:
2472 mrr = row.get('metadataRowRenderer') or {}
2473 mrr_title = mrr.get('title')
2474 if not mrr_title:
2475 continue
2476 mrr_title = get_text(mrr['title'])
2477 mrr_contents_text = get_text(mrr['contents'][0])
2478 if mrr_title == 'License':
2479 info['license'] = mrr_contents_text
2480 elif not multiple_songs:
2481 if mrr_title == 'Album':
2482 info['album'] = mrr_contents_text
2483 elif mrr_title == 'Artist':
2484 info['artist'] = mrr_contents_text
2485 elif mrr_title == 'Song':
2486 info['track'] = mrr_contents_text
2487
2488 fallbacks = {
2489 'channel': 'uploader',
2490 'channel_id': 'uploader_id',
2491 'channel_url': 'uploader_url',
2492 }
2493 for to, frm in fallbacks.items():
2494 if not info.get(to):
2495 info[to] = info.get(frm)
2496
2497 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2498 v = info.get(s_k)
2499 if v:
2500 info[d_k] = v
b84071c0 2501
c224251a
M
2502 is_private = bool_or_none(video_details.get('isPrivate'))
2503 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2504 is_membersonly = None
b28f8d24 2505 is_premium = None
c224251a
M
2506 if initial_data and is_private is not None:
2507 is_membersonly = False
b28f8d24 2508 is_premium = False
c224251a
M
2509 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2510 for content in contents or []:
2511 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2512 for badge in badges or []:
2513 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2514 if label.lower() == 'members only':
2515 is_membersonly = True
2516 break
b28f8d24
M
2517 elif label.lower() == 'premium':
2518 is_premium = True
2519 break
2520 if is_membersonly or is_premium:
c224251a
M
2521 break
2522
2523 # TODO: Add this for playlists
2524 info['availability'] = self._availability(
2525 is_private=is_private,
b28f8d24 2526 needs_premium=is_premium,
c224251a
M
2527 needs_subscription=is_membersonly,
2528 needs_auth=info['age_limit'] >= 18,
2529 is_unlisted=None if is_private is None else is_unlisted)
2530
06167fbb 2531 # get xsrf for annotations or comments
a06916d9 2532 get_annotations = self.get_param('writeannotations', False)
2533 get_comments = self.get_param('getcomments', False)
06167fbb 2534 if get_annotations or get_comments:
29f7c58a 2535 xsrf_token = None
545cc85d 2536 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2537 if ytcfg:
2538 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2539 if not xsrf_token:
2540 xsrf_token = self._search_regex(
2541 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2542 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2543
2544 # annotations
06167fbb 2545 if get_annotations:
64b6a4e9
RA
2546 invideo_url = try_get(
2547 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2548 if xsrf_token and invideo_url:
29f7c58a 2549 xsrf_field_name = None
2550 if ytcfg:
2551 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2552 if not xsrf_field_name:
2553 xsrf_field_name = self._search_regex(
2554 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2555 webpage, 'xsrf field name',
29f7c58a 2556 group='xsrf_field_name', default='session_token')
8a784c74 2557 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2558 self._proto_relative_url(invideo_url),
2559 video_id, note='Downloading annotations',
2560 errnote='Unable to download video annotations', fatal=False,
2561 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2562
277d6ff5 2563 if get_comments:
a1c5d2ca 2564 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage, xsrf_token)
4ea3be0a 2565
545cc85d 2566 self.mark_watched(video_id, player_response)
d77ab8e2 2567
545cc85d 2568 return info
c5e8d7af 2569
5f6a1245 2570
8bdd16b4 2571class YoutubeTabIE(YoutubeBaseInfoExtractor):
2572 IE_DESC = 'YouTube.com tab'
70d5c17b 2573 _VALID_URL = r'''(?x)
2574 https?://
2575 (?:\w+\.)?
2576 (?:
2577 youtube(?:kids)?\.com|
2578 invidio\.us
2579 )/
2580 (?:
fe03a6cd 2581 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 2582 (?P<not_channel>
9ba5705a 2583 feed/|hashtag/|
70d5c17b 2584 (?:playlist|watch)\?.*?\blist=
2585 )|
29f7c58a 2586 (?!(?:%s)\b) # Direct URLs
70d5c17b 2587 )
2588 (?P<id>[^/?\#&]+)
2589 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2590 IE_NAME = 'youtube:tab'
2591
81127aa5 2592 _TESTS = [{
da692b79 2593 'note': 'playlists, multipage',
8bdd16b4 2594 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2595 'playlist_mincount': 94,
2596 'info_dict': {
2597 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2598 'title': 'Игорь Клейнер - Playlists',
2599 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2600 'uploader': 'Игорь Клейнер',
2601 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2602 },
2603 }, {
da692b79 2604 'note': 'playlists, multipage, different order',
8bdd16b4 2605 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2606 'playlist_mincount': 94,
2607 'info_dict': {
2608 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2609 'title': 'Игорь Клейнер - Playlists',
2610 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2611 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2612 'uploader': 'Игорь Клейнер',
8bdd16b4 2613 },
201c1459 2614 }, {
da692b79 2615 'note': 'playlists, series',
201c1459 2616 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
2617 'playlist_mincount': 5,
2618 'info_dict': {
2619 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2620 'title': '3Blue1Brown - Playlists',
2621 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 2622 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
2623 'uploader': '3Blue1Brown',
201c1459 2624 },
8bdd16b4 2625 }, {
da692b79 2626 'note': 'playlists, singlepage',
8bdd16b4 2627 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2628 'playlist_mincount': 4,
2629 'info_dict': {
2630 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2631 'title': 'ThirstForScience - Playlists',
2632 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2633 'uploader': 'ThirstForScience',
2634 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2635 }
2636 }, {
2637 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2638 'only_matching': True,
2639 }, {
da692b79 2640 'note': 'basic, single video playlist',
0e30a7b9 2641 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2642 'info_dict': {
0e30a7b9 2643 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2644 'uploader': 'Sergey M.',
2645 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2646 'title': 'youtube-dl public playlist',
81127aa5 2647 },
0e30a7b9 2648 'playlist_count': 1,
9291475f 2649 }, {
da692b79 2650 'note': 'empty playlist',
0e30a7b9 2651 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2652 'info_dict': {
0e30a7b9 2653 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2654 'uploader': 'Sergey M.',
2655 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2656 'title': 'youtube-dl empty playlist',
9291475f
PH
2657 },
2658 'playlist_count': 0,
2659 }, {
da692b79 2660 'note': 'Home tab',
8bdd16b4 2661 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2662 'info_dict': {
8bdd16b4 2663 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2664 'title': 'lex will - Home',
2665 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2666 'uploader': 'lex will',
2667 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2668 },
8bdd16b4 2669 'playlist_mincount': 2,
9291475f 2670 }, {
da692b79 2671 'note': 'Videos tab',
8bdd16b4 2672 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2673 'info_dict': {
8bdd16b4 2674 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2675 'title': 'lex will - Videos',
2676 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2677 'uploader': 'lex will',
2678 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2679 },
8bdd16b4 2680 'playlist_mincount': 975,
9291475f 2681 }, {
da692b79 2682 'note': 'Videos tab, sorted by popular',
8bdd16b4 2683 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2684 'info_dict': {
8bdd16b4 2685 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2686 'title': 'lex will - Videos',
2687 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2688 'uploader': 'lex will',
2689 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2690 },
8bdd16b4 2691 'playlist_mincount': 199,
9291475f 2692 }, {
da692b79 2693 'note': 'Playlists tab',
8bdd16b4 2694 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2695 'info_dict': {
8bdd16b4 2696 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2697 'title': 'lex will - Playlists',
2698 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2699 'uploader': 'lex will',
2700 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2701 },
8bdd16b4 2702 'playlist_mincount': 17,
ac7553d0 2703 }, {
da692b79 2704 'note': 'Community tab',
8bdd16b4 2705 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2706 'info_dict': {
8bdd16b4 2707 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2708 'title': 'lex will - Community',
2709 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2710 'uploader': 'lex will',
2711 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2712 },
2713 'playlist_mincount': 18,
87dadd45 2714 }, {
da692b79 2715 'note': 'Channels tab',
8bdd16b4 2716 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2717 'info_dict': {
8bdd16b4 2718 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2719 'title': 'lex will - Channels',
2720 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2721 'uploader': 'lex will',
2722 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2723 },
deaec5af 2724 'playlist_mincount': 12,
cd684175 2725 }, {
2726 'note': 'Search tab',
2727 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
2728 'playlist_mincount': 40,
2729 'info_dict': {
2730 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2731 'title': '3Blue1Brown - Search - linear algebra',
2732 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
2733 'uploader': '3Blue1Brown',
2734 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
2735 },
6b08cdf6 2736 }, {
a0566bbf 2737 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2738 'only_matching': True,
2739 }, {
a0566bbf 2740 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2741 'only_matching': True,
2742 }, {
a0566bbf 2743 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2744 'only_matching': True,
2745 }, {
2746 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2747 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2748 'info_dict': {
2749 'title': '29C3: Not my department',
2750 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2751 'uploader': 'Christiaan008',
2752 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2753 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2754 },
2755 'playlist_count': 96,
2756 }, {
2757 'note': 'Large playlist',
2758 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2759 'info_dict': {
8bdd16b4 2760 'title': 'Uploads from Cauchemar',
2761 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2762 'uploader': 'Cauchemar',
2763 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2764 },
8bdd16b4 2765 'playlist_mincount': 1123,
2766 }, {
da692b79 2767 'note': 'even larger playlist, 8832 videos',
8bdd16b4 2768 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2769 'only_matching': True,
4b7df0d3
JMF
2770 }, {
2771 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2772 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2773 'info_dict': {
acf757f4
PH
2774 'title': 'Uploads from Interstellar Movie',
2775 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2776 'uploader': 'Interstellar Movie',
8bdd16b4 2777 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2778 },
481cc733 2779 'playlist_mincount': 21,
358de58c 2780 }, {
2781 'note': 'Playlist with "show unavailable videos" button',
2782 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
2783 'info_dict': {
2784 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
2785 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
2786 'uploader': 'Phim Siêu Nhân Nhật Bản',
2787 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
2788 },
da692b79 2789 'playlist_mincount': 200,
5d342002 2790 }, {
da692b79 2791 'note': 'Playlist with unavailable videos in page 7',
5d342002 2792 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
2793 'info_dict': {
2794 'title': 'Uploads from BlankTV',
2795 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
2796 'uploader': 'BlankTV',
2797 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
2798 },
da692b79 2799 'playlist_mincount': 1000,
8bdd16b4 2800 }, {
da692b79 2801 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 2802 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2803 'info_dict': {
2804 'title': 'Data Analysis with Dr Mike Pound',
2805 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2806 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2807 'uploader': 'Computerphile',
deaec5af 2808 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2809 },
2810 'playlist_mincount': 11,
2811 }, {
a0566bbf 2812 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2813 'only_matching': True,
dacb3a86 2814 }, {
da692b79 2815 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
2816 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2817 'info_dict': {
2818 'id': 'FqZTN594JQw',
2819 'ext': 'webm',
2820 'title': "Smiley's People 01 detective, Adventure Series, Action",
2821 'uploader': 'STREEM',
2822 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2823 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2824 'upload_date': '20150526',
2825 'license': 'Standard YouTube License',
2826 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2827 'categories': ['People & Blogs'],
2828 'tags': list,
dbdaaa23 2829 'view_count': int,
dacb3a86
S
2830 'like_count': int,
2831 'dislike_count': int,
2832 },
2833 'params': {
2834 'skip_download': True,
2835 },
13a75688 2836 'skip': 'This video is not available.',
dacb3a86 2837 'add_ie': [YoutubeIE.ie_key()],
481cc733 2838 }, {
8bdd16b4 2839 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2840 'only_matching': True,
66b48727 2841 }, {
8bdd16b4 2842 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2843 'only_matching': True,
a0566bbf 2844 }, {
2845 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2846 'info_dict': {
da692b79 2847 'id': 'X1whbWASnNQ', # This will keep changing
a0566bbf 2848 'ext': 'mp4',
deaec5af 2849 'title': compat_str,
a0566bbf 2850 'uploader': 'Sky News',
2851 'uploader_id': 'skynews',
2852 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 2853 'upload_date': r're:\d{8}',
2854 'description': compat_str,
a0566bbf 2855 'categories': ['News & Politics'],
2856 'tags': list,
2857 'like_count': int,
2858 'dislike_count': int,
2859 },
2860 'params': {
2861 'skip_download': True,
2862 },
da692b79 2863 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 2864 }, {
2865 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2866 'info_dict': {
2867 'id': 'a48o2S1cPoo',
2868 'ext': 'mp4',
2869 'title': 'The Young Turks - Live Main Show',
2870 'uploader': 'The Young Turks',
2871 'uploader_id': 'TheYoungTurks',
2872 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2873 'upload_date': '20150715',
2874 'license': 'Standard YouTube License',
2875 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2876 'categories': ['News & Politics'],
2877 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2878 'like_count': int,
2879 'dislike_count': int,
2880 },
2881 'params': {
2882 'skip_download': True,
2883 },
2884 'only_matching': True,
2885 }, {
2886 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2887 'only_matching': True,
2888 }, {
2889 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2890 'only_matching': True,
09f1580e 2891 }, {
2892 'note': 'A channel that is not live. Should raise error',
2893 'url': 'https://www.youtube.com/user/numberphile/live',
2894 'only_matching': True,
3d3dddc9 2895 }, {
2896 'url': 'https://www.youtube.com/feed/trending',
2897 'only_matching': True,
2898 }, {
3d3dddc9 2899 'url': 'https://www.youtube.com/feed/library',
2900 'only_matching': True,
2901 }, {
3d3dddc9 2902 'url': 'https://www.youtube.com/feed/history',
2903 'only_matching': True,
2904 }, {
3d3dddc9 2905 'url': 'https://www.youtube.com/feed/subscriptions',
2906 'only_matching': True,
2907 }, {
3d3dddc9 2908 'url': 'https://www.youtube.com/feed/watch_later',
2909 'only_matching': True,
2910 }, {
da692b79 2911 'note': 'Recommended - redirects to home page',
3d3dddc9 2912 'url': 'https://www.youtube.com/feed/recommended',
2913 'only_matching': True,
29f7c58a 2914 }, {
da692b79 2915 'note': 'inline playlist with not always working continuations',
29f7c58a 2916 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2917 'only_matching': True,
2918 }, {
2919 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2920 'only_matching': True,
2921 }, {
2922 'url': 'https://www.youtube.com/course',
2923 'only_matching': True,
2924 }, {
2925 'url': 'https://www.youtube.com/zsecurity',
2926 'only_matching': True,
2927 }, {
2928 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2929 'only_matching': True,
2930 }, {
2931 'url': 'https://www.youtube.com/TheYoungTurks/live',
2932 'only_matching': True,
39ed931e 2933 }, {
2934 'url': 'https://www.youtube.com/hashtag/cctv9',
2935 'info_dict': {
2936 'id': 'cctv9',
2937 'title': '#cctv9',
2938 },
2939 'playlist_mincount': 350,
201c1459 2940 }, {
2941 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
2942 'only_matching': True,
9297939e 2943 }, {
da692b79 2944 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 2945 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2946 'only_matching': True
fe03a6cd 2947 }, {
2948 'note': '/browse/ should redirect to /channel/',
2949 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
2950 'only_matching': True
2951 }, {
2952 'note': 'VLPL, should redirect to playlist?list=PL...',
2953 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2954 'info_dict': {
2955 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2956 'uploader': 'NoCopyrightSounds',
2957 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
2958 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
2959 'title': 'NCS Releases',
2960 },
2961 'playlist_mincount': 166,
18db7548 2962 }, {
2963 'note': 'Topic, should redirect to playlist?list=UU...',
2964 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
2965 'info_dict': {
2966 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
2967 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
2968 'title': 'Uploads from Royalty Free Music - Topic',
2969 'uploader': 'Royalty Free Music - Topic',
2970 },
2971 'expected_warnings': [
2972 'A channel/user page was given',
2973 'The URL does not have a videos tab',
2974 ],
2975 'playlist_mincount': 101,
2976 }, {
2977 'note': 'Topic without a UU playlist',
2978 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
2979 'info_dict': {
2980 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
2981 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
2982 },
2983 'expected_warnings': [
2984 'A channel/user page was given',
2985 'The URL does not have a videos tab',
2986 'Falling back to channel URL',
2987 ],
2988 'playlist_mincount': 9,
abcdd12b 2989 }, {
2990 'note': 'Youtube music Album',
2991 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
2992 'info_dict': {
2993 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
2994 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
2995 },
2996 'playlist_count': 50,
29f7c58a 2997 }]
2998
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    Any URL that YoutubeIE claims is rejected here, so the single-video
    extractor wins; every other URL is judged by the inherited check.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 3003
3004 def _extract_channel_id(self, webpage):
3005 channel_id = self._html_search_meta(
3006 'channelId', webpage, 'channel id', default=None)
3007 if channel_id:
3008 return channel_id
3009 channel_url = self._html_search_meta(
3010 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3011 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3012 'twitter:app:url:googleplay'), webpage, 'channel url')
3013 return self._search_regex(
3014 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3015 channel_url, 'channel id')
15f6397c 3016
8bdd16b4 3017 @staticmethod
cd7c66cf 3018 def _extract_basic_item_renderer(item):
3019 # Modified from _extract_grid_item_renderer
201c1459 3020 known_basic_renderers = (
3021 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3022 )
3023 for key, renderer in item.items():
201c1459 3024 if not isinstance(renderer, dict):
cd7c66cf 3025 continue
201c1459 3026 elif key in known_basic_renderers:
3027 return renderer
3028 elif key.startswith('grid') and key.endswith('Renderer'):
3029 return renderer
8bdd16b4 3030
def _grid_entries(self, grid_renderer):
    """Yield one entry per usable item of ``grid_renderer['items']``.

    Each item is reduced to its renderer dict and then classified by the
    first identifier it carries, in this order: playlist, video, channel.
    Items with none of those fall back to their navigation endpoint URL,
    which is handed to the first of YoutubeTabIE / YoutubePlaylistIE /
    YoutubeIE that declares the URL suitable. Items that are not dicts,
    or have no recognisable renderer, are skipped.
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        entry_renderer = self._extract_basic_item_renderer(grid_item)
        if not isinstance(entry_renderer, dict):
            continue

        title = try_get(
            entry_renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']),
            compat_str)
        playlist_id = entry_renderer.get('playlistId')
        video_id = entry_renderer.get('videoId')
        channel_id = entry_renderer.get('channelId')

        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif video_id:
            yield self._extract_video(entry_renderer)
        elif channel_id:
            # Channel titles are only read from the simpleText form
            channel_title = try_get(
                entry_renderer, lambda x: x['title']['simpleText'], compat_str)
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=channel_title)
        else:
            # No known identifier: fall back to the generic endpoint URL
            endpoint_url = urljoin('https://www.youtube.com/', try_get(
                entry_renderer,
                lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str))
            if not endpoint_url:
                continue
            matching_ie = next(
                (candidate for candidate in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE)
                 if candidate.suitable(endpoint_url)),
                None)
            if matching_ie is not None:
                yield self.url_result(
                    endpoint_url, ie=matching_ie.ie_key(),
                    video_id=matching_ie._match_id(endpoint_url), video_title=title)
8bdd16b4 3073
3d3dddc9 3074 def _shelf_entries_from_content(self, shelf_renderer):
3075 content = shelf_renderer.get('content')
3076 if not isinstance(content, dict):
8bdd16b4 3077 return
cd7c66cf 3078 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3079 if renderer:
3080 # TODO: add support for nested playlists so each shelf is processed
3081 # as separate playlist
3082 # TODO: this includes only first N items
3083 for entry in self._grid_entries(renderer):
3084 yield entry
3085 renderer = content.get('horizontalListRenderer')
3086 if renderer:
3087 # TODO
3088 pass
8bdd16b4 3089
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield the entries of one shelf.

    When the shelf exposes an endpoint URL, a URL result for it (titled
    with the shelf title) is yielded first - unless ``skip_channels`` is
    set and the URL contains ``/channels?``, in which case the shelf
    yields nothing at all. Whatever ``_shelf_entries_from_content`` finds
    inside the shelf is yielded afterwards.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # The shelf may lack a shelf URL, so its content is extracted as well
    for content_entry in self._shelf_entries_from_content(shelf_renderer):
        yield content_entry
c5e8d7af 3107
8bdd16b4 3108 def _playlist_entries(self, video_list_renderer):
3109 for content in video_list_renderer['contents']:
3110 if not isinstance(content, dict):
3111 continue
3112 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3113 if not isinstance(renderer, dict):
3114 continue
3115 video_id = renderer.get('videoId')
3116 if not video_id:
3117 continue
3118 yield self._extract_video(renderer)
07aeced6 3119
def _rich_entries(self, rich_grid_renderer):
    """Yield the video held by a rich grid item, if it has one.

    The video renderer is read from ``content.videoRenderer``; nothing is
    yielded when it is absent or carries no ``videoId``.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
    if not video_renderer:
        video_renderer = {}
    if video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3127
8bdd16b4 3128 def _video_entry(self, video_renderer):
3129 video_id = video_renderer.get('videoId')
3130 if video_id:
3131 return self._extract_video(video_renderer)
dacb3a86 3132
def _post_thread_entries(self, post_thread_renderer):
    """Yield the entries referenced by one community post.

    In order: the attached video (if any), the attached playlist (if any),
    then every YouTube video link found in the post text, skipping links
    that point at the attached video.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return
    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry
    # playlist attachment
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)
    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        # the attached video has already been yielded above
        if linked_id != video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)
dacb3a86 3168
8bdd16b4 3169 def _post_thread_continuation_entries(self, post_thread_continuation):
3170 contents = post_thread_continuation.get('contents')
3171 if not isinstance(contents, list):
3172 return
3173 for content in contents:
3174 renderer = content.get('backstagePostThreadRenderer')
3175 if not isinstance(renderer, dict):
3176 continue
3177 for entry in self._post_thread_entries(renderer):
3178 yield entry
07aeced6 3179
39ed931e 3180 r''' # unused
3181 def _rich_grid_entries(self, contents):
3182 for content in contents:
3183 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3184 if video_renderer:
3185 entry = self._video_entry(video_renderer)
3186 if entry:
3187 yield entry
3188 '''
3189
29f7c58a 3190 @staticmethod
3191 def _build_continuation_query(continuation, ctp=None):
3192 query = {
3193 'ctoken': continuation,
3194 'continuation': continuation,
3195 }
3196 if ctp:
3197 query['itct'] = ctp
3198 return query
3199
@staticmethod
def _extract_next_continuation_data(renderer):
    """Build a continuation query from ``continuations[0].nextContinuationData``.

    Returns None when that dict or its ``continuation`` token is missing.
    """
    data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict)
    token = data.get('continuation') if data else None
    if not token:
        return
    return YoutubeTabIE._build_continuation_query(
        token, data.get('clickTrackingParams'))
c5e8d7af 3211
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for ``renderer``, or None if there is none.

    ``nextContinuationData`` is tried first. Otherwise the ``contents`` and
    ``items`` lists are searched, in that order, for the first
    ``continuationItemRenderer`` whose endpoint carries a token.
    """
    from_next_data = cls._extract_next_continuation_data(renderer)
    if from_next_data:
        return from_next_data
    candidates = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
448830ce 3234
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield every entry of a tab, following continuations page by page.

    The entries of the initial tab content are yielded first. Afterwards
    continuation pages are requested through ``_extract_response`` for as
    long as a continuation is found and a known renderer is present in
    the response.

    ``item_id`` is only used to label the requests; ``identity_token``,
    ``account_syncid`` and ``ytcfg`` are forwarded to build the API
    headers and context.
    """

    def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
        # Yields the entries below parent_renderer and, as a side effect,
        # stores the continuation it found in continuation_list[0]
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                # Only the first known renderer of each item is processed
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    continuation_list = [None]  # Python 2 does not support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

    for page_num in itertools.count(1):
        if not continuation:
            break
        query = {'continuation': continuation['continuation']}
        # 'itct' is only present when the continuation came with
        # clickTrackingParams (see _build_continuation_query), so it must
        # not be indexed unconditionally
        click_tracking_params = continuation.get('itct')
        if click_tracking_params:
            query['clickTracking'] = {'clickTrackingParams': click_tracking_params}
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=query, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Keep the previous visitor data when the response carries none
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        # First response layout: a renderer below 'continuationContents'
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Reset so that only a continuation set by extract_entries
            # during this page is taken into account
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Second response layout: a list of items appended through
        # 'appendContinuationItemsAction'. The type of the first item
        # selects the handler and the key the handler expects the list under
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        # Nothing recognisable in the response: stop paging
        break
9558dcec 3354
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the first tab whose ``selected`` flag is True.

    Both ``tabRenderer`` and ``expandableTabRenderer`` are considered.
    Raises ExtractorError when no tab is selected.
    """
    for candidate in tabs:
        tab_renderer = dict_get(candidate, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    raise ExtractorError('Unable to find selected tab')
b82f815f 3363
@staticmethod
def _extract_uploader(data):
    """Return uploader, uploader_id and uploader_url read from the playlist sidebar.

    Only keys with a value other than None are included; the result is
    empty when the sidebar holds no video owner.
    """
    info = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner_run = try_get(
            secondary_info,
            lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner_run:
            continue
        base_url = try_get(
            owner_run,
            lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'],
            compat_str)
        info['uploader'] = owner_run.get('text')
        info['uploader_id'] = try_get(
            owner_run, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        info['uploader_url'] = urljoin('https://www.youtube.com/', base_url)
    return {key: value for key, value in info.items() if value is not None}
8bdd16b4 3386
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Return the playlist result for a page made of tabs.

    Metadata is read from ``data['metadata']``: the
    ``channelMetadataRenderer`` when present, otherwise the
    ``playlistMetadataRenderer``. The entries come from ``_entries`` run
    on the selected tab.
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    # Both names start out bound to the same empty list; each is only ever
    # rebound below, never mutated
    thumbnails_list = tags = []

    selected_tab = self._extract_selected_tab(tabs)
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        # Stays None for the playlist renderer, in which case item_id is used below
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        # Prefer the avatar thumbnails, then the thumbnail of the first sidebar item
        thumbnails_list = (
            try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    # Keep only the thumbnails that have a usable URL
    thumbnails = []
    for t in thumbnails_list:
        if not isinstance(t, dict):
            continue
        thumbnail_url = url_or_none(t.get('url'))
        if not thumbnail_url:
            continue
        thumbnails.append({
            'url': thumbnail_url,
            'width': int_or_none(t.get('width')),
            'height': int_or_none(t.get('height')),
        })
    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        # Fall back to the hashtag header text, then to the playlist ID
        title = (
            try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
            or playlist_id)
    # Append the selected tab's title / expanded text to the playlist title
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        # No channel metadata: take the uploader from the playlist sidebar instead
        metadata.update(self._extract_uploader(data))
    # The channel_* fields mirror the uploader_* fields
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})
    return self.playlist_result(
        self._entries(
            selected_tab, playlist_id,
            self._extract_identity_token(webpage, item_id),
            self._extract_account_syncid(data),
            self._extract_ytcfg(item_id, webpage)),
        **metadata)
73c4ac2c 3459
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a mix playlist, requesting further pages as needed.

    Every page is requested from the ``next`` endpoint, starting from the
    last video of the previous page. Iteration stops when a page has no
    videos, has nothing after the last yielded video, contains the very
    first video again, or when the response holds no playlist.
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
        visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page;
        # when it is not on this page, start from the beginning
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item is not guaranteed to be a playlistPanelVideoRenderer
        # with a watch endpoint; fall back to the defaults below in that case
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query,
            ep='next',
            headers=headers,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # Nothing to iterate on the next page; _playlist_entries would
            # fail on None
            return
cd7c66cf 3498
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Return the result for a playlist shown next to a video.

    When the playlist advertises a URL different from ``url``, extraction
    is delegated to that URL. Otherwise the playlist is treated as a mix
    and its videos are extracted here.
    """
    playlist_title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Everything except mix playlists is delegated to the regular
    # tab-based playlist URL
    canonical_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    canonical_url = urljoin(url, canonical_path)
    if canonical_url and canonical_url != url:
        return self.url_result(
            canonical_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=playlist_title)

    mix_entries = self._extract_mix_playlist(playlist, playlist_id, data, webpage)
    return self.playlist_result(
        mix_entries, playlist_id=playlist_id, playlist_title=playlist_title)
c5e8d7af 3516
@staticmethod
def _extract_alerts(data):
    """Yield an ``(alert_type, message)`` pair for every alert in ``data['alerts']``.

    The message is the alert's ``text.simpleText`` followed by the
    concatenated text of its ``text.runs``. Each alert is yielded at most
    once; alerts without a type or without any message text are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = try_get(alert, lambda x: x['text']['simpleText'], compat_str) or ''
            # A run without usable text contributes nothing instead of
            # breaking the concatenation
            message += ''.join(
                try_get(run, lambda x: x['text'], compat_str) or ''
                for run in try_get(alert, lambda x: x['text']['runs'], list) or [])
            if message:
                yield alert_type, message
3533
3534 def _report_alerts(self, alerts, expected=True):
3ffc7c89 3535 errors = []
3536 warnings = []
95c01b6c 3537 for alert_type, alert_message in alerts:
f3eaa8dd 3538 if alert_type.lower() == 'error':
3ffc7c89 3539 errors.append([alert_type, alert_message])
f3eaa8dd 3540 else:
3ffc7c89 3541 warnings.append([alert_type, alert_message])
f3eaa8dd 3542
3ffc7c89 3543 for alert_type, alert_message in (warnings + errors[:-1]):
6a39ee13 3544 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
3ffc7c89 3545 if errors:
3546 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
02ced43c 3547
95c01b6c 3548 def _extract_and_report_alerts(self, data, *args, **kwargs):
3549 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
3550
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Request the playlist again with unavailable videos included.

    The browse ID and params are taken from the 'show unavailable videos'
    menu item of the playlist sidebar when it exists; otherwise default
    values are used. Returns None when the playlist has no sidebar items.
    The request is non-fatal.
    """
    sidebar_renderer = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    if not sidebar_renderer:
        return
    browse_id = params = None
    for item in sidebar_renderer:
        if not isinstance(item, dict):
            continue
        renderer = item.get('playlistSidebarPrimaryInfoRenderer')
        menu_renderer = try_get(
            renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_renderer:
            if not isinstance(menu_item, dict):
                continue
            nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
            text = try_get(
                nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
            if not text or text.lower() != 'show unavailable videos':
                continue
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id = browse_endpoint.get('browseId')
            params = browse_endpoint.get('params')
            # Only leaves the menu loop; the remaining sidebar items are still visited
            break

    ytcfg = self._extract_ytcfg(item_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=try_get(
            self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    # Defaults used when the menu item (or one of its values) was not found
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False,
        note='Downloading API JSON with unavailable videos')
358de58c 3594
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True):
    """Call the API endpoint ``ep`` with ``query``, retrying on known transient failures.

    A request is retried, up to the ``extractor_retries`` parameter
    (default 3), on HTTP 500/503/404 errors and when none of
    ``check_get_keys`` has a value in the response. With a falsy
    ``check_get_keys`` any response without error alerts is accepted.

    Returns the response. When ``fatal`` is false, failures are reported
    as warnings and None is returned instead of raising.
    """
    response = None
    last_error = None
    count = -1
    retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []
    while count < retries:
        count += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        try:
            # Always fatal here; errors are handled right below according
            # to this method's own ``fatal`` flag
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg),
                api_key=self._extract_api_key(ytcfg),
                note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                last_error = 'HTTP Error %s' % e.cause.code
                if count < retries:
                    continue
            # Any other error, or retries exhausted
            if fatal:
                raise
            else:
                self.report_warning(error_to_compat_str(e))
                return

        else:
            # Youtube may send alerts if there was an issue with the continuation page
            try:
                self._extract_and_report_alerts(response, expected=False)
            except ExtractorError as e:
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                return
            if not check_get_keys or dict_get(response, check_get_keys):
                break
            # Youtube sometimes sends incomplete data
            # See: https://github.com/ytdl-org/youtube-dl/issues/28194
            last_error = 'Incomplete data received'
            if count >= retries:
                if fatal:
                    raise ExtractorError(last_error)
                else:
                    self.report_warning(last_error)
                    return
    return response
3648
cd7c66cf 3649 def _extract_webpage(self, url, item_id):
a06916d9 3650 retries = self.get_param('extractor_retries', 3)
62bff2c1 3651 count = -1
c705177d 3652 last_error = 'Incomplete yt initial data recieved'
14fdfea9 3653 while count < retries:
62bff2c1 3654 count += 1
14fdfea9 3655 # Sometimes youtube returns a webpage with incomplete ytInitialData
62bff2c1 3656 # See: https://github.com/yt-dlp/yt-dlp/issues/116
3657 if count:
c705177d 3658 self.report_warning('%s. Retrying ...' % last_error)
5ef7d9bd 3659 webpage = self._download_webpage(
3660 url, item_id,
cd7c66cf 3661 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
14fdfea9 3662 data = self._extract_yt_initial_data(item_id, webpage)
14fdfea9 3663 if data.get('contents') or data.get('currentVideoEndpoint'):
3664 break
95c01b6c 3665 # Extract alerts here only when there is error
3666 self._extract_and_report_alerts(data)
c705177d 3667 if count >= retries:
6a39ee13 3668 raise ExtractorError(last_error)
cd7c66cf 3669 return webpage, data
3670
9297939e 3671 @staticmethod
3672 def _smuggle_data(entries, data):
3673 for entry in entries:
3674 if data:
3675 entry['url'] = smuggle_url(entry['url'], data)
3676 yield entry
3677
def _real_extract(self, url):
    """Extract ``url`` and pass the smuggled data on to every resulting entry.

    Data smuggled into ``url`` is unpacked first, and ``is_music_url`` is
    added to it for music URLs.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True
    result = self.__real_extract(url, smuggled_data)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled_data)
    return result
3686
fe03a6cd 3687 _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
3688
def __real_extract(self, url, smuggled_data):
    """Extract a tab, playlist or video page after normalising ``url``.

    The URL is rewritten to www.youtube.com, music channel URLs are mapped
    to their playlist equivalents, and channel home URLs are redirected to
    the /videos tab (unless the 'no-youtube-channel-redirect' compat
    option is set). Depending on what the downloaded page contains, the
    result comes from the tabs, from the playlist next to a video, or is
    a url_result for a single video.

    Raises ExtractorError when the page cannot be recognised.
    """
    item_id = self._match_id(url)
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Named groups of the URL, with unmatched groups as empty strings
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                item_id = self._search_regex(
                    r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                    self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                    'playlist id')
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            # Compared case-insensitively, the same way
                            # _report_alerts classifies error alerts
                            if alert_type.lower() == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)

    # Looked up again since data may have been replaced above
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 3804
c5e8d7af 3805
class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist IDs and any youtube/invidio.us URL with a
    # "list=" query parameter; extraction itself is handed to YoutubeTabIE.
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE accepts or that name a video (``v=``)."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        video_ids = parse_qs(url).get('v', [None])
        if video_ids[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Hand the playlist over to YoutubeTabIE through a canonical playlist URL."""
        playlist_id = self._match_id(url)
        # Must be determined before the URL is rewritten below
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query; a bare ID has none, so build one from it
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 3888
3889
class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that carry a ``list=`` parameter.

    The short link is rewritten into the equivalent youtube.com/watch URL
    (video id plus playlist id) and handed over to YoutubeTabIE.
    """
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the matching watch URL and return it as a YoutubeTabIE result."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        # The result is identified by the playlist id, not the video id.
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3928
3929
class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` shorthand into a youtube.com/user/ URL."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a YoutubeTabIE result for the user named after ``ytuser:``."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3943
b05654f0 3944
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the ``:ytfav`` family of keywords onto the ``LL`` playlist URL."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a YoutubeTabIE result for the fixed ``list=LL`` playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
3962
3963
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor behind the ``ytsearch`` keyword.

    Result pages are requested from the ``search`` endpoint one at a time
    and followed through their continuation tokens until enough videos
    were produced or no further page is offered.
    """
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _TESTS = []

    def _entries(self, query, n):
        """Lazily yield up to *n* video results for *query*.

        Stops when *n* videos were yielded, when a response is empty or has
        no recognisable result list, or when a page has no continuation token.
        """
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS
        yielded = 0
        for page_num in itertools.count(1):
            response = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not response:
                return
            # The first page and continuation pages keep the list in different places.
            sections = try_get(
                response,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Promoted content may be mixed into the results, which shifts
            # the position of both the videos and the continuation token,
            # so every section is inspected rather than fixed indices.
            next_token = None
            for section in sections:
                if next_token is None:
                    next_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for item in items or ():
                    if not isinstance(item, dict):
                        continue
                    renderer = item.get('videoRenderer')
                    # Only dict renderers that carry a video id are usable.
                    if not isinstance(renderer, dict) or not renderer.get('videoId'):
                        continue

                    yield self._extract_video(renderer)
                    yielded += 1
                    if yielded == n:
                        return

            if not next_token:
                return
            request['continuation'] = next_token

    def _get_n_results(self, query, n):
        """Return a playlist result holding up to *n* results for *query*."""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 4033
c9ae7b95 4034
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE for the ``ytsearchdate`` keyword."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the ``params`` field of the search request by the inherited _entries.
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4040
c9ae7b95 4041
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Extractor for youtube.com ``/results`` search page URLs.

    The search terms come from ``search_query`` (or ``q``) and the optional
    ``sp`` value is forwarded as the search parameters.
    """
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return ``_VALID_URL`` unchanged as the pattern to match against."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run the search described by *url* and return all results as a playlist."""
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        terms = params.get('search_query') or params.get('q')
        query = terms[0]
        # Stored on the instance so the inherited _entries picks it up.
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4067
4068
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed extractors.

    A subclass only has to provide _FEED_NAME; both IE_NAME and the
    youtube.com/feed/ URL that gets delegated to YoutubeTabIE are
    derived from it.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name in the form ``youtube:<_FEED_NAME>``."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        """Return a YoutubeTabIE result for this feed's page; *url* is not used."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4085
4086
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ``:ytwatchlater`` keyword onto the ``WL`` playlist URL."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a YoutubeTabIE result for the fixed ``list=WL`` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4099
4100
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``recommended``.

    Matches the bare youtube.com front page as well as the ``:ytrec`` /
    ``:ytrecommended`` keywords. Login is not required for this feed.
    """
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4116
1ed5b5c9 4117
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``subscriptions`` (``:ytsubs`` and its longer spellings)."""
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4129
1ed5b5c9 4130
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``history`` (``:ythis`` / ``:ythistory``)."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4139
4140
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs that lost their ``v=`` parameter and explain why.

    The pattern accepts a youtube.com (or youtube-nocookie.com) ``watch?``
    URL whose whole query is empty or a single one of ``feature``,
    ``annotation_id``, ``x-yt-cl``, ``hl`` or ``t``, and
    ``attribution_link?a=...`` URLs with nothing after the ``a`` value.
    Extraction never succeeds: _real_extract always raises an expected
    ExtractorError with a hint about quoting the URL.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining the likely cause.

        The hint names the ``yt-dlp`` command so that the example can be
        copied as-is by users of this program.
        """
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4188
4189
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v=`` value is only 1 to 10 characters long.

    Such URLs are reported as truncated through an expected ExtractorError.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id and URL."""
        short_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (short_id, url)
        raise ExtractorError(message, expected=True)