]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[funimation] Extract subtitles (#434)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
a5c56234 6import hashlib
0ca96d48 7import itertools
c5e8d7af 8import json
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
8a784c74 12import time
e0df6211 13import traceback
c5e8d7af 14
b05654f0 15from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 16from ..compat import (
edf3e38e 17 compat_chr,
29f7c58a 18 compat_HTTPError,
c5e8d7af 19 compat_parse_qs,
545cc85d 20 compat_str,
7fd002c0 21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
4bb4a188 25)
545cc85d 26from ..jsinterp import JSInterpreter
4bb4a188 27from ..utils import (
c224251a 28 bool_or_none,
c5e8d7af 29 clean_html,
26fe8ffe 30 dict_get,
d92f5d5a 31 datetime_from_str,
358de58c 32 error_to_compat_str,
c5e8d7af 33 ExtractorError,
b60419c5 34 format_field,
2d30521a 35 float_or_none,
dd27fd17 36 int_or_none,
94278f72 37 mimetype2ext,
6310acf5 38 parse_codecs,
7c80519c 39 parse_duration,
dca3ff4a 40 qualities,
3995d37d 41 remove_start,
cf7e015f 42 smuggle_url,
dbdaaa23 43 str_or_none,
c93d53f5 44 str_to_int,
556dbe7f 45 try_get,
c5e8d7af
PH
46 unescapeHTML,
47 unified_strdate,
cf7e015f 48 unsmuggle_url,
8bdd16b4 49 update_url_query,
21c340b8 50 url_or_none,
6e6bc8da 51 urlencode_postdata,
d92f5d5a 52 urljoin
c5e8d7af
PH
53)
54
5f6a1245 55
def parse_qs(url):
    """Parse the query component of *url* with ``compat_urlparse.parse_qs``."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
58
59
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Alternation of path names (regex fragment, not anchored here)
    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.

        Username+password login is currently broken: when a username is
        supplied a warning is emitted and False is returned before any
        request is made. The request flow below that point is kept as-is.
        """

        def warn(message):
            self.report_warning(message)

        # username+password login is broken
        if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
            self.raise_login_required(
                'Login details are needed to download this content', method='cookies')
        username, password = self._get_login_info()
        if username:
            warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
            return False
        # Everything below this is broken!

        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            # if self.get_param('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
            #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST the login form plus the JSON-encoded *f_req* to *url*; False on failure."""
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything before the first '[' so the rest parses as JSON
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        # Same continue URL is sent with both the lookup and the challenge request
        service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 service_login_url,
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, service_login_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Parenthesised so the prefix is applied to whichever message is chosen
            warn('Unable to login: %s' % (
                'Invalid password' if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Parenthesised so the prefix is applied to whichever message is chosen
                    warn('Unable to finish TFA: %s' % (
                        'Invalid TFA code' if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _initialize_consent(self):
        """
        Set an accepted CONSENT cookie for .youtube.com when needed.

        Nothing is done if a __Secure-3PSID cookie exists or the CONSENT
        cookie already contains 'YES'. The numeric id of a 'PENDING+<id>'
        cookie is reused when present, otherwise a random 3-digit id is used.
        """
        cookies = self._get_cookies('https://www.youtube.com/')
        if cookies.get('__Secure-3PSID'):
            return
        consent_id = None
        consent = cookies.get('CONSENT')
        if consent:
            if 'YES' in consent.value:
                return
            consent_id = self._search_regex(
                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
        if not consent_id:
            consent_id = random.randint(100, 999)
        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

    def _real_initialize(self):
        """Set the consent cookie, then attempt login if a downloader is attached."""
        self._initialize_consent()
        if self._downloader is None:
            return
        if not self._login():
            return

    # Fallback values used when they cannot be read from the page's ytcfg
    _YT_WEB_CLIENT_VERSION = '2.20210407.08.00'
    _YT_INNERTUBE_API_KEY = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _generate_sapisidhash_header(self):
        """
        Build the 'SAPISIDHASH <time>_<sha1>' Authorization header value.

        Returns None when neither a __Secure-3PAPISID nor a SAPISID cookie
        is available for https://www.youtube.com.
        """
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        yt_cookies = self._get_cookies('https://www.youtube.com')
        sapisid_cookie = dict_get(
            yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
        if sapisid_cookie is None:
            return
        time_now = round(time.time())
        # SAPISID cookie is required if not already present
        if not yt_cookies.get('SAPISID'):
            self._set_cookie(
                '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
        # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
        sapisidhash = hashlib.sha1(
            f'{time_now} {sapisid_cookie.value} https://www.youtube.com'.encode('utf-8')).hexdigest()
        return f'SAPISIDHASH {time_now}_{sapisidhash}'

    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page',
                  context=None, api_key=None):
        """
        POST a JSON request to https://www.youtube.com/youtubei/v1/<ep>.

        The body is {'context': ...} updated with *query* (so *query* may
        override 'context'). *context* falls back to _extract_context() and
        *api_key* to _extract_api_key(). *headers*, if given, are applied on
        top of the generated API headers. Returns the result of
        _download_json.
        """
        data = {'context': context or self._extract_context()}
        data.update(query)
        real_headers = self._generate_api_headers()
        real_headers.update({'content-type': 'application/json'})
        if headers:
            real_headers.update(headers)
        return self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep,
            video_id=video_id, fatal=fatal, note=note, errnote=errnote,
            data=json.dumps(data).encode('utf8'), headers=real_headers,
            query={'key': api_key or self._extract_api_key()})

    def _extract_api_key(self, ytcfg=None):
        """Return INNERTUBE_API_KEY from *ytcfg*, or the built-in default key."""
        return try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str) or self._YT_INNERTUBE_API_KEY

    def _extract_yt_initial_data(self, video_id, webpage):
        """Find the ytInitialData JSON object in *webpage* and parse it."""
        return self._parse_json(
            self._search_regex(
                # Try the pattern followed by a known boundary first, then the bare pattern
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_identity_token(self, webpage, item_id):
        """Return ID_TOKEN from the page's ytcfg, else from a regex over *webpage*, else None."""
        ytcfg = self._extract_ytcfg(item_id, webpage)
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
                return token
        return self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)

    @staticmethod
    def _extract_account_syncid(data):
        """
        Extract syncId required to download private playlists of secondary channels
        @param data Either response or ytcfg

        Returns None when no sync id is found, including when *data* is not a dict.
        """
        sync_ids = (try_get(
            data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                   lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
        if len(sync_ids) >= 2 and sync_ids[1]:
            # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
            # and just "user_syncid||" for primary channel. We only want the channel_syncid
            return sync_ids[0]
        # A non-dict (e.g. None) has no .get(); treat it as "nothing found"
        if not isinstance(data, dict):
            return None
        # ytcfg includes channel_syncid if on secondary channel
        return data.get('DELEGATED_SESSION_ID')

    def _extract_ytcfg(self, video_id, webpage):
        """Parse the argument of ytcfg.set({...}) in *webpage*; {} if absent or unparsable."""
        if not webpage:
            return {}
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False) or {}

    def __extract_client_version(self, ytcfg):
        """Return INNERTUBE_CLIENT_VERSION from *ytcfg*, or the built-in default version."""
        return try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) or self._YT_WEB_CLIENT_VERSION

    def _extract_context(self, ytcfg=None):
        """
        Return the innertube request context.

        Uses INNERTUBE_CONTEXT from *ytcfg* when it is a non-empty dict;
        otherwise builds a minimal context from the client name/version
        (defaulting to 'WEB' and the built-in version) plus VISITOR_DATA
        when available.
        """
        context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'], dict)
        if context:
            return context

        # Recreate the client context (required)
        client_version = self.__extract_client_version(ytcfg)
        client_name = try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str) or 'WEB'
        context = {
            'client': {
                'clientName': client_name,
                'clientVersion': client_version,
            }
        }
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            context['client']['visitorData'] = visitor_data
        return context

    def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None, visitor_data=None):
        """
        Build the HTTP headers for an API request.

        Always sets the client name ('1') and client version; the identity
        token, account sync id and visitor data headers are added only when
        the corresponding argument is truthy. Authorization and X-Origin are
        added when a SAPISIDHASH value can be generated.
        """
        headers = {
            'X-YouTube-Client-Name': '1',
            'X-YouTube-Client-Version': self.__extract_client_version(ytcfg),
        }
        if identity_token:
            headers['x-youtube-identity-token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
            headers['X-Goog-AuthUser'] = 0
        if visitor_data:
            headers['x-goog-visitor-id'] = visitor_data
        auth = self._generate_sapisidhash_header()
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = 'https://www.youtube.com'
        return headers

    @staticmethod
    def is_music_url(url):
        """Return True if *url* starts with http(s)://music.youtube.com/."""
        return re.match(r'https?://music\.youtube\.com/', url) is not None

    def _extract_video(self, renderer):
        """
        Convert a video renderer dict into a 'url'-type result for YoutubeIE.

        Fields that cannot be read from *renderer* are returned as None.
        """
        video_id = renderer.get('videoId')
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']), compat_str)
        description = try_get(
            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
            compat_str)
        duration = parse_duration(try_get(
            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
        view_count_text = try_get(
            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
        # Whitespace is stripped first; only a leading run of digits/commas is used
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
            renderer,
            (lambda x: x['ownerText']['runs'][0]['text'],
             lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
456
0c148415 457
360e1ca5 458class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 459 IE_DESC = 'YouTube.com'
bc2ca1bb 460 _INVIDIOUS_SITES = (
461 # invidious-redirect websites
462 r'(?:www\.)?redirect\.invidious\.io',
463 r'(?:(?:www|dev)\.)?invidio\.us',
464 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
465 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 466 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 467 r'(?:(?:www|au)\.)?ytprivate\.com',
468 r'(?:www\.)?invidious\.namazso\.eu',
469 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 470 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
471 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
472 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
473 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
474 # youtube-dl invidious instances list
475 r'(?:(?:www|no)\.)?invidiou\.sh',
476 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
477 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 478 r'(?:www\.)?invidious\.mastodon\.host',
479 r'(?:www\.)?invidious\.zapashcanon\.fr',
480 r'(?:www\.)?invidious\.kavin\.rocks',
201c1459 481 r'(?:www\.)?invidious\.tinfoil-hat\.net',
482 r'(?:www\.)?invidious\.himiko\.cloud',
483 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 484 r'(?:www\.)?invidious\.tube',
485 r'(?:www\.)?invidiou\.site',
486 r'(?:www\.)?invidious\.site',
487 r'(?:www\.)?invidious\.xyz',
488 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 489 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 490 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 491 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 492 r'(?:www\.)?tube\.poal\.co',
493 r'(?:www\.)?tube\.connect\.cafe',
494 r'(?:www\.)?vid\.wxzm\.sx',
495 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 496 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 497 r'(?:www\.)?yewtu\.be',
498 r'(?:www\.)?yt\.elukerio\.org',
499 r'(?:www\.)?yt\.lelux\.fi',
500 r'(?:www\.)?invidious\.ggc-project\.de',
501 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 502 r'(?:www\.)?ytprivate\.com',
503 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 504 r'(?:www\.)?invidious\.toot\.koeln',
505 r'(?:www\.)?invidious\.fdn\.fr',
506 r'(?:www\.)?watch\.nettohikari\.com',
507 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
508 r'(?:www\.)?qklhadlycap4cnod\.onion',
509 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
510 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
511 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
512 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
513 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
514 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
515 )
cb7dfeea 516 _VALID_URL = r"""(?x)^
c5e8d7af 517 (
edb53e2d 518 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 519 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
520 (?:www\.)?deturl\.com/www\.youtube\.com|
521 (?:www\.)?pwnyoutube\.com|
522 (?:www\.)?hooktube\.com|
523 (?:www\.)?yourepeat\.com|
524 tube\.majestyc\.net|
525 %(invidious)s|
526 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
527 (?:.*?\#/)? # handle anchor (#/) redirect urls
528 (?: # the various things that can precede the ID:
ac7553d0 529 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 530 |(?: # or the v= param in all its forms
f7000f3a 531 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 532 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 533 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
534 v=
535 )
f4b05232 536 ))
cbaed4bb
S
537 |(?:
538 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
539 vid\.plus| # or vid.plus/xxxx
540 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 541 %(invidious)s
cbaed4bb 542 )/
edb53e2d 543 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 544 )
c5e8d7af 545 )? # all until now is optional -> you can pass the naked ID
201c1459 546 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 547 (?(1).+)? # if we found the ID, everything can follow
9297939e 548 (?:\#|$)""" % {
bc2ca1bb 549 'invidious': '|'.join(_INVIDIOUS_SITES),
550 }
e40c758c 551 _PLAYER_INFO_RE = (
cc2db878 552 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
553 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 554 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 555 )
2c62dc26 556 _formats = {
c2d3cb4c 557 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
558 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
559 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
560 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
561 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
562 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
563 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
564 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 565 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 566 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
567 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
568 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
569 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
570 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
571 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 572 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 573 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
574 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 575
576
577 # 3D videos
c2d3cb4c 578 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
579 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
580 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
581 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 582 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
583 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
584 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 585
96fb5605 586 # Apple HTTP Live Streaming
11f12195 587 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 588 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
589 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
590 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
591 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
592 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 593 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
594 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
595
596 # DASH mp4 video
d23028a8
S
597 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
598 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
599 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
600 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
601 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 602 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
603 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
604 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
605 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
606 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
607 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
608 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 609
f6f1fc92 610 # Dash mp4 audio
d23028a8
S
611 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
612 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
613 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
614 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
615 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
616 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
617 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
618
619 # Dash webm
d23028a8
S
620 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
621 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
622 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
623 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
624 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
625 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
626 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
627 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
628 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
629 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
630 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
631 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
632 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
633 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
634 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 635 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
636 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
637 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
638 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
639 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
640 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
641 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
642
643 # Dash webm audio
d23028a8
S
644 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
645 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 646
0857baad 647 # Dash webm audio with opus inside
d23028a8
S
648 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
649 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
650 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 651
ce6b9a2d
PH
652 # RTMP (unnamed)
653 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
654
655 # av01 video only formats sometimes served with "unknown" codecs
656 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
657 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
658 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
659 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 660 }
29f7c58a 661 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 662
fd5c4aab
S
663 _GEO_BYPASS = False
664
78caa52a 665 IE_NAME = 'youtube'
2eb88d95
PH
666 _TESTS = [
667 {
2d3d2997 668 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
669 'info_dict': {
670 'id': 'BaW_jenozKc',
671 'ext': 'mp4',
3867038a 672 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
673 'uploader': 'Philipp Hagemeister',
674 'uploader_id': 'phihag',
ec85ded8 675 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
676 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
677 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 678 'upload_date': '20121002',
3867038a 679 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 680 'categories': ['Science & Technology'],
3867038a 681 'tags': ['youtube-dl'],
556dbe7f 682 'duration': 10,
dbdaaa23 683 'view_count': int,
3e7c1224
PH
684 'like_count': int,
685 'dislike_count': int,
7c80519c 686 'start_time': 1,
297a564b 687 'end_time': 9,
2eb88d95 688 }
0e853ca4 689 },
fccd3771 690 {
4bc3a23e
PH
691 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
692 'note': 'Embed-only video (#1746)',
693 'info_dict': {
694 'id': 'yZIXLfi8CZQ',
695 'ext': 'mp4',
696 'upload_date': '20120608',
697 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
698 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
699 'uploader': 'SET India',
94bfcd23 700 'uploader_id': 'setindia',
ec85ded8 701 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 702 'age_limit': 18,
545cc85d 703 },
704 'skip': 'Private video',
fccd3771 705 },
11b56058 706 {
8bdd16b4 707 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
708 'note': 'Use the first video ID in the URL',
709 'info_dict': {
710 'id': 'BaW_jenozKc',
711 'ext': 'mp4',
3867038a 712 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
713 'uploader': 'Philipp Hagemeister',
714 'uploader_id': 'phihag',
ec85ded8 715 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 716 'upload_date': '20121002',
3867038a 717 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 718 'categories': ['Science & Technology'],
3867038a 719 'tags': ['youtube-dl'],
556dbe7f 720 'duration': 10,
dbdaaa23 721 'view_count': int,
11b56058
PM
722 'like_count': int,
723 'dislike_count': int,
34a7de29
S
724 },
725 'params': {
726 'skip_download': True,
727 },
11b56058 728 },
dd27fd17 729 {
2d3d2997 730 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
731 'note': '256k DASH audio (format 141) via DASH manifest',
732 'info_dict': {
733 'id': 'a9LDPn-MO4I',
734 'ext': 'm4a',
735 'upload_date': '20121002',
736 'uploader_id': '8KVIDEO',
ec85ded8 737 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
738 'description': '',
739 'uploader': '8KVIDEO',
740 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 741 },
4bc3a23e
PH
742 'params': {
743 'youtube_include_dash_manifest': True,
744 'format': '141',
4919603f 745 },
de3c7fe0 746 'skip': 'format 141 not served anymore',
dd27fd17 747 },
8bdd16b4 748 # DASH manifest with encrypted signature
749 {
750 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
751 'info_dict': {
752 'id': 'IB3lcPjvWLA',
753 'ext': 'm4a',
754 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
755 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
756 'duration': 244,
757 'uploader': 'AfrojackVEVO',
758 'uploader_id': 'AfrojackVEVO',
759 'upload_date': '20131011',
cc2db878 760 'abr': 129.495,
8bdd16b4 761 },
762 'params': {
763 'youtube_include_dash_manifest': True,
764 'format': '141/bestaudio[ext=m4a]',
765 },
766 },
aa79ac0c
PH
767 # Controversy video
768 {
769 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
770 'info_dict': {
771 'id': 'T4XJQO3qol8',
772 'ext': 'mp4',
556dbe7f 773 'duration': 219,
aa79ac0c 774 'upload_date': '20100909',
4fe54c12 775 'uploader': 'Amazing Atheist',
aa79ac0c 776 'uploader_id': 'TheAmazingAtheist',
ec85ded8 777 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 778 'title': 'Burning Everyone\'s Koran',
545cc85d 779 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 780 }
c522adb1 781 },
dd2d55f1 782 # Normal age-gate video (embed allowed)
c522adb1 783 {
2d3d2997 784 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
785 'info_dict': {
786 'id': 'HtVdAasjOgU',
787 'ext': 'mp4',
788 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 789 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 790 'duration': 142,
c522adb1
JMF
791 'uploader': 'The Witcher',
792 'uploader_id': 'WitcherGame',
ec85ded8 793 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 794 'upload_date': '20140605',
34952f09 795 'age_limit': 18,
c522adb1
JMF
796 },
797 },
8bdd16b4 798 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
799 # YouTube Red ad is not captured for creator
800 {
801 'url': '__2ABJjxzNo',
802 'info_dict': {
803 'id': '__2ABJjxzNo',
804 'ext': 'mp4',
805 'duration': 266,
806 'upload_date': '20100430',
807 'uploader_id': 'deadmau5',
808 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 809 'creator': 'deadmau5',
810 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 811 'uploader': 'deadmau5',
812 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 813 'alt_title': 'Some Chords',
8bdd16b4 814 },
815 'expected_warnings': [
816 'DASH manifest missing',
817 ]
818 },
067aa17e 819 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
820 {
821 'url': 'lqQg6PlCWgI',
822 'info_dict': {
823 'id': 'lqQg6PlCWgI',
824 'ext': 'mp4',
556dbe7f 825 'duration': 6085,
90227264 826 'upload_date': '20150827',
cbe2bd91 827 'uploader_id': 'olympic',
ec85ded8 828 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 829 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 830 'uploader': 'Olympic',
cbe2bd91
PH
831 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
832 },
833 'params': {
834 'skip_download': 'requires avconv',
e52a40ab 835 }
cbe2bd91 836 },
6271f1ca
PH
837 # Non-square pixels
838 {
839 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
840 'info_dict': {
841 'id': '_b-2C3KPAM0',
842 'ext': 'mp4',
843 'stretched_ratio': 16 / 9.,
556dbe7f 844 'duration': 85,
6271f1ca
PH
845 'upload_date': '20110310',
846 'uploader_id': 'AllenMeow',
ec85ded8 847 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 848 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 849 'uploader': '孫ᄋᄅ',
6271f1ca
PH
850 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
851 },
06b491eb
S
852 },
853 # url_encoded_fmt_stream_map is empty string
854 {
855 'url': 'qEJwOuvDf7I',
856 'info_dict': {
857 'id': 'qEJwOuvDf7I',
f57b7835 858 'ext': 'webm',
06b491eb
S
859 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
860 'description': '',
861 'upload_date': '20150404',
862 'uploader_id': 'spbelect',
863 'uploader': 'Наблюдатели Петербурга',
864 },
865 'params': {
866 'skip_download': 'requires avconv',
e323cf3f
S
867 },
868 'skip': 'This live event has ended.',
06b491eb 869 },
067aa17e 870 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
871 {
872 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
873 'info_dict': {
874 'id': 'FIl7x6_3R5Y',
eb6793ba 875 'ext': 'webm',
da77d856
S
876 'title': 'md5:7b81415841e02ecd4313668cde88737a',
877 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 878 'duration': 220,
da77d856
S
879 'upload_date': '20150625',
880 'uploader_id': 'dorappi2000',
ec85ded8 881 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 882 'uploader': 'dorappi2000',
eb6793ba 883 'formats': 'mincount:31',
da77d856 884 },
eb6793ba 885 'skip': 'not actual anymore',
2ee8f5d8 886 },
8a1a26ce
YCH
887 # DASH manifest with segment_list
888 {
889 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
890 'md5': '8ce563a1d667b599d21064e982ab9e31',
891 'info_dict': {
892 'id': 'CsmdDsKjzN8',
893 'ext': 'mp4',
17ee98e1 894 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
895 'uploader': 'Airtek',
896 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
897 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
898 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
899 },
900 'params': {
901 'youtube_include_dash_manifest': True,
902 'format': '135', # bestvideo
be49068d
S
903 },
904 'skip': 'This live event has ended.',
2ee8f5d8 905 },
cf7e015f
S
906 {
907 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 908 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 909 'info_dict': {
545cc85d 910 'id': 'jvGDaLqkpTg',
911 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
912 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
913 },
914 'playlist': [{
915 'info_dict': {
545cc85d 916 'id': 'jvGDaLqkpTg',
cf7e015f 917 'ext': 'mp4',
545cc85d 918 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
919 'description': 'md5:e03b909557865076822aa169218d6a5d',
920 'duration': 10643,
921 'upload_date': '20161111',
922 'uploader': 'Team PGP',
923 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
924 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
925 },
926 }, {
927 'info_dict': {
545cc85d 928 'id': '3AKt1R1aDnw',
cf7e015f 929 'ext': 'mp4',
545cc85d 930 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
931 'description': 'md5:e03b909557865076822aa169218d6a5d',
932 'duration': 10991,
933 'upload_date': '20161111',
934 'uploader': 'Team PGP',
935 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
936 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
937 },
938 }, {
939 'info_dict': {
545cc85d 940 'id': 'RtAMM00gpVc',
cf7e015f 941 'ext': 'mp4',
545cc85d 942 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
943 'description': 'md5:e03b909557865076822aa169218d6a5d',
944 'duration': 10995,
945 'upload_date': '20161111',
946 'uploader': 'Team PGP',
947 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
948 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
949 },
950 }, {
951 'info_dict': {
545cc85d 952 'id': '6N2fdlP3C5U',
cf7e015f 953 'ext': 'mp4',
545cc85d 954 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
955 'description': 'md5:e03b909557865076822aa169218d6a5d',
956 'duration': 10990,
957 'upload_date': '20161111',
958 'uploader': 'Team PGP',
959 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
960 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
961 },
962 }],
963 'params': {
964 'skip_download': True,
965 },
cbaed4bb 966 },
f9f49d87 967 {
067aa17e 968 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
969 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
970 'info_dict': {
971 'id': 'gVfLd0zydlo',
972 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
973 },
974 'playlist_count': 2,
be49068d 975 'skip': 'Not multifeed anymore',
f9f49d87 976 },
cbaed4bb 977 {
2d3d2997 978 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 979 'only_matching': True,
0e49d9a6 980 },
6d4fc66b 981 {
2d3d2997 982 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
983 'only_matching': True,
984 },
0e49d9a6 985 {
067aa17e 986 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 987 # Also tests cut-off URL expansion in video description (see
067aa17e
S
988 # https://github.com/ytdl-org/youtube-dl/issues/1892,
989 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
990 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
991 'info_dict': {
992 'id': 'lsguqyKfVQg',
993 'ext': 'mp4',
994 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 995 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 996 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 997 'duration': 133,
0e49d9a6
LL
998 'upload_date': '20151119',
999 'uploader_id': 'IronSoulElf',
ec85ded8 1000 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 1001 'uploader': 'IronSoulElf',
eb6793ba
S
1002 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1003 'track': 'Dark Walk - Position Music',
1004 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 1005 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
1006 },
1007 'params': {
1008 'skip_download': True,
1009 },
1010 },
61f92af1 1011 {
067aa17e 1012 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1013 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1014 'only_matching': True,
1015 },
313dfc45
LL
1016 {
1017 # Video with yt:stretch=17:0
1018 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1019 'info_dict': {
1020 'id': 'Q39EVAstoRM',
1021 'ext': 'mp4',
1022 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1023 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1024 'upload_date': '20151107',
1025 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1026 'uploader': 'CH GAMER DROID',
1027 },
1028 'params': {
1029 'skip_download': True,
1030 },
be49068d 1031 'skip': 'This video does not exist.',
313dfc45 1032 },
201c1459 1033 {
1034 # Video with incomplete 'yt:stretch=16:'
1035 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1036 'only_matching': True,
1037 },
7caf9830
S
1038 {
1039 # Video licensed under Creative Commons
1040 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1041 'info_dict': {
1042 'id': 'M4gD1WSo5mA',
1043 'ext': 'mp4',
1044 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1045 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1046 'duration': 721,
7caf9830
S
1047 'upload_date': '20150127',
1048 'uploader_id': 'BerkmanCenter',
ec85ded8 1049 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1050 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1051 'license': 'Creative Commons Attribution license (reuse allowed)',
1052 },
1053 'params': {
1054 'skip_download': True,
1055 },
1056 },
fd050249
S
1057 {
1058 # Channel-like uploader_url
1059 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1060 'info_dict': {
1061 'id': 'eQcmzGIKrzg',
1062 'ext': 'mp4',
1063 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1064 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1065 'duration': 4060,
fd050249 1066 'upload_date': '20151119',
eb6793ba 1067 'uploader': 'Bernie Sanders',
fd050249 1068 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1069 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1070 'license': 'Creative Commons Attribution license (reuse allowed)',
1071 },
1072 'params': {
1073 'skip_download': True,
1074 },
1075 },
040ac686
S
1076 {
1077 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1078 'only_matching': True,
7f29cf54
S
1079 },
1080 {
067aa17e 1081 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1082 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1083 'only_matching': True,
6496ccb4
S
1084 },
1085 {
1086 # Rental video preview
1087 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1088 'info_dict': {
1089 'id': 'uGpuVWrhIzE',
1090 'ext': 'mp4',
1091 'title': 'Piku - Trailer',
1092 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1093 'upload_date': '20150811',
1094 'uploader': 'FlixMatrix',
1095 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1096 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1097 'license': 'Standard YouTube License',
1098 },
1099 'params': {
1100 'skip_download': True,
1101 },
eb6793ba 1102 'skip': 'This video is not available.',
022a5d66 1103 },
12afdc2a
S
1104 {
1105 # YouTube Red video with episode data
1106 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1107 'info_dict': {
1108 'id': 'iqKdEhx-dD4',
1109 'ext': 'mp4',
1110 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1111 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1112 'duration': 2085,
12afdc2a
S
1113 'upload_date': '20170118',
1114 'uploader': 'Vsauce',
1115 'uploader_id': 'Vsauce',
1116 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1117 'series': 'Mind Field',
1118 'season_number': 1,
1119 'episode_number': 1,
1120 },
1121 'params': {
1122 'skip_download': True,
1123 },
1124 'expected_warnings': [
1125 'Skipping DASH manifest',
1126 ],
1127 },
c7121fa7
S
1128 {
1129 # The following content has been identified by the YouTube community
1130 # as inappropriate or offensive to some audiences.
1131 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1132 'info_dict': {
1133 'id': '6SJNVb0GnPI',
1134 'ext': 'mp4',
1135 'title': 'Race Differences in Intelligence',
1136 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1137 'duration': 965,
1138 'upload_date': '20140124',
1139 'uploader': 'New Century Foundation',
1140 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1141 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1142 },
1143 'params': {
1144 'skip_download': True,
1145 },
545cc85d 1146 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1147 },
022a5d66
S
1148 {
1149 # itag 212
1150 'url': '1t24XAntNCY',
1151 'only_matching': True,
fd5c4aab
S
1152 },
1153 {
1154 # geo restricted to JP
1155 'url': 'sJL6WA-aGkQ',
1156 'only_matching': True,
1157 },
cd5a74a2
S
1158 {
1159 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1160 'only_matching': True,
1161 },
bc2ca1bb 1162 {
1163 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1164 'only_matching': True,
1165 },
1166 {
1167 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1168 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1169 'only_matching': True,
1170 },
825cd268
RA
1171 {
1172 # DRM protected
1173 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1174 'only_matching': True,
4fe54c12
S
1175 },
1176 {
1177 # Video with unsupported adaptive stream type formats
1178 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1179 'info_dict': {
1180 'id': 'Z4Vy8R84T1U',
1181 'ext': 'mp4',
1182 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1183 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1184 'duration': 433,
1185 'upload_date': '20130923',
1186 'uploader': 'Amelia Putri Harwita',
1187 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1188 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1189 'formats': 'maxcount:10',
1190 },
1191 'params': {
1192 'skip_download': True,
1193 'youtube_include_dash_manifest': False,
1194 },
5429d6a9 1195 'skip': 'not actual anymore',
5caabd3c 1196 },
1197 {
822b9d9c 1198 # Youtube Music Auto-generated description
5caabd3c 1199 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1200 'info_dict': {
1201 'id': 'MgNrAu2pzNs',
1202 'ext': 'mp4',
1203 'title': 'Voyeur Girl',
1204 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1205 'upload_date': '20190312',
5429d6a9
S
1206 'uploader': 'Stephen - Topic',
1207 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1208 'artist': 'Stephen',
1209 'track': 'Voyeur Girl',
1210 'album': 'it\'s too much love to know my dear',
1211 'release_date': '20190313',
1212 'release_year': 2019,
1213 },
1214 'params': {
1215 'skip_download': True,
1216 },
1217 },
66b48727
RA
1218 {
1219 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1220 'only_matching': True,
1221 },
011e75e6
S
1222 {
1223 # invalid -> valid video id redirection
1224 'url': 'DJztXj2GPfl',
1225 'info_dict': {
1226 'id': 'DJztXj2GPfk',
1227 'ext': 'mp4',
1228 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1229 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1230 'upload_date': '20090125',
1231 'uploader': 'Prochorowka',
1232 'uploader_id': 'Prochorowka',
1233 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1234 'artist': 'Panjabi MC',
1235 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1236 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1237 },
1238 'params': {
1239 'skip_download': True,
1240 },
545cc85d 1241 'skip': 'Video unavailable',
ea74e00b
DP
1242 },
1243 {
1244 # empty description results in an empty string
1245 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1246 'info_dict': {
1247 'id': 'x41yOUIvK2k',
1248 'ext': 'mp4',
1249 'title': 'IMG 3456',
1250 'description': '',
1251 'upload_date': '20170613',
1252 'uploader_id': 'ElevageOrVert',
1253 'uploader': 'ElevageOrVert',
1254 },
1255 'params': {
1256 'skip_download': True,
1257 },
1258 },
a0566bbf 1259 {
29f7c58a 1260 # with '};' inside yt initial data (see [1])
1261 # see [2] for an example with '};' inside ytInitialPlayerResponse
1262 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1263 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1264 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1265 'info_dict': {
1266 'id': 'CHqg6qOn4no',
1267 'ext': 'mp4',
1268 'title': 'Part 77 Sort a list of simple types in c#',
1269 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1270 'upload_date': '20130831',
1271 'uploader_id': 'kudvenkat',
1272 'uploader': 'kudvenkat',
1273 },
1274 'params': {
1275 'skip_download': True,
1276 },
1277 },
29f7c58a 1278 {
1279 # another example of '};' in ytInitialData
1280 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1281 'only_matching': True,
1282 },
1283 {
1284 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1285 'only_matching': True,
1286 },
{
    # https://github.com/ytdl-org/youtube-dl/pull/28094
    # Auto-generated music description without a release date: both
    # release fields are expected to be absent (None).
    'url': 'OtqTfy26tG0',
    'info_dict': {
        'id': 'OtqTfy26tG0',
        'ext': 'mp4',
        'title': 'Burn Out',
        'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
        'upload_date': '20141120',
        'uploader': 'The Cinematic Orchestra - Topic',
        'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
        'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
        'artist': 'The Cinematic Orchestra',
        'track': 'Burn Out',
        'album': 'Every Day',
        # Key was misspelled 'release_data', which made this check vacuous
        'release_date': None,
        'release_year': None,
    },
    'params': {
        'skip_download': True,
    },
},
bc2ca1bb 1309 {
1310 # controversial video, only works with bpctr when authenticated with cookies
1311 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1312 'only_matching': True,
1313 },
f7ad7160 1314 {
1315 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1316 'url': 'cBvYw8_A0vQ',
1317 'info_dict': {
1318 'id': 'cBvYw8_A0vQ',
1319 'ext': 'mp4',
1320 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1321 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1322 'upload_date': '20201120',
1323 'uploader': 'Walk around Japan',
1324 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1325 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1326 },
1327 'params': {
1328 'skip_download': True,
1329 },
0fb983f6 1330 }, {
1331 # Has multiple audio streams
1332 'url': 'WaOKSUlf4TM',
1333 'only_matching': True
9297939e 1334 }, {
1335 # Requires Premium: has format 141 when requested using YTM url
1336 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1337 'only_matching': True
1338 }, {
120916da 1339 # multiple subtitles with same lang_code
1340 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1341 'only_matching': True,
1342 },
2eb88d95
PH
1343 ]
1344
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle url.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected here; every other URL is decided by the parent class.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780): parse_qs is imported at call time.
    from .youtube import parse_qs
    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super(YoutubeIE, cls).suitable(url)
1354
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create the per-instance caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # (player URL, signature shape) -> signature function
    self._player_cache = dict()
    # player id -> downloaded player code
    self._code_cache = dict()
e0df6211 1359
60064c53
PH
def _signature_cache_id(self, example_sig):
    """Return a string describing the shape of a signature.

    The result is the lengths of the dot-separated parts of example_sig,
    themselves joined with dots.
    """
    part_lengths = [compat_str(len(chunk)) for chunk in example_sig.split('.')]
    return '.'.join(part_lengths)
60064c53 1363
e40c758c
S
1364 @classmethod
1365 def _extract_player_info(cls, player_url):
1366 for player_re in cls._PLAYER_INFO_RE:
1367 id_m = re.search(player_re, player_url)
1368 if id_m:
1369 break
1370 else:
c081b35c 1371 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 1372 return id_m.group('id')
e40c758c
S
1373
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that transforms signatures shaped like example_sig.

    The downloader's 'youtube-sigfuncs' cache is consulted first. On a miss
    the player code is downloaded (at most once per player id on this
    instance), parsed with _parse_sig_js(), and the character permutation
    it performs is stored in the cache before the function is returned.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache; the key must be a bare name without any
    # path component.
    sig_shape = self._signature_cache_id(example_sig)
    func_id = 'js_%s_%s' % (player_id, sig_shape)
    assert os.path.basename(func_id) == func_id

    cached_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cached_spec is not None:
        def apply_cached_spec(s):
            # The cached spec lists, per output position, the input index
            return ''.join(s[i] for i in cached_spec)
        return apply_cached_spec

    if player_id not in self._code_cache:
        self._code_cache[player_id] = self._download_webpage(
            player_url, video_id,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
    sig_func = self._parse_sig_js(self._code_cache[player_id])

    # Feed a probe string whose i-th character has code point i, so the
    # output's code points are the permutation applied by the function.
    probe = ''.join(compat_chr(n) for n in range(len(example_sig)))
    permutation = [ord(ch) for ch in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, permutation)
    return sig_func
1400
def _print_sig_code(self, func, example_sig):
    """Print Python source code equivalent to func.

    func is run on a probe string as long as example_sig; the observed
    character permutation is rendered as a sum of index/slice expressions
    on ``s``, guarded by a check on the lengths of the dot-separated parts
    of the signature, and written out with to_screen().
    """
    def gen_sig_code(idxs):
        # Yield 's[...]' expressions covering idxs, collapsing runs of
        # consecutive indices (step +1 or -1) into a single slice.
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            # Slice ends are exclusive, hence end + step; a negative result
            # means the slice runs to the edge and is left open.
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return 's[%s%s%s]' % (starts, ends, steps)

        step = None
        # Silence pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                # Inside a run: keep going while the step is unchanged
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # A new run starts at prev
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        # Emit whatever is still pending after the last pair.
        # NOTE(review): when idxs has fewer than two elements the loop never
        # runs and `i` is unbound here.
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    # Probe string whose i-th character has code point i
    test_string = ''.join(map(compat_chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '(%s)' % (
        ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
    code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
            ' return %s\n') % (signature_id_tuple, expr_code)
    self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1439
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Build a Python callable from the signature function in the player JS.

    The name of the signature function is located in jscode with the
    patterns below (tried in order, first match wins); the function is
    then extracted with JSInterpreter.

    Returns a callable taking the signature string and returning the
    result of the JS function applied to it.
    """
    funcname_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        # This pattern used to be listed twice in a row; the second copy
        # could never match once the first had failed, so it was dropped.
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        funcname_patterns,
        jscode, 'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes its arguments as a list
    return lambda s: initial_function([s])
1463
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature.

    player_url may be protocol-relative or relative to
    https://www.youtube.com; it is made absolute before use. Signature
    functions are memoised on this instance per (player URL, signature
    shape).

    Raises ExtractorError when player_url is None, or when anything goes
    wrong while extracting or applying the signature function (the
    traceback is included in the message).
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    # Normalise the player URL to an absolute http(s) URL
    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif re.match(r'https?://', player_url) is None:
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key not in self._player_cache:
            self._player_cache[cache_key] = self._extract_signature_function(
                video_id, player_url, s)
        sig_func = self._player_cache[cache_key]
        if self.get_param('youtube_print_sig_code'):
            self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        raise ExtractorError(
            'Signature extraction failed: ' + traceback.format_exc(), cause=e)
e0df6211 1490
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL found in player_response.

    The URL is read from
    playbackTracking.videostatsPlaybackUrl.baseUrl; if it is missing or
    not a URL nothing happens. The request is non-fatal.
    """
    tracking_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not tracking_url:
        return

    url_parts = compat_urlparse.urlparse(tracking_url)
    query = compat_urlparse.parse_qs(url_parts.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    cpn_chars = [CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)]
    cpn = ''.join(cpn_chars)

    query['ver'] = ['2']
    query['cpn'] = [cpn]
    new_query = compat_urllib_parse_urlencode(query, True)
    tracking_url = compat_urlparse.urlunparse(url_parts._replace(query=new_query))

    self._download_webpage(
        tracking_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1515
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Collect references to embedded YouTube videos found in webpage.

    Three kinds of embeds are recognised:
      * player URLs (youtube.com / youtube-nocookie.com, /embed/, /v/ or /p/)
        quoted after an iframe/embed src, data-video-url, object data,
        swfobject embedSWF( or new SWFObject( call
      * lazyYT elements (value of data-youtube-id)
      * Wordpress "YouTube Video Importer" players (value of data-video_id)

    Returns a list of strings in the order above; the first kind yields
    URLs, the other two yield the raw attribute values.
    """
    # Embedded YouTube player
    # NB: the embedSWF alternative used to read `embedSWF\(?:\s*`, i.e. an
    # optional "(" followed by a literal ":", which can never match a
    # real `embedSWF("...")` call
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
        (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(list(map(
        unescapeHTML,
        re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall returns one tuple per match; the last group is the video id
    entries.extend(m[-1] for m in matches)

    return entries
1547
@staticmethod
def _extract_url(webpage):
    """Return the first entry of YoutubeIE._extract_urls(webpage), or None if there is none."""
    found = YoutubeIE._extract_urls(webpage)
    if not found:
        return None
    return found[0]
1552
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id (second group of cls._VALID_URL) for url.

    Raises ExtractorError when url does not match cls._VALID_URL.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1560
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build the chapter list from the chaptered player bar in data.

    Each chapter ends where the next one starts; the last chapter ends at
    duration. Chapters without a usable start or end time are dropped.
    Returns None when data has no chapter list, otherwise a list of dicts
    with 'start_time', 'end_time' (seconds) and 'title'.
    """
    raw_chapters = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not raw_chapters:
        return

    def start_seconds(item):
        # timeRangeStartMillis is in milliseconds
        millis = try_get(
            item, lambda x: x['chapterRenderer']['timeRangeStartMillis'], int)
        return float_or_none(millis, scale=1000)

    result = []
    last_index = len(raw_chapters) - 1
    for index, item in enumerate(raw_chapters):
        begin = start_seconds(item)
        if begin is None:
            continue
        if index < last_index:
            finish = start_seconds(raw_chapters[index + 1])
        else:
            finish = duration
        if finish is None:
            continue
        result.append({
            'start_time': begin,
            'end_time': finish,
            'title': try_get(
                item, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return result
1600
545cc85d 1601 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1602 return self._parse_json(self._search_regex(
1603 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1604 regex), webpage, name, default='{}'), video_id, fatal=False)
84213ea8 1605
@staticmethod
def parse_time_text(time_text):
    """
    Parse the comment time text
    time_text is in the format 'X units ago (edited)'

    Returns the result of datetime_from_str('now-<X><units>'), or None
    when time_text is missing/empty or has fewer than three
    space-separated parts.
    """
    # Callers may pass None when the comment carries no published time
    if not time_text:
        return None
    time_text_split = time_text.split(' ')
    if len(time_text_split) < 3:
        return None
    return datetime_from_str(
        'now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1615
a1c5d2ca
M
@staticmethod
def _join_text_entries(runs):
    """Concatenate the 'text' strings of the dict entries in runs.

    Non-dict entries and entries without a non-empty string 'text' are
    ignored. Returns None when nothing was collected.
    """
    pieces = []
    for run in runs:
        if isinstance(run, dict):
            piece = try_get(run, lambda x: x['text'], compat_str)
            if piece:
                pieces.append(piece)
    return ''.join(pieces) if pieces else None
1629
1630 def _extract_comment(self, comment_renderer, parent=None):
1631 comment_id = comment_renderer.get('commentId')
1632 if not comment_id:
1633 return
1634 comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
1635 text = self._join_text_entries(comment_text_runs) or ''
1636 comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
1637 time_text = self._join_text_entries(comment_time_text)
d92f5d5a 1638 timestamp = calendar.timegm(self.parse_time_text(time_text).timetuple())
a1c5d2ca
M
1639 author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
1640 author_id = try_get(comment_renderer,
1641 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
1642 votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
1643 lambda x: x['likeCount']), compat_str)) or 0
1644 author_thumbnail = try_get(comment_renderer,
1645 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
1646
1647 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
1648 is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
a1c5d2ca
M
1649 return {
1650 'id': comment_id,
1651 'text': text,
d92f5d5a 1652 'timestamp': timestamp,
a1c5d2ca
M
1653 'time_text': time_text,
1654 'like_count': votes,
1655 'is_favorited': is_liked,
1656 'author': author,
1657 'author_id': author_id,
1658 'author_thumbnail': author_thumbnail,
1659 'author_is_uploader': author_is_uploader,
1660 'parent': parent or 'root'
1661 }
1662
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, session_token_list, parent=None, comment_counts=None):
    """Generator over the comments reachable from root_continuation_data.

    Pages are requested from the comment_service_ajax endpoint by following
    continuations. Yields comment dicts as produced by _extract_comment and,
    on the first page of a top-level call, the estimated total comment
    count as an int (if one is found).

    When parent is None this is the top-level call: the first response is
    used to read the expected count and to switch to the sort order chosen
    by comment_sort_index. Otherwise parent is the id of the comment whose
    replies are being fetched.

    session_token_list is a one-element list; its item is replaced whenever
    a response carries a new xsrf_token, and the same list is handed to the
    recursive reply calls. comment_counts is the shared
    [comments so far, est. total comments, current comment thread #] list.
    """

    def extract_thread(parent_renderer):
        # Yields every comment in parent_renderer['contents'], each followed
        # by its replies (fetched through a recursive _comment_entries call)
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # NOTE(review): the getter argument below is a tuple that also
            # contains `dict`; presumably `dict` was meant to be try_get's
            # expected_type - confirm
            comment_renderer = try_get(
                comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                content, (lambda x: x['commentRenderer'], dict))

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    # TODO: Generalize the download code with TabIE
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    first_continuation = False
    if parent is None:
        first_continuation = True

    # One iteration per continuation page; stops when no continuation is left
    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        retries = self.get_param('extractor_retries', 3)
        count = -1
        last_error = None

        # Retry loop: left via `break` on a usable response, via `return` on
        # HTTP 413, or by raising once the retries are used up
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = 'Downloading initial comment continuation page'
                    else:
                        note_prefix = ' Downloading comment reply thread %d %s' % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = '%sDownloading comment%s page %d %s' % (
                        ' ' if parent else '',
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        self.report_warning('Assumed end of comments (received HTTP Error 413)')
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + ' (this API is probably deprecated)'
                    if count < retries:
                        continue
                # Any other error, or retries exhausted
                raise
            else:
                # Keep the xsrf token fresh for the next request
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response'])) or {}

                if response.get('continuationContents'):
                    break

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if browse.get('reload'):
                    raise ExtractorError('Invalid or missing params in continuation request', expected=False)

                # TODO: not tested, merged from old extractor
                err_msg = browse.get('externalErrorMessage')
                if err_msg:
                    raise ExtractorError('YouTube said: %s' % err_msg, expected=False)

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    raise ExtractorError(last_error)

        if not response:
            break
        # Prefer the visitorData of the latest response for later requests
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        known_continuation_renderers = {
            'itemSectionContinuation': extract_thread,
            'commentRepliesContinuation': extract_thread
        }

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        # Only the first known renderer is processed (every path below breaks)
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value

            if first_continuation:
                first_continuation = False
                expected_comment_count = try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str)

                if expected_comment_count:
                    comment_counts[1] = str_to_int(expected_comment_count)
                    self.to_screen('Downloading ~%d comments' % str_to_int(expected_comment_count))
                    # The estimated total is passed to the caller as a bare int
                    yield comment_counts[1]

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
                    [comment_sort_index]['continuation']['reloadContinuationData'], dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen('Sorting comments by %s' % ('popular' if comment_sort_index == 0 else 'newest'))
                    # Skip this page's comments; the next request uses the sorted continuation
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            break
1847
1848 def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
1849 """Entry for comment extraction"""
1850 comments = []
1851 known_entry_comment_renderers = (
1852 'itemSectionRenderer',
1853 )
1854 estimated_total = 0
1855 for entry in contents:
1856 for key, renderer in entry.items():
1857 if key not in known_entry_comment_renderers:
1858 continue
1859
1860 comment_iter = self._comment_entries(
1861 renderer,
1862 identity_token=self._extract_identity_token(webpage, item_id=video_id),
1863 account_syncid=self._extract_account_syncid(ytcfg),
f4f751af 1864 ytcfg=ytcfg,
a1c5d2ca
M
1865 session_token_list=[xsrf_token])
1866
1867 for comment in comment_iter:
1868 if isinstance(comment, int):
1869 estimated_total = comment
1870 continue
1871 comments.append(comment)
1872 break
d92f5d5a 1873 self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
a1c5d2ca
M
1874 return {
1875 'comments': comments,
1876 'comment_count': len(comments),
1877 }
1878
4e6767b5 1879 @staticmethod
1880 def _get_video_info_params(video_id):
1881 return {
1882 'video_id': video_id,
1883 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1884 'html5': '1',
1885 'c': 'TVHTML5',
1886 'cver': '6.20180913',
1887 }
1888
c5e8d7af 1889 def _real_extract(self, url):
cf7e015f 1890 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1891 video_id = self._match_id(url)
9297939e 1892
1893 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
1894
545cc85d 1895 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 1896 webpage_url = base_url + 'watch?v=' + video_id
1897 webpage = self._download_webpage(
cce889b9 1898 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 1899
9297939e 1900 def get_text(x):
1901 if not x:
1902 return
1903 text = x.get('simpleText')
1904 if text and isinstance(text, compat_str):
1905 return text
1906 runs = x.get('runs')
1907 if not isinstance(runs, list):
1908 return
1909 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
1910
1911 ytm_streaming_data = {}
1912 if is_music_url:
1913 # we are forcing to use parse_json because 141 only appeared in get_video_info.
1914 # el, c, cver, cplayer field required for 141(aac 256kbps) codec
1915 # maybe paramter of youtube music player?
1916 ytm_player_response = self._parse_json(try_get(compat_parse_qs(
1917 self._download_webpage(
1918 base_url + 'get_video_info', video_id,
fe03a6cd 1919 'Fetching youtube music info webpage',
1920 'unable to download youtube music info webpage', query={
4e6767b5 1921 **self._get_video_info_params(video_id),
9297939e 1922 'el': 'detailpage',
1923 'c': 'WEB_REMIX',
1924 'cver': '0.1',
00ae2769 1925 'cplayer': 'UNIPLAYER',
9297939e 1926 }, fatal=False)),
1927 lambda x: x['player_response'][0],
1928 compat_str) or '{}', video_id)
1929 ytm_streaming_data = ytm_player_response.get('streamingData') or {}
1930
545cc85d 1931 player_response = None
1932 if webpage:
1933 player_response = self._extract_yt_initial_variable(
1934 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1935 video_id, 'initial player response')
f4f751af 1936
1937 ytcfg = self._extract_ytcfg(video_id, webpage)
545cc85d 1938 if not player_response:
1939 player_response = self._call_api(
f4f751af 1940 'player', {'videoId': video_id}, video_id, api_key=self._extract_api_key(ytcfg))
545cc85d 1941
1942 playability_status = player_response.get('playabilityStatus') or {}
1943 if playability_status.get('reason') == 'Sign in to confirm your age':
1944 pr = self._parse_json(try_get(compat_parse_qs(
1945 self._download_webpage(
1946 base_url + 'get_video_info', video_id,
4e6767b5 1947 'Refetching age-gated info webpage', 'unable to download video info webpage',
1948 query=self._get_video_info_params(video_id), fatal=False)),
545cc85d 1949 lambda x: x['player_response'][0],
1950 compat_str) or '{}', video_id)
1951 if pr:
1952 player_response = pr
1953
1954 trailer_video_id = try_get(
1955 playability_status,
1956 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1957 compat_str)
1958 if trailer_video_id:
1959 return self.url_result(
1960 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1961
545cc85d 1962 search_meta = (
1963 lambda x: self._html_search_meta(x, webpage, default=None)) \
1964 if webpage else lambda x: None
dbdaaa23 1965
545cc85d 1966 video_details = player_response.get('videoDetails') or {}
37357d21 1967 microformat = try_get(
545cc85d 1968 player_response,
1969 lambda x: x['microformat']['playerMicroformatRenderer'],
1970 dict) or {}
1971 video_title = video_details.get('title') \
1972 or get_text(microformat.get('title')) \
1973 or search_meta(['og:title', 'twitter:title', 'title'])
1974 video_description = video_details.get('shortDescription')
cf7e015f 1975
8fe10494 1976 if not smuggled_data.get('force_singlefeed', False):
a06916d9 1977 if not self.get_param('noplaylist'):
8fe10494
S
1978 multifeed_metadata_list = try_get(
1979 player_response,
1980 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1981 compat_str)
8fe10494
S
1982 if multifeed_metadata_list:
1983 entries = []
1984 feed_ids = []
1985 for feed in multifeed_metadata_list.split(','):
1986 # Unquote should take place before split on comma (,) since textual
1987 # fields may contain comma as well (see
067aa17e 1988 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 1989 feed_data = compat_parse_qs(
1990 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1991
1992 def feed_entry(name):
545cc85d 1993 return try_get(
1994 feed_data, lambda x: x[name][0], compat_str)
6b09401b
S
1995
1996 feed_id = feed_entry('id')
1997 if not feed_id:
1998 continue
1999 feed_title = feed_entry('title')
2000 title = video_title
2001 if feed_title:
2002 title += ' (%s)' % feed_title
8fe10494
S
2003 entries.append({
2004 '_type': 'url_transparent',
2005 'ie_key': 'Youtube',
2006 'url': smuggle_url(
545cc85d 2007 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 2008 {'force_singlefeed': True}),
6b09401b 2009 'title': title,
8fe10494 2010 })
6b09401b 2011 feed_ids.append(feed_id)
8fe10494
S
2012 self.to_screen(
2013 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2014 % (', '.join(feed_ids), video_id))
545cc85d 2015 return self.playlist_result(
2016 entries, video_id, video_title, video_description)
8fe10494
S
2017 else:
2018 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 2019
9297939e 2020 formats, itags, stream_ids = [], [], []
cc2db878 2021 itag_qualities = {}
545cc85d 2022 player_url = None
d3fc8074 2023 q = qualities([
2024 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2025 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2026 ])
9297939e 2027
545cc85d 2028 streaming_data = player_response.get('streamingData') or {}
2029 streaming_formats = streaming_data.get('formats') or []
2030 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
9297939e 2031 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2032 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2033
545cc85d 2034 for fmt in streaming_formats:
2035 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2036 continue
321bf820 2037
cc2db878 2038 itag = str_or_none(fmt.get('itag'))
9297939e 2039 audio_track = fmt.get('audioTrack') or {}
2040 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2041 if stream_id in stream_ids:
2042 continue
2043
cc2db878 2044 quality = fmt.get('quality')
d3fc8074 2045 if quality == 'tiny' or not quality:
2046 quality = fmt.get('audioQuality', '').lower() or quality
cc2db878 2047 if itag and quality:
2048 itag_qualities[itag] = quality
2049 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2050 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2051 # number of fragment that would subsequently requested with (`&sq=N`)
2052 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2053 continue
2054
545cc85d 2055 fmt_url = fmt.get('url')
2056 if not fmt_url:
2057 sc = compat_parse_qs(fmt.get('signatureCipher'))
2058 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2059 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2060 if not (sc and fmt_url and encrypted_sig):
2061 continue
2062 if not player_url:
2063 if not webpage:
2064 continue
2065 player_url = self._search_regex(
2066 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
2067 webpage, 'player URL', fatal=False)
2068 if not player_url:
201e9eaa 2069 continue
545cc85d 2070 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2071 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2072 fmt_url += '&' + sp + '=' + signature
2073
545cc85d 2074 if itag:
2075 itags.append(itag)
9297939e 2076 stream_ids.append(stream_id)
2077
cc2db878 2078 tbr = float_or_none(
2079 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 2080 dct = {
2081 'asr': int_or_none(fmt.get('audioSampleRate')),
2082 'filesize': int_or_none(fmt.get('contentLength')),
2083 'format_id': itag,
0fb983f6 2084 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
545cc85d 2085 'fps': int_or_none(fmt.get('fps')),
2086 'height': int_or_none(fmt.get('height')),
dca3ff4a 2087 'quality': q(quality),
cc2db878 2088 'tbr': tbr,
545cc85d 2089 'url': fmt_url,
2090 'width': fmt.get('width'),
0fb983f6 2091 'language': audio_track.get('id', '').split('.')[0],
545cc85d 2092 }
2093 mimetype = fmt.get('mimeType')
2094 if mimetype:
2095 mobj = re.match(
2096 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
2097 if mobj:
2098 dct['ext'] = mimetype2ext(mobj.group(1))
2099 dct.update(parse_codecs(mobj.group(2)))
cc2db878 2100 no_audio = dct.get('acodec') == 'none'
2101 no_video = dct.get('vcodec') == 'none'
2102 if no_audio:
2103 dct['vbr'] = tbr
2104 if no_video:
2105 dct['abr'] = tbr
2106 if no_audio or no_video:
545cc85d 2107 dct['downloader_options'] = {
2108 # Youtube throttles chunks >~10M
2109 'http_chunk_size': 10485760,
bf1317d2 2110 }
7c60c33e 2111 if dct.get('ext'):
2112 dct['container'] = dct['ext'] + '_dash'
545cc85d 2113 formats.append(dct)
2114
9297939e 2115 for sd in (streaming_data, ytm_streaming_data):
2116 hls_manifest_url = sd.get('hlsManifestUrl')
2117 if hls_manifest_url:
2118 for f in self._extract_m3u8_formats(
2119 hls_manifest_url, video_id, 'mp4', fatal=False):
2120 itag = self._search_regex(
2121 r'/itag/(\d+)', f['url'], 'itag', default=None)
2122 if itag:
2123 f['format_id'] = itag
8d68ab98 2124 formats.append(f)
545cc85d 2125
a06916d9 2126 if self.get_param('youtube_include_dash_manifest', True):
9297939e 2127 for sd in (streaming_data, ytm_streaming_data):
2128 dash_manifest_url = sd.get('dashManifestUrl')
2129 if dash_manifest_url:
2130 for f in self._extract_mpd_formats(
2131 dash_manifest_url, video_id, fatal=False):
2132 itag = f['format_id']
2133 if itag in itags:
2134 continue
2135 if itag in itag_qualities:
9297939e 2136 f['quality'] = q(itag_qualities[itag])
2137 filesize = int_or_none(self._search_regex(
2138 r'/clen/(\d+)', f.get('fragment_base_url')
2139 or f['url'], 'file size', default=None))
2140 if filesize:
2141 f['filesize'] = filesize
2142 formats.append(f)
bf1317d2 2143
545cc85d 2144 if not formats:
a06916d9 2145 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
b7da73eb 2146 self.raise_no_formats(
545cc85d 2147 'This video is DRM protected.', expected=True)
2148 pemr = try_get(
2149 playability_status,
2150 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2151 dict) or {}
2152 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2153 subreason = pemr.get('subreason')
2154 if subreason:
2155 subreason = clean_html(get_text(subreason))
2156 if subreason == 'The uploader has not made this video available in your country.':
2157 countries = microformat.get('availableCountries')
2158 if not countries:
2159 regions_allowed = search_meta('regionsAllowed')
2160 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2161 self.raise_geo_restricted(subreason, countries, metadata_available=True)
545cc85d 2162 reason += '\n' + subreason
2163 if reason:
b7da73eb 2164 self.raise_no_formats(reason, expected=True)
bf1317d2 2165
545cc85d 2166 self._sort_formats(formats)
bf1317d2 2167
545cc85d 2168 keywords = video_details.get('keywords') or []
2169 if not keywords and webpage:
2170 keywords = [
2171 unescapeHTML(m.group('content'))
2172 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2173 for keyword in keywords:
2174 if keyword.startswith('yt:stretch='):
201c1459 2175 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2176 if mobj:
2177 # NB: float is intentional for forcing float division
2178 w, h = (float(v) for v in mobj.groups())
2179 if w > 0 and h > 0:
2180 ratio = w / h
2181 for f in formats:
2182 if f.get('vcodec') != 'none':
2183 f['stretched_ratio'] = ratio
2184 break
6449cd80 2185
545cc85d 2186 thumbnails = []
2187 for container in (video_details, microformat):
2188 for thumbnail in (try_get(
2189 container,
2190 lambda x: x['thumbnail']['thumbnails'], list) or []):
2191 thumbnail_url = thumbnail.get('url')
2192 if not thumbnail_url:
bf1317d2 2193 continue
1988fab7 2194 # Sometimes youtube gives a wrong thumbnail URL. See:
2195 # https://github.com/yt-dlp/yt-dlp/issues/233
2196 # https://github.com/ytdl-org/youtube-dl/issues/28023
2197 if 'maxresdefault' in thumbnail_url:
2198 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2199 thumbnails.append({
545cc85d 2200 'url': thumbnail_url,
ff2751ac 2201 'height': int_or_none(thumbnail.get('height')),
545cc85d 2202 'width': int_or_none(thumbnail.get('width')),
ff2751ac 2203 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
545cc85d 2204 })
ff2751ac 2205 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2206 if thumbnail_url:
2207 thumbnails.append({
2208 'url': thumbnail_url,
2209 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2210 })
2211 # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
2212 # See: https://github.com/ytdl-org/youtube-dl/issues/29049
2213 thumbnails.append({
2214 'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
2215 'preference': 1,
2216 })
2217 self._remove_duplicate_formats(thumbnails)
545cc85d 2218
2219 category = microformat.get('category') or search_meta('genre')
2220 channel_id = video_details.get('channelId') \
2221 or microformat.get('externalChannelId') \
2222 or search_meta('channelId')
2223 duration = int_or_none(
2224 video_details.get('lengthSeconds')
2225 or microformat.get('lengthSeconds')) \
2226 or parse_duration(search_meta('duration'))
2227 is_live = video_details.get('isLive')
2228 owner_profile_url = microformat.get('ownerProfileUrl')
2229
2230 info = {
2231 'id': video_id,
2232 'title': self._live_title(video_title) if is_live else video_title,
2233 'formats': formats,
2234 'thumbnails': thumbnails,
2235 'description': video_description,
2236 'upload_date': unified_strdate(
2237 microformat.get('uploadDate')
2238 or search_meta('uploadDate')),
2239 'uploader': video_details['author'],
2240 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2241 'uploader_url': owner_profile_url,
2242 'channel_id': channel_id,
2243 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2244 'duration': duration,
2245 'view_count': int_or_none(
2246 video_details.get('viewCount')
2247 or microformat.get('viewCount')
2248 or search_meta('interactionCount')),
2249 'average_rating': float_or_none(video_details.get('averageRating')),
2250 'age_limit': 18 if (
2251 microformat.get('isFamilySafe') is False
2252 or search_meta('isFamilyFriendly') == 'false'
2253 or search_meta('og:restrictions:age') == '18+') else 0,
2254 'webpage_url': webpage_url,
2255 'categories': [category] if category else None,
2256 'tags': keywords,
2257 'is_live': is_live,
2258 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2259 'was_live': video_details.get('isLiveContent'),
545cc85d 2260 }
b477fc13 2261
545cc85d 2262 pctr = try_get(
2263 player_response,
2264 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2265 subtitles = {}
2266 if pctr:
774d79cc 2267 def process_language(container, base_url, lang_code, sub_name, query):
120916da 2268 lang_subs = container.setdefault(lang_code, [])
545cc85d 2269 for fmt in self._SUBTITLE_FORMATS:
2270 query.update({
2271 'fmt': fmt,
2272 })
2273 lang_subs.append({
2274 'ext': fmt,
2275 'url': update_url_query(base_url, query),
774d79cc 2276 'name': sub_name,
545cc85d 2277 })
7e72694b 2278
545cc85d 2279 for caption_track in (pctr.get('captionTracks') or []):
2280 base_url = caption_track.get('baseUrl')
2281 if not base_url:
2282 continue
2283 if caption_track.get('kind') != 'asr':
120916da 2284 lang_code = (
2285 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2286 or caption_track.get('languageCode'))
545cc85d 2287 if not lang_code:
2288 continue
2289 process_language(
774d79cc 2290 subtitles, base_url, lang_code,
2291 try_get(caption_track, lambda x: x.get('name').get('simpleText')),
2292 {})
545cc85d 2293 continue
2294 automatic_captions = {}
2295 for translation_language in (pctr.get('translationLanguages') or []):
2296 translation_language_code = translation_language.get('languageCode')
2297 if not translation_language_code:
2298 continue
2299 process_language(
2300 automatic_captions, base_url, translation_language_code,
774d79cc 2301 try_get(translation_language, lambda x: x['languageName']['simpleText']),
545cc85d 2302 {'tlang': translation_language_code})
2303 info['automatic_captions'] = automatic_captions
2304 info['subtitles'] = subtitles
7e72694b 2305
545cc85d 2306 parsed_url = compat_urllib_parse_urlparse(url)
2307 for component in [parsed_url.fragment, parsed_url.query]:
2308 query = compat_parse_qs(component)
2309 for k, v in query.items():
2310 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2311 d_k += '_time'
2312 if d_k not in info and k in s_ks:
2313 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2314
2315 # Youtube Music Auto-generated description
822b9d9c 2316 if video_description:
38d70284 2317 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2318 if mobj:
822b9d9c
RA
2319 release_year = mobj.group('release_year')
2320 release_date = mobj.group('release_date')
2321 if release_date:
2322 release_date = release_date.replace('-', '')
2323 if not release_year:
545cc85d 2324 release_year = release_date[:4]
2325 info.update({
2326 'album': mobj.group('album'.strip()),
2327 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2328 'track': mobj.group('track').strip(),
2329 'release_date': release_date,
cc2db878 2330 'release_year': int_or_none(release_year),
545cc85d 2331 })
7e72694b 2332
545cc85d 2333 initial_data = None
2334 if webpage:
2335 initial_data = self._extract_yt_initial_variable(
2336 webpage, self._YT_INITIAL_DATA_RE, video_id,
2337 'yt initial data')
2338 if not initial_data:
2339 initial_data = self._call_api(
f4f751af 2340 'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg))
545cc85d 2341
c60ee3a2 2342 try:
2343 # This will error if there is no livechat
2344 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2345 info['subtitles']['live_chat'] = [{
2346 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2347 'video_id': video_id,
2348 'ext': 'json',
2349 'protocol': 'youtube_live_chat' if is_live else 'youtube_live_chat_replay',
2350 }]
2351 except (KeyError, IndexError, TypeError):
2352 pass
545cc85d 2353
2354 if initial_data:
2355 chapters = self._extract_chapters_from_json(
2356 initial_data, video_id, duration)
2357 if not chapters:
2358 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2359 contents = try_get(
2360 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2361 list)
2362 if not contents:
2363 continue
2364
2365 def chapter_time(mmlir):
2366 return parse_duration(
2367 get_text(mmlir.get('timeDescription')))
2368
2369 chapters = []
2370 for next_num, content in enumerate(contents, start=1):
2371 mmlir = content.get('macroMarkersListItemRenderer') or {}
2372 start_time = chapter_time(mmlir)
2373 end_time = chapter_time(try_get(
2374 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2375 if next_num < len(contents) else duration
2376 if start_time is None or end_time is None:
2377 continue
2378 chapters.append({
2379 'start_time': start_time,
2380 'end_time': end_time,
2381 'title': get_text(mmlir.get('title')),
2382 })
2383 if chapters:
2384 break
2385 if chapters:
2386 info['chapters'] = chapters
2387
2388 contents = try_get(
2389 initial_data,
2390 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2391 list) or []
2392 for content in contents:
2393 vpir = content.get('videoPrimaryInfoRenderer')
2394 if vpir:
2395 stl = vpir.get('superTitleLink')
2396 if stl:
2397 stl = get_text(stl)
2398 if try_get(
2399 vpir,
2400 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2401 info['location'] = stl
2402 else:
2403 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2404 if mobj:
2405 info.update({
2406 'series': mobj.group(1),
2407 'season_number': int(mobj.group(2)),
2408 'episode_number': int(mobj.group(3)),
2409 })
2410 for tlb in (try_get(
2411 vpir,
2412 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2413 list) or []):
2414 tbr = tlb.get('toggleButtonRenderer') or {}
2415 for getter, regex in [(
2416 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2417 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2418 lambda x: x['accessibility'],
2419 lambda x: x['accessibilityData']['accessibilityData'],
2420 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2421 label = (try_get(tbr, getter, dict) or {}).get('label')
2422 if label:
2423 mobj = re.match(regex, label)
2424 if mobj:
2425 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2426 break
2427 sbr_tooltip = try_get(
2428 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2429 if sbr_tooltip:
2430 like_count, dislike_count = sbr_tooltip.split(' / ')
2431 info.update({
2432 'like_count': str_to_int(like_count),
2433 'dislike_count': str_to_int(dislike_count),
2434 })
2435 vsir = content.get('videoSecondaryInfoRenderer')
2436 if vsir:
2437 info['channel'] = get_text(try_get(
2438 vsir,
2439 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2440 dict))
545cc85d 2441 rows = try_get(
2442 vsir,
2443 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2444 list) or []
2445 multiple_songs = False
2446 for row in rows:
2447 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2448 multiple_songs = True
2449 break
2450 for row in rows:
2451 mrr = row.get('metadataRowRenderer') or {}
2452 mrr_title = mrr.get('title')
2453 if not mrr_title:
2454 continue
2455 mrr_title = get_text(mrr['title'])
2456 mrr_contents_text = get_text(mrr['contents'][0])
2457 if mrr_title == 'License':
2458 info['license'] = mrr_contents_text
2459 elif not multiple_songs:
2460 if mrr_title == 'Album':
2461 info['album'] = mrr_contents_text
2462 elif mrr_title == 'Artist':
2463 info['artist'] = mrr_contents_text
2464 elif mrr_title == 'Song':
2465 info['track'] = mrr_contents_text
2466
2467 fallbacks = {
2468 'channel': 'uploader',
2469 'channel_id': 'uploader_id',
2470 'channel_url': 'uploader_url',
2471 }
2472 for to, frm in fallbacks.items():
2473 if not info.get(to):
2474 info[to] = info.get(frm)
2475
2476 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2477 v = info.get(s_k)
2478 if v:
2479 info[d_k] = v
b84071c0 2480
c224251a
M
2481 is_private = bool_or_none(video_details.get('isPrivate'))
2482 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2483 is_membersonly = None
b28f8d24 2484 is_premium = None
c224251a
M
2485 if initial_data and is_private is not None:
2486 is_membersonly = False
b28f8d24 2487 is_premium = False
c224251a
M
2488 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2489 for content in contents or []:
2490 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2491 for badge in badges or []:
2492 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2493 if label.lower() == 'members only':
2494 is_membersonly = True
2495 break
b28f8d24
M
2496 elif label.lower() == 'premium':
2497 is_premium = True
2498 break
2499 if is_membersonly or is_premium:
c224251a
M
2500 break
2501
2502 # TODO: Add this for playlists
2503 info['availability'] = self._availability(
2504 is_private=is_private,
b28f8d24 2505 needs_premium=is_premium,
c224251a
M
2506 needs_subscription=is_membersonly,
2507 needs_auth=info['age_limit'] >= 18,
2508 is_unlisted=None if is_private is None else is_unlisted)
2509
06167fbb 2510 # get xsrf for annotations or comments
a06916d9 2511 get_annotations = self.get_param('writeannotations', False)
2512 get_comments = self.get_param('getcomments', False)
06167fbb 2513 if get_annotations or get_comments:
29f7c58a 2514 xsrf_token = None
545cc85d 2515 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2516 if ytcfg:
2517 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2518 if not xsrf_token:
2519 xsrf_token = self._search_regex(
2520 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2521 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2522
2523 # annotations
06167fbb 2524 if get_annotations:
64b6a4e9
RA
2525 invideo_url = try_get(
2526 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2527 if xsrf_token and invideo_url:
29f7c58a 2528 xsrf_field_name = None
2529 if ytcfg:
2530 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2531 if not xsrf_field_name:
2532 xsrf_field_name = self._search_regex(
2533 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2534 webpage, 'xsrf field name',
29f7c58a 2535 group='xsrf_field_name', default='session_token')
8a784c74 2536 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2537 self._proto_relative_url(invideo_url),
2538 video_id, note='Downloading annotations',
2539 errnote='Unable to download video annotations', fatal=False,
2540 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2541
277d6ff5 2542 if get_comments:
a1c5d2ca 2543 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage, xsrf_token)
4ea3be0a 2544
545cc85d 2545 self.mark_watched(video_id, player_response)
d77ab8e2 2546
545cc85d 2547 return info
c5e8d7af 2548
5f6a1245 2549
8bdd16b4 2550class YoutubeTabIE(YoutubeBaseInfoExtractor):
2551 IE_DESC = 'YouTube.com tab'
70d5c17b 2552 _VALID_URL = r'''(?x)
2553 https?://
2554 (?:\w+\.)?
2555 (?:
2556 youtube(?:kids)?\.com|
2557 invidio\.us
2558 )/
2559 (?:
fe03a6cd 2560 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 2561 (?P<not_channel>
9ba5705a 2562 feed/|hashtag/|
70d5c17b 2563 (?:playlist|watch)\?.*?\blist=
2564 )|
29f7c58a 2565 (?!(?:%s)\b) # Direct URLs
70d5c17b 2566 )
2567 (?P<id>[^/?\#&]+)
2568 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2569 IE_NAME = 'youtube:tab'
2570
81127aa5 2571 _TESTS = [{
da692b79 2572 'note': 'playlists, multipage',
8bdd16b4 2573 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2574 'playlist_mincount': 94,
2575 'info_dict': {
2576 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2577 'title': 'Игорь Клейнер - Playlists',
2578 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2579 'uploader': 'Игорь Клейнер',
2580 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2581 },
2582 }, {
da692b79 2583 'note': 'playlists, multipage, different order',
8bdd16b4 2584 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2585 'playlist_mincount': 94,
2586 'info_dict': {
2587 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2588 'title': 'Игорь Клейнер - Playlists',
2589 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2590 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2591 'uploader': 'Игорь Клейнер',
8bdd16b4 2592 },
201c1459 2593 }, {
da692b79 2594 'note': 'playlists, series',
201c1459 2595 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
2596 'playlist_mincount': 5,
2597 'info_dict': {
2598 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2599 'title': '3Blue1Brown - Playlists',
2600 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 2601 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
2602 'uploader': '3Blue1Brown',
201c1459 2603 },
8bdd16b4 2604 }, {
da692b79 2605 'note': 'playlists, singlepage',
8bdd16b4 2606 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2607 'playlist_mincount': 4,
2608 'info_dict': {
2609 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2610 'title': 'ThirstForScience - Playlists',
2611 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2612 'uploader': 'ThirstForScience',
2613 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2614 }
2615 }, {
2616 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2617 'only_matching': True,
2618 }, {
da692b79 2619 'note': 'basic, single video playlist',
0e30a7b9 2620 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2621 'info_dict': {
0e30a7b9 2622 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2623 'uploader': 'Sergey M.',
2624 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2625 'title': 'youtube-dl public playlist',
81127aa5 2626 },
0e30a7b9 2627 'playlist_count': 1,
9291475f 2628 }, {
da692b79 2629 'note': 'empty playlist',
0e30a7b9 2630 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2631 'info_dict': {
0e30a7b9 2632 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2633 'uploader': 'Sergey M.',
2634 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2635 'title': 'youtube-dl empty playlist',
9291475f
PH
2636 },
2637 'playlist_count': 0,
2638 }, {
da692b79 2639 'note': 'Home tab',
8bdd16b4 2640 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2641 'info_dict': {
8bdd16b4 2642 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2643 'title': 'lex will - Home',
2644 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2645 'uploader': 'lex will',
2646 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2647 },
8bdd16b4 2648 'playlist_mincount': 2,
9291475f 2649 }, {
da692b79 2650 'note': 'Videos tab',
8bdd16b4 2651 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2652 'info_dict': {
8bdd16b4 2653 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2654 'title': 'lex will - Videos',
2655 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2656 'uploader': 'lex will',
2657 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2658 },
8bdd16b4 2659 'playlist_mincount': 975,
9291475f 2660 }, {
da692b79 2661 'note': 'Videos tab, sorted by popular',
8bdd16b4 2662 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2663 'info_dict': {
8bdd16b4 2664 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2665 'title': 'lex will - Videos',
2666 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2667 'uploader': 'lex will',
2668 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2669 },
8bdd16b4 2670 'playlist_mincount': 199,
9291475f 2671 }, {
da692b79 2672 'note': 'Playlists tab',
8bdd16b4 2673 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2674 'info_dict': {
8bdd16b4 2675 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2676 'title': 'lex will - Playlists',
2677 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2678 'uploader': 'lex will',
2679 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2680 },
8bdd16b4 2681 'playlist_mincount': 17,
ac7553d0 2682 }, {
da692b79 2683 'note': 'Community tab',
8bdd16b4 2684 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2685 'info_dict': {
8bdd16b4 2686 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2687 'title': 'lex will - Community',
2688 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2689 'uploader': 'lex will',
2690 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2691 },
2692 'playlist_mincount': 18,
87dadd45 2693 }, {
da692b79 2694 'note': 'Channels tab',
8bdd16b4 2695 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2696 'info_dict': {
8bdd16b4 2697 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2698 'title': 'lex will - Channels',
2699 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2700 'uploader': 'lex will',
2701 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2702 },
deaec5af 2703 'playlist_mincount': 12,
cd684175 2704 }, {
2705 'note': 'Search tab',
2706 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
2707 'playlist_mincount': 40,
2708 'info_dict': {
2709 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2710 'title': '3Blue1Brown - Search - linear algebra',
2711 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
2712 'uploader': '3Blue1Brown',
2713 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
2714 },
6b08cdf6 2715 }, {
a0566bbf 2716 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2717 'only_matching': True,
2718 }, {
a0566bbf 2719 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2720 'only_matching': True,
2721 }, {
a0566bbf 2722 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2723 'only_matching': True,
2724 }, {
2725 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2726 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2727 'info_dict': {
2728 'title': '29C3: Not my department',
2729 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2730 'uploader': 'Christiaan008',
2731 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2732 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2733 },
2734 'playlist_count': 96,
2735 }, {
2736 'note': 'Large playlist',
2737 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2738 'info_dict': {
8bdd16b4 2739 'title': 'Uploads from Cauchemar',
2740 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2741 'uploader': 'Cauchemar',
2742 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2743 },
8bdd16b4 2744 'playlist_mincount': 1123,
2745 }, {
da692b79 2746 'note': 'even larger playlist, 8832 videos',
8bdd16b4 2747 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2748 'only_matching': True,
4b7df0d3
JMF
2749 }, {
2750 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2751 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2752 'info_dict': {
acf757f4
PH
2753 'title': 'Uploads from Interstellar Movie',
2754 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2755 'uploader': 'Interstellar Movie',
8bdd16b4 2756 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2757 },
481cc733 2758 'playlist_mincount': 21,
358de58c 2759 }, {
2760 'note': 'Playlist with "show unavailable videos" button',
2761 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
2762 'info_dict': {
2763 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
2764 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
2765 'uploader': 'Phim Siêu Nhân Nhật Bản',
2766 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
2767 },
da692b79 2768 'playlist_mincount': 200,
5d342002 2769 }, {
da692b79 2770 'note': 'Playlist with unavailable videos in page 7',
5d342002 2771 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
2772 'info_dict': {
2773 'title': 'Uploads from BlankTV',
2774 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
2775 'uploader': 'BlankTV',
2776 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
2777 },
da692b79 2778 'playlist_mincount': 1000,
8bdd16b4 2779 }, {
da692b79 2780 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 2781 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2782 'info_dict': {
2783 'title': 'Data Analysis with Dr Mike Pound',
2784 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2785 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2786 'uploader': 'Computerphile',
deaec5af 2787 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2788 },
2789 'playlist_mincount': 11,
2790 }, {
a0566bbf 2791 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2792 'only_matching': True,
dacb3a86 2793 }, {
da692b79 2794 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
2795 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2796 'info_dict': {
2797 'id': 'FqZTN594JQw',
2798 'ext': 'webm',
2799 'title': "Smiley's People 01 detective, Adventure Series, Action",
2800 'uploader': 'STREEM',
2801 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2802 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2803 'upload_date': '20150526',
2804 'license': 'Standard YouTube License',
2805 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2806 'categories': ['People & Blogs'],
2807 'tags': list,
dbdaaa23 2808 'view_count': int,
dacb3a86
S
2809 'like_count': int,
2810 'dislike_count': int,
2811 },
2812 'params': {
2813 'skip_download': True,
2814 },
13a75688 2815 'skip': 'This video is not available.',
dacb3a86 2816 'add_ie': [YoutubeIE.ie_key()],
481cc733 2817 }, {
8bdd16b4 2818 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2819 'only_matching': True,
66b48727 2820 }, {
8bdd16b4 2821 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2822 'only_matching': True,
a0566bbf 2823 }, {
2824 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2825 'info_dict': {
da692b79 2826 'id': 'X1whbWASnNQ', # This will keep changing
a0566bbf 2827 'ext': 'mp4',
deaec5af 2828 'title': compat_str,
a0566bbf 2829 'uploader': 'Sky News',
2830 'uploader_id': 'skynews',
2831 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 2832 'upload_date': r're:\d{8}',
2833 'description': compat_str,
a0566bbf 2834 'categories': ['News & Politics'],
2835 'tags': list,
2836 'like_count': int,
2837 'dislike_count': int,
2838 },
2839 'params': {
2840 'skip_download': True,
2841 },
da692b79 2842 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 2843 }, {
2844 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2845 'info_dict': {
2846 'id': 'a48o2S1cPoo',
2847 'ext': 'mp4',
2848 'title': 'The Young Turks - Live Main Show',
2849 'uploader': 'The Young Turks',
2850 'uploader_id': 'TheYoungTurks',
2851 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2852 'upload_date': '20150715',
2853 'license': 'Standard YouTube License',
2854 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2855 'categories': ['News & Politics'],
2856 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2857 'like_count': int,
2858 'dislike_count': int,
2859 },
2860 'params': {
2861 'skip_download': True,
2862 },
2863 'only_matching': True,
2864 }, {
2865 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2866 'only_matching': True,
2867 }, {
2868 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2869 'only_matching': True,
09f1580e 2870 }, {
2871 'note': 'A channel that is not live. Should raise error',
2872 'url': 'https://www.youtube.com/user/numberphile/live',
2873 'only_matching': True,
3d3dddc9 2874 }, {
2875 'url': 'https://www.youtube.com/feed/trending',
2876 'only_matching': True,
2877 }, {
3d3dddc9 2878 'url': 'https://www.youtube.com/feed/library',
2879 'only_matching': True,
2880 }, {
3d3dddc9 2881 'url': 'https://www.youtube.com/feed/history',
2882 'only_matching': True,
2883 }, {
3d3dddc9 2884 'url': 'https://www.youtube.com/feed/subscriptions',
2885 'only_matching': True,
2886 }, {
3d3dddc9 2887 'url': 'https://www.youtube.com/feed/watch_later',
2888 'only_matching': True,
2889 }, {
da692b79 2890 'note': 'Recommended - redirects to home page',
3d3dddc9 2891 'url': 'https://www.youtube.com/feed/recommended',
2892 'only_matching': True,
29f7c58a 2893 }, {
da692b79 2894 'note': 'inline playlist with not always working continuations',
29f7c58a 2895 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2896 'only_matching': True,
2897 }, {
2898 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2899 'only_matching': True,
2900 }, {
2901 'url': 'https://www.youtube.com/course',
2902 'only_matching': True,
2903 }, {
2904 'url': 'https://www.youtube.com/zsecurity',
2905 'only_matching': True,
2906 }, {
2907 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2908 'only_matching': True,
2909 }, {
2910 'url': 'https://www.youtube.com/TheYoungTurks/live',
2911 'only_matching': True,
39ed931e 2912 }, {
2913 'url': 'https://www.youtube.com/hashtag/cctv9',
2914 'info_dict': {
2915 'id': 'cctv9',
2916 'title': '#cctv9',
2917 },
2918 'playlist_mincount': 350,
201c1459 2919 }, {
2920 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
2921 'only_matching': True,
9297939e 2922 }, {
da692b79 2923 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 2924 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2925 'only_matching': True
fe03a6cd 2926 }, {
2927 'note': '/browse/ should redirect to /channel/',
2928 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
2929 'only_matching': True
2930 }, {
2931 'note': 'VLPL, should redirect to playlist?list=PL...',
2932 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2933 'info_dict': {
2934 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2935 'uploader': 'NoCopyrightSounds',
2936 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
2937 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
2938 'title': 'NCS Releases',
2939 },
2940 'playlist_mincount': 166,
18db7548 2941 }, {
2942 'note': 'Topic, should redirect to playlist?list=UU...',
2943 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
2944 'info_dict': {
2945 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
2946 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
2947 'title': 'Uploads from Royalty Free Music - Topic',
2948 'uploader': 'Royalty Free Music - Topic',
2949 },
2950 'expected_warnings': [
2951 'A channel/user page was given',
2952 'The URL does not have a videos tab',
2953 ],
2954 'playlist_mincount': 101,
2955 }, {
2956 'note': 'Topic without a UU playlist',
2957 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
2958 'info_dict': {
2959 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
2960 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
2961 },
2962 'expected_warnings': [
2963 'A channel/user page was given',
2964 'The URL does not have a videos tab',
2965 'Falling back to channel URL',
2966 ],
2967 'playlist_mincount': 9,
abcdd12b 2968 }, {
2969 'note': 'Youtube music Album',
2970 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
2971 'info_dict': {
2972 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
2973 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
2974 },
2975 'playlist_count': 50,
29f7c58a 2976 }]
2977
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle ``url``.

    Any URL that ``YoutubeIE`` reports as suitable is declined here;
    every other URL is decided by the inherited ``suitable`` check.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2982
2983 def _extract_channel_id(self, webpage):
2984 channel_id = self._html_search_meta(
2985 'channelId', webpage, 'channel id', default=None)
2986 if channel_id:
2987 return channel_id
2988 channel_url = self._html_search_meta(
2989 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2990 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2991 'twitter:app:url:googleplay'), webpage, 'channel url')
2992 return self._search_regex(
2993 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2994 channel_url, 'channel id')
15f6397c 2995
8bdd16b4 2996 @staticmethod
cd7c66cf 2997 def _extract_basic_item_renderer(item):
2998 # Modified from _extract_grid_item_renderer
201c1459 2999 known_basic_renderers = (
3000 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 3001 )
3002 for key, renderer in item.items():
201c1459 3003 if not isinstance(renderer, dict):
cd7c66cf 3004 continue
201c1459 3005 elif key in known_basic_renderers:
3006 return renderer
3007 elif key.startswith('grid') and key.endswith('Renderer'):
3008 return renderer
8bdd16b4 3009
def _grid_entries(self, grid_renderer):
    """Yield one entry per usable item in ``grid_renderer['items']``.

    Items that are not dicts, or in which ``_extract_basic_item_renderer``
    finds no renderer dict, are skipped.  Each remaining renderer is
    classified by the first of these that applies:

    * ``playlistId`` -> URL result for the playlist page
    * ``videoId``    -> whatever ``_extract_video`` builds for it
    * ``channelId``  -> URL result for the channel page
    * otherwise      -> URL result for the navigation endpoint URL, using
      the first of YoutubeTabIE / YoutubePlaylistIE / YoutubeIE that
      reports the URL as suitable (nothing is yielded if none does)
    """
    for grid_item in grid_renderer['items']:
        if not isinstance(grid_item, dict):
            continue
        data = self._extract_basic_item_renderer(grid_item)
        if not isinstance(data, dict):
            continue
        title = try_get(
            data,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']),
            compat_str)

        playlist_id = data.get('playlistId')
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif data.get('videoId'):
            yield self._extract_video(data)
        elif data.get('channelId'):
            # Channel items take their title from ``simpleText`` only.
            channel_title = try_get(
                data, lambda x: x['title']['simpleText'], compat_str)
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % data['channelId'],
                ie=YoutubeTabIE.ie_key(), video_title=channel_title)
        else:
            # Generic fallback: follow the item's navigation endpoint.
            endpoint_url = urljoin('https://www.youtube.com/', try_get(
                data,
                lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str))
            if not endpoint_url:
                continue
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE)
                 if ie.suitable(endpoint_url)),
                None)
            if matching_ie is not None:
                yield self.url_result(
                    endpoint_url, ie=matching_ie.ie_key(),
                    video_id=matching_ie._match_id(endpoint_url),
                    video_title=title)
8bdd16b4 3052
3d3dddc9 3053 def _shelf_entries_from_content(self, shelf_renderer):
3054 content = shelf_renderer.get('content')
3055 if not isinstance(content, dict):
8bdd16b4 3056 return
cd7c66cf 3057 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3058 if renderer:
3059 # TODO: add support for nested playlists so each shelf is processed
3060 # as separate playlist
3061 # TODO: this includes only first N items
3062 for entry in self._grid_entries(renderer):
3063 yield entry
3064 renderer = content.get('horizontalListRenderer')
3065 if renderer:
3066 # TODO
3067 pass
8bdd16b4 3068
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelf: its own URL first, then its content.

    When the shelf endpoint resolves to a URL, a URL result titled with
    the first text run of the shelf title is yielded.  With
    ``skip_channels`` set, a shelf whose URL contains ``/channels?``
    yields nothing at all.  Afterwards the entries embedded in the shelf
    content are yielded as well, which covers shelves without a URL.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels.  Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work here.
        if skip_channels and '/channels?' in shelf_url:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(shelf_url, video_title=shelf_title)
    # The shelf may carry no URL of its own, so also extract from content.
    for item in self._shelf_entries_from_content(shelf_renderer):
        yield item
c5e8d7af 3086
8bdd16b4 3087 def _playlist_entries(self, video_list_renderer):
3088 for content in video_list_renderer['contents']:
3089 if not isinstance(content, dict):
3090 continue
3091 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3092 if not isinstance(renderer, dict):
3093 continue
3094 video_id = renderer.get('videoId')
3095 if not video_id:
3096 continue
3097 yield self._extract_video(renderer)
07aeced6 3098
def _rich_entries(self, rich_grid_renderer):
    """Yield the video found at ``content.videoRenderer``, if any.

    Nothing is yielded unless that path holds a dict with a truthy
    ``videoId``.
    """
    video = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
    if video and video.get('videoId'):
        yield self._extract_video(video)
3106
8bdd16b4 3107 def _video_entry(self, video_renderer):
3108 video_id = video_renderer.get('videoId')
3109 if video_id:
3110 return self._extract_video(video_renderer)
dacb3a86 3111
def _post_thread_entries(self, post_thread_renderer):
    """Yield the entries referenced by a single backstage post.

    In order: the attached video (if any), the attached playlist (if
    any), then every link in the post text that ``YoutubeIE`` accepts,
    except links pointing at the attached video itself.  Nothing is
    yielded when the thread holds no ``backstagePostRenderer`` dict.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # Video attached to the post
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # Playlist attached to the post
    playlist_id = try_get(
        post,
        lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'],
        compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # Video links embedded in the post text
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'],
            compat_str)
        if not (link and YoutubeIE.suitable(link)):
            continue
        linked_id = YoutubeIE._match_id(link)
        # A link to the attached video would only duplicate it.
        if video_id != linked_id:
            yield self.url_result(
                link, ie=YoutubeIE.ie_key(), video_id=linked_id)
dacb3a86 3147
8bdd16b4 3148 def _post_thread_continuation_entries(self, post_thread_continuation):
3149 contents = post_thread_continuation.get('contents')
3150 if not isinstance(contents, list):
3151 return
3152 for content in contents:
3153 renderer = content.get('backstagePostThreadRenderer')
3154 if not isinstance(renderer, dict):
3155 continue
3156 for entry in self._post_thread_entries(renderer):
3157 yield entry
07aeced6 3158
39ed931e 3159 r''' # unused
3160 def _rich_grid_entries(self, contents):
3161 for content in contents:
3162 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3163 if video_renderer:
3164 entry = self._video_entry(video_renderer)
3165 if entry:
3166 yield entry
3167 '''
3168
29f7c58a 3169 @staticmethod
3170 def _build_continuation_query(continuation, ctp=None):
3171 query = {
3172 'ctoken': continuation,
3173 'continuation': continuation,
3174 }
3175 if ctp:
3176 query['itct'] = ctp
3177 return query
3178
@staticmethod
def _extract_next_continuation_data(renderer):
    """Return a continuation query built from ``nextContinuationData``.

    Only the first element of ``renderer['continuations']`` is inspected.
    Returns None when it is absent or has no ``continuation`` token.
    """
    next_data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict) or {}
    token = next_data.get('continuation')
    if not token:
        return None
    return YoutubeTabIE._build_continuation_query(
        token, next_data.get('clickTrackingParams'))
c5e8d7af 3190
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query of ``renderer``, or None.

    ``nextContinuationData`` is preferred. Failing that, the items of the
    renderer's ``contents`` and then ``items`` lists are scanned for the
    first ``continuationItemRenderer`` whose endpoint carries a
    ``continuationCommand`` token.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query

    candidates = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
    return None
448830ce 3213
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield every entry of a tab, following its continuation pages.

    The tab's ``sectionListRenderer`` (or ``richGridRenderer``) is walked
    first. Continuation pages are then requested through
    ``_extract_response`` until no continuation is found, a request yields
    no response, or a page contains no renderer known to this method.

    ``identity_token``, ``account_syncid`` and ``ytcfg`` are only used to
    build the API headers and context of the continuation requests;
    ``item_id`` is only used to label those requests.
    """

    def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
        # Dispatch table: renderer key -> callable returning an iterable of entries
        known_renderers = {
            'playlistVideoListRenderer': self._playlist_entries,
            'gridRenderer': self._grid_entries,
            'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
            'backstagePostThreadRenderer': self._post_thread_entries,
            'videoRenderer': lambda x: [self._video_entry(x)],
        }
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                # Only the first known renderer of each item is processed
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list shared with extract_entries(): Python 2 does not support nonlocal
    continuation_list = [None]
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

    # Renderers found under response['continuationContents']
    known_continuation_renderers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Renderers found in appended continuation items, mapped to
    # (entry extractor, key under which that extractor expects the items)
    known_renderers = {
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    for page_num in itertools.count(1):
        if not continuation:
            break
        query = {'continuation': continuation['continuation']}
        # 'itct' is only present when the continuation carried click tracking
        # params; indexing it unconditionally raised KeyError otherwise
        click_tracking_params = continuation.get('itct')
        if click_tracking_params:
            query['clickTracking'] = {'clickTrackingParams': click_tracking_params}
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=query, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Reset so that a stale continuation is never reused
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key in continuation_item:
            if key not in known_renderers:
                continue
            # Wrap the raw items under the key the chosen extractor reads
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        break
9558dcec 3333
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the first tab whose ``selected`` is True.

    Both ``tabRenderer`` and ``expandableTabRenderer`` are considered.
    Raises ExtractorError when no tab is marked as selected.
    """
    for candidate in tabs:
        tab_renderer = dict_get(candidate, ('tabRenderer', 'expandableTabRenderer')) or {}
        if tab_renderer.get('selected') is True:
            return tab_renderer
    raise ExtractorError('Unable to find selected tab')
b82f815f 3342
@staticmethod
def _extract_uploader(data):
    """Return uploader metadata read from the playlist sidebar.

    The result may contain ``uploader``, ``uploader_id`` and
    ``uploader_url``, taken from the video owner of a
    ``playlistSidebarSecondaryInfoRenderer``. Keys whose value is None are
    left out; the dict is empty when no owner is found.
    """
    info = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        base_url = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)
        info['uploader'] = owner.get('text')
        info['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        info['uploader_url'] = urljoin('https://www.youtube.com/', base_url)
    return dict((key, value) for key, value in info.items() if value is not None)
8bdd16b4 3365
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a tab-based page.

    Metadata comes from ``channelMetadataRenderer`` when present, otherwise
    from ``playlistMetadataRenderer``. The playlist id falls back to
    ``item_id`` and the title to the hashtag header text or the playlist
    id. The selected tab's ``title`` and ``expandedText`` are appended to
    the title. Entries are produced lazily by ``_entries``.
    """
    selected_tab = self._extract_selected_tab(tabs)

    channel_name = channel_url = channel_id = None
    metadata_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if metadata_renderer:
        channel_name = metadata_renderer.get('title')
        channel_url = metadata_renderer.get('channelUrl')
        channel_id = metadata_renderer.get('externalId')
    else:
        metadata_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    playlist_id = title = description = None
    tags = []
    raw_thumbnails = []
    if metadata_renderer:
        title = metadata_renderer.get('title')
        description = metadata_renderer.get('description', '')
        # None for playlist pages; replaced by item_id below
        playlist_id = channel_id
        tags = metadata_renderer.get('keywords', '').split()
        raw_thumbnails = (
            try_get(metadata_renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for thumb in raw_thumbnails:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        hashtag = try_get(
            data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
        title = hashtag or playlist_id
    for tab_field in ('title', 'expandedText'):
        title += format_field(selected_tab, tab_field, ' - %s')

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        # No channel metadata: fall back to the playlist sidebar owner
        metadata.update(self._extract_uploader(data))
    metadata['channel'] = metadata['uploader']
    metadata['channel_id'] = metadata['uploader_id']
    metadata['channel_url'] = metadata['uploader_url']

    identity_token = self._extract_identity_token(webpage, item_id)
    account_syncid = self._extract_account_syncid(data)
    ytcfg = self._extract_ytcfg(item_id, webpage)
    entries = self._entries(
        selected_tab, playlist_id, identity_token, account_syncid, ytcfg)
    return self.playlist_result(entries, **metadata)
73c4ac2c 3438
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a mix playlist, page by page.

    Each page is requested from the ``next`` endpoint, starting after the
    last video already yielded. Iteration stops when a page has no videos,
    has nothing new after the last yielded video, repeats the very first
    video, or when the response carries no playlist.
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
        visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last item may lack a watch endpoint: fall back to an empty dict
        # so that the defaults below apply instead of raising AttributeError
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query,
            ep='next',
            headers=headers,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # No further playlist data: stop instead of subscripting None
            return
cd7c66cf 3477
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Return the result for a playlist embedded in a watch page.

    When the playlist advertises a web URL different from ``url``, the
    extraction is delegated to that URL. Otherwise the playlist is
    extracted as a mix.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_url)
    if not playlist_url or playlist_url == url:
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, webpage),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3495
@staticmethod
def _extract_alerts(data):
    """Yield ``(alert_type, message)`` for every alert found in ``data``.

    The message is the alert text's ``simpleText`` followed by the
    concatenated text of its ``runs``. Each alert is yielded at most once;
    alerts without a type or without any text are skipped, as are entries
    that are not dicts.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = try_get(alert, lambda x: x['text']['simpleText'], compat_str) or ''
            for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
                # A run without usable text contributes nothing (not None)
                message += try_get(run, lambda x: x['text'], compat_str) or ''
            if message:
                yield alert_type, message
3512
3513 def _report_alerts(self, alerts, expected=True):
3ffc7c89 3514 errors = []
3515 warnings = []
95c01b6c 3516 for alert_type, alert_message in alerts:
f3eaa8dd 3517 if alert_type.lower() == 'error':
3ffc7c89 3518 errors.append([alert_type, alert_message])
f3eaa8dd 3519 else:
3ffc7c89 3520 warnings.append([alert_type, alert_message])
f3eaa8dd 3521
3ffc7c89 3522 for alert_type, alert_message in (warnings + errors[:-1]):
6a39ee13 3523 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
3ffc7c89 3524 if errors:
3525 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
02ced43c 3526
95c01b6c 3527 def _extract_and_report_alerts(self, data, *args, **kwargs):
3528 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
3529
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Request the playlist again so that unavailable videos are included.

    Returns None when the page has no playlist sidebar. Otherwise the
    sidebar menus are searched for the 'show unavailable videos' item and
    its browse endpoint is used for the request; when no such item is
    found, 'wgYCCAA=' and 'VL<item_id>' are used as params and browse id.
    The request is non-fatal.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    if not sidebar_items:
        return

    browse_id = params = None
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        primary_info = sidebar_item.get('playlistSidebarPrimaryInfoRenderer')
        menu_items = try_get(
            primary_info, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_items:
            if not isinstance(menu_item, dict):
                continue
            nav_item = menu_item.get('menuNavigationItemRenderer')
            label = try_get(
                nav_item, lambda x: x['text']['simpleText'], compat_str)
            if label and label.lower() == 'show unavailable videos':
                browse_endpoint = try_get(
                    nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
                browse_id = browse_endpoint.get('browseId')
                params = browse_endpoint.get('params')
                # Only leaves this menu; later sidebar items are still scanned
                break

    ytcfg = self._extract_ytcfg(item_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=try_get(
            self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False,
        note='Downloading API JSON with unavailable videos')
358de58c 3573
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True):
    """Call the API endpoint ``ep`` with retries and return the response.

    A request is retried, up to the ``extractor_retries`` param (default 3),
    on HTTP 500/503/404 errors and when none of ``check_get_keys`` is found
    in the response. Alerts contained in a response are reported and error
    alerts raise.

    When ``fatal`` is false, failures are reported as warnings and None is
    returned instead of raising.
    """
    response = last_error = None
    max_retries = self.get_param('extractor_retries', 3)
    if check_get_keys is None:
        check_get_keys = []

    attempt = -1
    while attempt < max_retries:
        attempt += 1
        if last_error:
            self.report_warning('%s. Retrying ...' % last_error)
        retry_suffix = ' (retry #%d)' % attempt if attempt else ''
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query,
                context=self._extract_context(ytcfg),
                api_key=self._extract_api_key(ytcfg),
                note='%s%s' % (note, retry_suffix))
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                last_error = 'HTTP Error %s' % e.cause.code
                if attempt < max_retries:
                    continue
            if not fatal:
                self.report_warning(error_to_compat_str(e))
                return
            raise

        # Only reached when the API call succeeded.
        # Youtube may send alerts if there was an issue with the continuation page
        try:
            self._extract_and_report_alerts(response, expected=False)
        except ExtractorError as e:
            if fatal:
                raise
            self.report_warning(error_to_compat_str(e))
            return
        if not check_get_keys or dict_get(response, check_get_keys):
            break
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        last_error = 'Incomplete data received'
        if attempt >= max_retries:
            if fatal:
                raise ExtractorError(last_error)
            self.report_warning(last_error)
            return
    return response
3627
cd7c66cf 3628 def _extract_webpage(self, url, item_id):
a06916d9 3629 retries = self.get_param('extractor_retries', 3)
62bff2c1 3630 count = -1
c705177d 3631 last_error = 'Incomplete yt initial data recieved'
14fdfea9 3632 while count < retries:
62bff2c1 3633 count += 1
14fdfea9 3634 # Sometimes youtube returns a webpage with incomplete ytInitialData
62bff2c1 3635 # See: https://github.com/yt-dlp/yt-dlp/issues/116
3636 if count:
c705177d 3637 self.report_warning('%s. Retrying ...' % last_error)
5ef7d9bd 3638 webpage = self._download_webpage(
3639 url, item_id,
cd7c66cf 3640 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
14fdfea9 3641 data = self._extract_yt_initial_data(item_id, webpage)
14fdfea9 3642 if data.get('contents') or data.get('currentVideoEndpoint'):
3643 break
95c01b6c 3644 # Extract alerts here only when there is error
3645 self._extract_and_report_alerts(data)
c705177d 3646 if count >= retries:
6a39ee13 3647 raise ExtractorError(last_error)
cd7c66cf 3648 return webpage, data
3649
9297939e 3650 @staticmethod
3651 def _smuggle_data(entries, data):
3652 for entry in entries:
3653 if data:
3654 entry['url'] = smuggle_url(entry['url'], data)
3655 yield entry
3656
def _real_extract(self, url):
    """Extract ``url`` and propagate the smuggled data to its entries.

    URLs recognised by ``is_music_url`` are flagged in the smuggled data
    before the actual extraction.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled_data['is_music_url'] = True

    result = self.__real_extract(url, smuggled_data)
    if result.get('entries'):
        result['entries'] = self._smuggle_data(result['entries'], smuggled_data)
    return result
3665
# Splits a URL into 'pre' (the part matched by _VALID_URL), an optional
# '/tab' path component that is only attempted when the 'channel_type'
# group of _VALID_URL matched, and 'post' (everything that follows).
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
3667
def __real_extract(self, url, smuggled_data):
    """Extract a tab, playlist or video page.

    The URL is first normalised: the host becomes www.youtube.com, the tab
    name is lower-cased, YouTube Music channel URLs are mapped to their
    playlist or channel equivalents, and channel home URLs are redirected
    to ``/videos`` unless the ``no-youtube-channel-redirect`` compat option
    is set. The downloaded page is then dispatched to
    ``_extract_from_tabs``, ``_extract_from_playlist`` or a single video
    result.

    Raises ExtractorError when the page cannot be recognised, or when a
    ``/live`` tab did not redirect to a video.
    """
    item_id = self._match_id(url)
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Named groups of _url_re, with unmatched groups mapped to ''
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                item_id = self._search_regex(
                    r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                    self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                    'playlist id')
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            # Compare case-insensitively, as _report_alerts does
                            if alert_type.lower() == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)

    # Looked up again: data may have been replaced above
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 3783
c5e8d7af 3784
class YoutubePlaylistIE(InfoExtractor):
    """Match bare playlist ids and playlist URLs, delegating to YoutubeTabIE."""

    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs handled by YoutubeTabIE and URLs carrying a video id."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite the URL as a /playlist URL and hand it to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query when there is one, else query by playlist id
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 3867
3868
class YoutubeYtBeIE(InfoExtractor):
    """Match youtu.be short links that also carry a playlist id."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite the short link as a /watch URL and hand it to YoutubeTabIE."""
        video_id, playlist_id = re.match(
            self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3907
3908
class YoutubeYtUserIE(InfoExtractor):
    """Resolve the ``ytuser:<name>`` shorthand to the user's channel URL."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the /user/ page, handled by YoutubeTabIE."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3922
b05654f0 3923
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the ``:ytfav`` family of shortcuts to the ``LL`` playlist URL."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed ``list=LL`` playlist URL."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
3941
3942
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor behind the "ytsearch" keyword.

    Result pages are requested from the ``search`` endpoint one after another,
    following continuation tokens, and every video renderer found is yielded
    until the requested number of results has been produced.
    """
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    _SEARCH_KEY = 'ytsearch'
    # Optional value sent as the 'params' field of the search request;
    # subclasses override it.
    _SEARCH_PARAMS = None
    # No real upper bound seems to exist: a search for 'python', for
    # instance, reports more than 8.000.000 results.
    _MAX_RESULTS = float('inf')
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* extracted videos for the search *query*.

        Stops early when a page cannot be fetched, contains no result
        sections, or provides no continuation token for the next page.
        """
        request_data = {'query': query}
        if self._SEARCH_PARAMS:
            request_data['params'] = self._SEARCH_PARAMS

        yielded = 0
        for page_num in itertools.count(1):
            response = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num),
                ep='search', query=request_data,
                check_get_keys=('contents', 'onResponseReceivedCommands'))
            if not response:
                return

            # First page and continuation pages keep the sections in
            # different places; try both.
            sections = try_get(
                response,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Promoted content may be mixed into the results, which shifts
            # the positions of both the videos and the continuation token,
            # so every section is inspected instead of relying on an index.
            next_token = None
            for section in sections:
                if next_token is None:
                    next_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for item in items or ():
                    renderer = item.get('videoRenderer') if isinstance(item, dict) else None
                    if not isinstance(renderer, dict):
                        continue
                    if not renderer.get('videoId'):
                        continue

                    yield self._extract_video(renderer)
                    yielded += 1
                    if yielded == n:
                        return

            if not next_token:
                return
            request_data['continuation'] = next_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
75dff0ee 4012
c9ae7b95 4013
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE reachable through the "ytsearchdate" keyword."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Sent as the 'params' field of the search request; presumably the
    # "sort by upload date" filter (matches IE_DESC) - TODO confirm.
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4019
c9ae7b95 4020
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Extract results from a youtube.com ``/results?...`` search page URL."""
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own _VALID_URL pattern unchanged."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run the search described by the URL's query string.

        The search terms come from ``search_query`` (or ``q``), and the
        optional ``sp`` value is stored in ``_SEARCH_PARAMS`` on this instance
        before the results are collected.
        """
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        search_terms = params.get('search_query') or params.get('q')
        query = search_terms[0]
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4046
4047
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed shortcuts.

    A subclass only has to provide _FEED_NAME; both the extractor name and
    the feed URL are derived from it.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name of the form ``youtube:<_FEED_NAME>``."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the ``/feed/<_FEED_NAME>`` URL."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4064
4065
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ``:ytwatchlater`` shortcut to the ``WL`` playlist URL."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed ``list=WL`` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4078
4079
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``:ytrec`` / ``:ytrecommended`` and the bare site URL."""
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _FEED_NAME = 'recommended'
    # Overrides the True default set by YoutubeFeedsInfoExtractor.
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4095
1ed5b5c9 4096
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``:ytsubs`` family of shortcuts."""
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4108
1ed5b5c9 4109
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``:ythis`` / ``:ythistory`` shortcuts."""
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4118
4119
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that lost their video id.

    The pattern only matches URLs that end right after ``watch?``, after a
    single known non-video parameter, or after an ``attribution_link?a=...``
    parameter. This is what remains when an unquoted ``&`` makes the shell
    cut the URL short. Extraction always fails with an explanatory error.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL.

        Raises:
            ExtractorError: unconditionally, with ``expected=True``.
        """
        # The example commands name this project's executable (yt-dlp), so
        # the hint can be copied and run as-is.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4167
4168
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` value is 1 to 10 characters long.

    Such URLs are reported as truncated instead of being extracted.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        short_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (short_id, url)
        raise ExtractorError(message, expected=True)