]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/youtube.py
[hls] Decrypt fragment when reading from disk
[yt-dlp.git] / yt_dlp / extractor / youtube.py
CommitLineData
c5e8d7af 1# coding: utf-8
c5e8d7af 2
78caa52a
PH
3from __future__ import unicode_literals
4
d92f5d5a 5import calendar
a5c56234 6import hashlib
0ca96d48 7import itertools
c5e8d7af 8import json
c4417ddb 9import os.path
d77ab8e2 10import random
c5e8d7af 11import re
8a784c74 12import time
e0df6211 13import traceback
c5e8d7af 14
b05654f0 15from .common import InfoExtractor, SearchInfoExtractor
4bb4a188 16from ..compat import (
edf3e38e 17 compat_chr,
29f7c58a 18 compat_HTTPError,
c5e8d7af 19 compat_parse_qs,
545cc85d 20 compat_str,
7fd002c0 21 compat_urllib_parse_unquote_plus,
15707c7e 22 compat_urllib_parse_urlencode,
7c80519c 23 compat_urllib_parse_urlparse,
7c61bd36 24 compat_urlparse,
4bb4a188 25)
545cc85d 26from ..jsinterp import JSInterpreter
4bb4a188 27from ..utils import (
c224251a 28 bool_or_none,
c5e8d7af 29 clean_html,
26fe8ffe 30 dict_get,
d92f5d5a 31 datetime_from_str,
358de58c 32 error_to_compat_str,
c5e8d7af 33 ExtractorError,
b60419c5 34 format_field,
2d30521a 35 float_or_none,
dd27fd17 36 int_or_none,
94278f72 37 mimetype2ext,
6310acf5 38 parse_codecs,
7c80519c 39 parse_duration,
dca3ff4a 40 qualities,
3995d37d 41 remove_start,
cf7e015f 42 smuggle_url,
dbdaaa23 43 str_or_none,
c93d53f5 44 str_to_int,
556dbe7f 45 try_get,
c5e8d7af
PH
46 unescapeHTML,
47 unified_strdate,
cf7e015f 48 unsmuggle_url,
8bdd16b4 49 update_url_query,
21c340b8 50 url_or_none,
6e6bc8da 51 urlencode_postdata,
d92f5d5a 52 urljoin
c5e8d7af
PH
53)
54
5f6a1245 55
def parse_qs(url):
    """Parse the query component of *url* with compat_urlparse.parse_qs and return the result."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
58
59
de7f3446 60class YoutubeBaseInfoExtractor(InfoExtractor):
b2e8bc1b
JMF
61 """Provide base functions for Youtube extractors"""
# Google account endpoints used by the username/password flow in _login()
# (that flow is marked as broken inside _login itself).
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
# NOTE(review): not referenced by the code visible in this part of the file - confirm it is still used
_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

_LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
_CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
# '{0}' is filled in with the TL token taken from the challenge response
_TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

# Regex alternation of names; presumably first path components of youtube.com URLs
# that must not be mistaken for channel/user names - TODO confirm against the users of this constant
_RESERVED_NAMES = (
    r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|'
    r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
    r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

_NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False

# Regex for a playlist ID: either one of the listed prefixes followed by at least
# 10 ID characters, or one of the fixed IDs RDMM / WL / LL / LM
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
d0ba5587 79
b2e8bc1b 80 def _login(self):
83317f69 81 """
82 Attempt to log in to YouTube.
83 True is returned if successful or skipped.
84 False is returned if login failed.
85
86 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
87 """
9d5d4d64 88
89 def warn(message):
90 self.report_warning(message)
91
92 # username+password login is broken
93 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
94 self.raise_login_required(
95 'Login details are needed to download this content', method='cookies')
68217024 96 username, password = self._get_login_info()
9d5d4d64 97 if username:
98 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
99 return
100 # Everything below this is broken!
101
b2e8bc1b
JMF
102 # No authentication to be performed
103 if username is None:
a06916d9 104 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
69ea8ca4 105 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
a06916d9 106 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
545cc85d 107 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
83317f69 108 return True
b2e8bc1b 109
7cc3570e
PH
110 login_page = self._download_webpage(
111 self._LOGIN_URL, None,
69ea8ca4
PH
112 note='Downloading login page',
113 errnote='unable to fetch login page', fatal=False)
7cc3570e
PH
114 if login_page is False:
115 return
b2e8bc1b 116
1212e997 117 login_form = self._hidden_inputs(login_page)
c5e8d7af 118
e00eb564
S
119 def req(url, f_req, note, errnote):
120 data = login_form.copy()
121 data.update({
122 'pstMsg': 1,
123 'checkConnection': 'youtube',
124 'checkedDomains': 'youtube',
125 'hl': 'en',
126 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
3995d37d 127 'f.req': json.dumps(f_req),
e00eb564
S
128 'flowName': 'GlifWebSignIn',
129 'flowEntry': 'ServiceLogin',
baf67a60
S
130 # TODO: reverse actual botguard identifier generation algo
131 'bgRequest': '["identifier",""]',
041bc3ad 132 })
e00eb564
S
133 return self._download_json(
134 url, None, note=note, errnote=errnote,
135 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
136 fatal=False,
137 data=urlencode_postdata(data), headers={
138 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
139 'Google-Accounts-XSRF': 1,
140 })
141
3995d37d
S
142 lookup_req = [
143 username,
144 None, [], None, 'US', None, None, 2, False, True,
145 [
146 None, None,
147 [2, 1, None, 1,
148 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
149 None, [], 4],
150 1, [None, None, []], None, None, None, True
151 ],
152 username,
153 ]
154
e00eb564 155 lookup_results = req(
3995d37d 156 self._LOOKUP_URL, lookup_req,
e00eb564
S
157 'Looking up account info', 'Unable to look up account info')
158
159 if lookup_results is False:
160 return False
041bc3ad 161
3995d37d
S
162 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
163 if not user_hash:
164 warn('Unable to extract user hash')
165 return False
166
167 challenge_req = [
168 user_hash,
169 None, 1, None, [1, None, None, None, [password, None, True]],
170 [
171 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
172 1, [None, None, []], None, None, None, True
173 ]]
83317f69 174
3995d37d
S
175 challenge_results = req(
176 self._CHALLENGE_URL, challenge_req,
177 'Logging in', 'Unable to log in')
83317f69 178
3995d37d 179 if challenge_results is False:
e00eb564 180 return
83317f69 181
3995d37d
S
182 login_res = try_get(challenge_results, lambda x: x[0][5], list)
183 if login_res:
184 login_msg = try_get(login_res, lambda x: x[5], compat_str)
185 warn(
186 'Unable to login: %s' % 'Invalid password'
187 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
188 return False
189
190 res = try_get(challenge_results, lambda x: x[0][-1], list)
191 if not res:
192 warn('Unable to extract result entry')
193 return False
194
9a6628aa
S
195 login_challenge = try_get(res, lambda x: x[0][0], list)
196 if login_challenge:
197 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
198 if challenge_str == 'TWO_STEP_VERIFICATION':
3995d37d
S
199 # SEND_SUCCESS - TFA code has been successfully sent to phone
200 # QUOTA_EXCEEDED - reached the limit of TFA codes
9a6628aa 201 status = try_get(login_challenge, lambda x: x[5], compat_str)
3995d37d
S
202 if status == 'QUOTA_EXCEEDED':
203 warn('Exceeded the limit of TFA codes, try later')
204 return False
205
206 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
207 if not tl:
208 warn('Unable to extract TL')
209 return False
210
211 tfa_code = self._get_tfa_info('2-step verification code')
212
213 if not tfa_code:
214 warn(
215 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
216 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
217 return False
218
219 tfa_code = remove_start(tfa_code, 'G-')
220
221 tfa_req = [
222 user_hash, None, 2, None,
223 [
224 9, None, None, None, None, None, None, None,
225 [None, tfa_code, True, 2]
226 ]]
227
228 tfa_results = req(
229 self._TFA_URL.format(tl), tfa_req,
230 'Submitting TFA code', 'Unable to submit TFA code')
231
232 if tfa_results is False:
233 return False
234
235 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
236 if tfa_res:
237 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
238 warn(
239 'Unable to finish TFA: %s' % 'Invalid TFA code'
240 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
241 return False
242
243 check_cookie_url = try_get(
244 tfa_results, lambda x: x[0][-1][2], compat_str)
9a6628aa
S
245 else:
246 CHALLENGES = {
247 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
248 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
249 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
250 }
251 challenge = CHALLENGES.get(
252 challenge_str,
253 '%s returned error %s.' % (self.IE_NAME, challenge_str))
254 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
255 return False
3995d37d
S
256 else:
257 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
258
259 if not check_cookie_url:
260 warn('Unable to extract CheckCookie URL')
261 return False
e00eb564
S
262
263 check_cookie_results = self._download_webpage(
3995d37d
S
264 check_cookie_url, None, 'Checking cookie', fatal=False)
265
266 if check_cookie_results is False:
267 return False
e00eb564 268
3995d37d
S
269 if 'https://myaccount.google.com/' not in check_cookie_results:
270 warn('Unable to log in')
b2e8bc1b 271 return False
e00eb564 272
b2e8bc1b
JMF
273 return True
274
cce889b9 275 def _initialize_consent(self):
276 cookies = self._get_cookies('https://www.youtube.com/')
277 if cookies.get('__Secure-3PSID'):
278 return
279 consent_id = None
280 consent = cookies.get('CONSENT')
281 if consent:
282 if 'YES' in consent.value:
283 return
284 consent_id = self._search_regex(
285 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
286 if not consent_id:
287 consent_id = random.randint(100, 999)
288 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
8d81f3e3 289
b2e8bc1b 290 def _real_initialize(self):
cce889b9 291 self._initialize_consent()
b2e8bc1b
JMF
292 if self._downloader is None:
293 return
b2e8bc1b
JMF
294 if not self._login():
295 return
c5e8d7af 296
# Fallback client version and innertube API key, used when ytcfg does not supply
# INNERTUBE_CLIENT_VERSION / INNERTUBE_API_KEY
_YT_WEB_CLIENT_VERSION = '2.20210407.08.00'
_YT_INNERTUBE_API_KEY = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
# Patterns whose first group captures the JSON object assigned to
# ytInitialData / ytInitialPlayerResponse in a page
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
# Text expected right after the assignment; appended to the patterns above so the
# non-greedy JSON match is anchored at a boundary
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
a0566bbf 302
a5c56234
M
303 def _generate_sapisidhash_header(self):
304 sapisid_cookie = self._get_cookies('https://www.youtube.com').get('SAPISID')
305 if sapisid_cookie is None:
306 return
307 time_now = round(time.time())
308 sapisidhash = hashlib.sha1((str(time_now) + " " + sapisid_cookie.value + " " + "https://www.youtube.com").encode("utf-8")).hexdigest()
309 return "SAPISIDHASH %s_%s" % (time_now, sapisidhash)
310
311 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
f4f751af 312 note='Downloading API JSON', errnote='Unable to download API page',
313 context=None, api_key=None):
314
315 data = {'context': context} if context else {'context': self._extract_context()}
8bdd16b4 316 data.update(query)
f4f751af 317 real_headers = self._generate_api_headers()
318 real_headers.update({'content-type': 'application/json'})
319 if headers:
320 real_headers.update(headers)
545cc85d 321 return self._download_json(
a5c56234
M
322 'https://www.youtube.com/youtubei/v1/%s' % ep,
323 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
f4f751af 324 data=json.dumps(data).encode('utf8'), headers=real_headers,
325 query={'key': api_key or self._extract_api_key()})
326
def _extract_api_key(self, ytcfg=None):
    """Return ytcfg's INNERTUBE_API_KEY, or the built-in default key when it is missing or empty."""
    configured_key = try_get(ytcfg, lambda cfg: cfg['INNERTUBE_API_KEY'], compat_str)
    if configured_key:
        return configured_key
    return self._YT_INNERTUBE_API_KEY
c54f4aad 329
8bdd16b4 330 def _extract_yt_initial_data(self, video_id, webpage):
331 return self._parse_json(
332 self._search_regex(
29f7c58a 333 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
a0566bbf 334 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
8bdd16b4 335 video_id)
0c148415 336
a1c5d2ca
M
337 def _extract_identity_token(self, webpage, item_id):
338 ytcfg = self._extract_ytcfg(item_id, webpage)
339 if ytcfg:
340 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
341 if token:
342 return token
343 return self._search_regex(
344 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
345 'identity token', default=None)
346
@staticmethod
def _extract_account_syncid(data):
    """
    Extract syncId required to download private playlists of secondary channels
    @param data Either response or ytcfg
    @returns The channel sync ID, the DELEGATED_SESSION_ID entry of data,
             or None when neither is available (including when data is not a dict)
    """
    datasync_id = try_get(
        data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
               lambda x: x['DATASYNC_ID']), compat_str) or ''
    sync_ids = datasync_id.split("||")
    if len(sync_ids) >= 2 and sync_ids[1]:
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        return sync_ids[0]
    # ytcfg includes channel_syncid if on secondary channel.
    # Guarded so that a missing/invalid data value yields None instead of raising AttributeError
    if not isinstance(data, dict):
        return None
    return data.get('DELEGATED_SESSION_ID')
a1c5d2ca 362
29f7c58a 363 def _extract_ytcfg(self, video_id, webpage):
8c54a305 364 if not webpage:
365 return {}
29f7c58a 366 return self._parse_json(
367 self._search_regex(
368 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
f4f751af 369 default='{}'), video_id, fatal=False) or {}
370
def __extract_client_version(self, ytcfg):
    """Return ytcfg's INNERTUBE_CLIENT_VERSION, or the built-in default version when it is missing or empty."""
    configured_version = try_get(ytcfg, lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'], compat_str)
    if configured_version:
        return configured_version
    return self._YT_WEB_CLIENT_VERSION
373
def _extract_context(self, ytcfg=None):
    """Return the innertube context from *ytcfg*, or rebuild a minimal one.

    A non-empty INNERTUBE_CONTEXT dict is returned as is. Otherwise the context
    is recreated from the client name (default 'WEB'), the client version and,
    when available, VISITOR_DATA.
    """
    supplied = try_get(ytcfg, lambda cfg: cfg['INNERTUBE_CONTEXT'], dict)
    if supplied:
        return supplied

    # Recreate the client context (required)
    version = self.__extract_client_version(ytcfg)
    name = try_get(ytcfg, lambda cfg: cfg['INNERTUBE_CLIENT_NAME'], compat_str) or 'WEB'
    client = {
        'clientName': name,
        'clientVersion': version,
    }
    visitor = try_get(ytcfg, lambda cfg: cfg['VISITOR_DATA'], compat_str)
    if visitor:
        client['visitorData'] = visitor
    return {'client': client}
392
def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None, visitor_data=None):
    """Assemble the HTTP headers for an API request.

    The client name/version headers are always present; identity token,
    account sync ID and visitor data headers are added only for truthy
    arguments. Authorization and X-Origin are added when a SAPISIDHASH
    value can be generated.
    """
    api_headers = {
        'X-YouTube-Client-Name': '1',
        'X-YouTube-Client-Version': self.__extract_client_version(ytcfg),
    }
    if identity_token:
        api_headers.update({'x-youtube-identity-token': identity_token})
    if account_syncid:
        api_headers.update({
            'X-Goog-PageId': account_syncid,
            'X-Goog-AuthUser': 0,
        })
    if visitor_data:
        api_headers.update({'x-goog-visitor-id': visitor_data})
    sapisidhash = self._generate_sapisidhash_header()
    if sapisidhash is None:
        return api_headers
    api_headers.update({
        'Authorization': sapisidhash,
        'X-Origin': 'https://www.youtube.com',
    })
    return api_headers
29f7c58a 410
@staticmethod
def is_music_url(url):
    """Tell whether *url* starts with http(s)://music.youtube.com/."""
    matched = re.match(r'https?://music\.youtube\.com/', url)
    return bool(matched)
414
def _extract_video(self, renderer):
    """Turn a video *renderer* dict into a 'url' result handled by YoutubeIE.

    Title, description, duration, view count and uploader are filled in from
    the renderer where the corresponding text fields can be read.
    """
    video_id = renderer.get('videoId')

    title = try_get(
        renderer,
        (lambda r: r['title']['runs'][0]['text'],
         lambda r: r['title']['simpleText']), compat_str)
    description = try_get(
        renderer, lambda r: r['descriptionSnippet']['runs'][0]['text'],
        compat_str)

    length_text = try_get(
        renderer, lambda r: r['lengthText']['simpleText'], compat_str)
    duration = parse_duration(length_text)

    # Strip all whitespace, then take the leading run of digits and commas
    views_text = try_get(
        renderer, lambda r: r['viewCountText']['simpleText'], compat_str) or ''
    views_digits = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', views_text),
        'view count', default=None)
    view_count = str_to_int(views_digits)

    uploader = try_get(
        renderer,
        (lambda r: r['ownerText']['runs'][0]['text'],
         lambda r: r['shortBylineText']['runs'][0]['text']), compat_str)

    result = {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': video_id,
    }
    result.update({
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'uploader': uploader,
    })
    return result
446
0c148415 447
360e1ca5 448class YoutubeIE(YoutubeBaseInfoExtractor):
78caa52a 449 IE_DESC = 'YouTube.com'
bc2ca1bb 450 _INVIDIOUS_SITES = (
451 # invidious-redirect websites
452 r'(?:www\.)?redirect\.invidious\.io',
453 r'(?:(?:www|dev)\.)?invidio\.us',
454 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
455 r'(?:www\.)?invidious\.pussthecat\.org',
bc2ca1bb 456 r'(?:www\.)?invidious\.zee\.li',
bc2ca1bb 457 r'(?:(?:www|au)\.)?ytprivate\.com',
458 r'(?:www\.)?invidious\.namazso\.eu',
459 r'(?:www\.)?invidious\.ethibox\.fr',
bc2ca1bb 460 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
461 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
462 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
463 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
464 # youtube-dl invidious instances list
465 r'(?:(?:www|no)\.)?invidiou\.sh',
466 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
467 r'(?:www\.)?invidious\.kabi\.tk',
bc2ca1bb 468 r'(?:www\.)?invidious\.mastodon\.host',
469 r'(?:www\.)?invidious\.zapashcanon\.fr',
470 r'(?:www\.)?invidious\.kavin\.rocks',
201c1459 471 r'(?:www\.)?invidious\.tinfoil-hat\.net',
472 r'(?:www\.)?invidious\.himiko\.cloud',
473 r'(?:www\.)?invidious\.reallyancient\.tech',
bc2ca1bb 474 r'(?:www\.)?invidious\.tube',
475 r'(?:www\.)?invidiou\.site',
476 r'(?:www\.)?invidious\.site',
477 r'(?:www\.)?invidious\.xyz',
478 r'(?:www\.)?invidious\.nixnet\.xyz',
201c1459 479 r'(?:www\.)?invidious\.048596\.xyz',
bc2ca1bb 480 r'(?:www\.)?invidious\.drycat\.fr',
201c1459 481 r'(?:www\.)?inv\.skyn3t\.in',
bc2ca1bb 482 r'(?:www\.)?tube\.poal\.co',
483 r'(?:www\.)?tube\.connect\.cafe',
484 r'(?:www\.)?vid\.wxzm\.sx',
485 r'(?:www\.)?vid\.mint\.lgbt',
201c1459 486 r'(?:www\.)?vid\.puffyan\.us',
bc2ca1bb 487 r'(?:www\.)?yewtu\.be',
488 r'(?:www\.)?yt\.elukerio\.org',
489 r'(?:www\.)?yt\.lelux\.fi',
490 r'(?:www\.)?invidious\.ggc-project\.de',
491 r'(?:www\.)?yt\.maisputain\.ovh',
201c1459 492 r'(?:www\.)?ytprivate\.com',
493 r'(?:www\.)?invidious\.13ad\.de',
bc2ca1bb 494 r'(?:www\.)?invidious\.toot\.koeln',
495 r'(?:www\.)?invidious\.fdn\.fr',
496 r'(?:www\.)?watch\.nettohikari\.com',
497 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
498 r'(?:www\.)?qklhadlycap4cnod\.onion',
499 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
500 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
501 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
502 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
503 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
504 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
505 )
cb7dfeea 506 _VALID_URL = r"""(?x)^
c5e8d7af 507 (
edb53e2d 508 (?:https?://|//) # http(s):// or protocol-independent URL
bc2ca1bb 509 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
510 (?:www\.)?deturl\.com/www\.youtube\.com|
511 (?:www\.)?pwnyoutube\.com|
512 (?:www\.)?hooktube\.com|
513 (?:www\.)?yourepeat\.com|
514 tube\.majestyc\.net|
515 %(invidious)s|
516 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
c5e8d7af
PH
517 (?:.*?\#/)? # handle anchor (#/) redirect urls
518 (?: # the various things that can precede the ID:
ac7553d0 519 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
c5e8d7af 520 |(?: # or the v= param in all its forms
f7000f3a 521 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
c5e8d7af 522 (?:\?|\#!?) # the params delimiter ? or # or #!
040ac686 523 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
c5e8d7af
PH
524 v=
525 )
f4b05232 526 ))
cbaed4bb
S
527 |(?:
528 youtu\.be| # just youtu.be/xxxx
6d4fc66b
S
529 vid\.plus| # or vid.plus/xxxx
530 zwearz\.com/watch| # or zwearz.com/watch/xxxx
bc2ca1bb 531 %(invidious)s
cbaed4bb 532 )/
edb53e2d 533 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
f4b05232 534 )
c5e8d7af 535 )? # all until now is optional -> you can pass the naked ID
201c1459 536 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
c5e8d7af 537 (?(1).+)? # if we found the ID, everything can follow
9297939e 538 (?:\#|$)""" % {
bc2ca1bb 539 'invidious': '|'.join(_INVIDIOUS_SITES),
540 }
e40c758c 541 _PLAYER_INFO_RE = (
cc2db878 542 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
543 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
545cc85d 544 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
e40c758c 545 )
2c62dc26 546 _formats = {
c2d3cb4c 547 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
548 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
549 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
550 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
551 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
552 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
553 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
554 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
3834d3e3 555 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
c2d3cb4c 556 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
557 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
558 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
559 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
560 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
561 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
e1a0bfdf 562 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
c2d3cb4c 563 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
564 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
e1a0bfdf 565
566
567 # 3D videos
c2d3cb4c 568 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
569 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
570 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
571 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
e1a0bfdf 572 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
573 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
574 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
836a086c 575
96fb5605 576 # Apple HTTP Live Streaming
11f12195 577 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
c2d3cb4c 578 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
579 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
580 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
581 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
582 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
e1a0bfdf 583 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
584 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
2c62dc26
PH
585
586 # DASH mp4 video
d23028a8
S
587 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
588 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
589 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
590 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
591 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
067aa17e 592 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
d23028a8
S
593 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
594 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
595 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
596 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
597 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
598 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
836a086c 599
f6f1fc92 600 # Dash mp4 audio
d23028a8
S
601 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
602 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
603 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
604 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
605 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
606 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
607 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
836a086c
AZ
608
609 # Dash webm
d23028a8
S
610 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
611 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
612 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
613 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
614 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
615 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
616 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
617 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
618 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
619 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
620 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
621 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
622 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
623 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
624 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
4c6b4764 625 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
d23028a8
S
626 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
627 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
628 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
629 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
630 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
631 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
2c62dc26
PH
632
633 # Dash webm audio
d23028a8
S
634 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
635 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
ce6b9a2d 636
0857baad 637 # Dash webm audio with opus inside
d23028a8
S
638 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
639 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
640 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
0857baad 641
ce6b9a2d
PH
642 # RTMP (unnamed)
643 '_rtmp': {'protocol': 'rtmp'},
b85eae0f
S
644
645 # av01 video only formats sometimes served with "unknown" codecs
646 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
647 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
648 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
649 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
c5e8d7af 650 }
29f7c58a 651 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
836a086c 652
fd5c4aab
S
653 _GEO_BYPASS = False
654
78caa52a 655 IE_NAME = 'youtube'
2eb88d95
PH
656 _TESTS = [
657 {
2d3d2997 658 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
4bc3a23e
PH
659 'info_dict': {
660 'id': 'BaW_jenozKc',
661 'ext': 'mp4',
3867038a 662 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
4bc3a23e
PH
663 'uploader': 'Philipp Hagemeister',
664 'uploader_id': 'phihag',
ec85ded8 665 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
dd4c4492
S
666 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
667 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
4bc3a23e 668 'upload_date': '20121002',
3867038a 669 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
4bc3a23e 670 'categories': ['Science & Technology'],
3867038a 671 'tags': ['youtube-dl'],
556dbe7f 672 'duration': 10,
dbdaaa23 673 'view_count': int,
3e7c1224
PH
674 'like_count': int,
675 'dislike_count': int,
7c80519c 676 'start_time': 1,
297a564b 677 'end_time': 9,
2eb88d95 678 }
0e853ca4 679 },
fccd3771 680 {
4bc3a23e
PH
681 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
682 'note': 'Embed-only video (#1746)',
683 'info_dict': {
684 'id': 'yZIXLfi8CZQ',
685 'ext': 'mp4',
686 'upload_date': '20120608',
687 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
688 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
689 'uploader': 'SET India',
94bfcd23 690 'uploader_id': 'setindia',
ec85ded8 691 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
94bfcd23 692 'age_limit': 18,
545cc85d 693 },
694 'skip': 'Private video',
fccd3771 695 },
11b56058 696 {
8bdd16b4 697 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
11b56058
PM
698 'note': 'Use the first video ID in the URL',
699 'info_dict': {
700 'id': 'BaW_jenozKc',
701 'ext': 'mp4',
3867038a 702 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
11b56058
PM
703 'uploader': 'Philipp Hagemeister',
704 'uploader_id': 'phihag',
ec85ded8 705 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
11b56058 706 'upload_date': '20121002',
3867038a 707 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
11b56058 708 'categories': ['Science & Technology'],
3867038a 709 'tags': ['youtube-dl'],
556dbe7f 710 'duration': 10,
dbdaaa23 711 'view_count': int,
11b56058
PM
712 'like_count': int,
713 'dislike_count': int,
34a7de29
S
714 },
715 'params': {
716 'skip_download': True,
717 },
11b56058 718 },
dd27fd17 719 {
2d3d2997 720 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
4bc3a23e
PH
721 'note': '256k DASH audio (format 141) via DASH manifest',
722 'info_dict': {
723 'id': 'a9LDPn-MO4I',
724 'ext': 'm4a',
725 'upload_date': '20121002',
726 'uploader_id': '8KVIDEO',
ec85ded8 727 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
4bc3a23e
PH
728 'description': '',
729 'uploader': '8KVIDEO',
730 'title': 'UHDTV TEST 8K VIDEO.mp4'
4919603f 731 },
4bc3a23e
PH
732 'params': {
733 'youtube_include_dash_manifest': True,
734 'format': '141',
4919603f 735 },
de3c7fe0 736 'skip': 'format 141 not served anymore',
dd27fd17 737 },
8bdd16b4 738 # DASH manifest with encrypted signature
739 {
740 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
741 'info_dict': {
742 'id': 'IB3lcPjvWLA',
743 'ext': 'm4a',
744 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
745 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
746 'duration': 244,
747 'uploader': 'AfrojackVEVO',
748 'uploader_id': 'AfrojackVEVO',
749 'upload_date': '20131011',
cc2db878 750 'abr': 129.495,
8bdd16b4 751 },
752 'params': {
753 'youtube_include_dash_manifest': True,
754 'format': '141/bestaudio[ext=m4a]',
755 },
756 },
aa79ac0c
PH
757 # Controversy video
758 {
759 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
760 'info_dict': {
761 'id': 'T4XJQO3qol8',
762 'ext': 'mp4',
556dbe7f 763 'duration': 219,
aa79ac0c 764 'upload_date': '20100909',
4fe54c12 765 'uploader': 'Amazing Atheist',
aa79ac0c 766 'uploader_id': 'TheAmazingAtheist',
ec85ded8 767 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
aa79ac0c 768 'title': 'Burning Everyone\'s Koran',
545cc85d 769 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
aa79ac0c 770 }
c522adb1 771 },
dd2d55f1 772 # Normal age-gate video (embed allowed)
c522adb1 773 {
2d3d2997 774 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
c522adb1
JMF
775 'info_dict': {
776 'id': 'HtVdAasjOgU',
777 'ext': 'mp4',
778 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
ec85ded8 779 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
556dbe7f 780 'duration': 142,
c522adb1
JMF
781 'uploader': 'The Witcher',
782 'uploader_id': 'WitcherGame',
ec85ded8 783 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
c522adb1 784 'upload_date': '20140605',
34952f09 785 'age_limit': 18,
c522adb1
JMF
786 },
787 },
8bdd16b4 788 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
789 # YouTube Red ad is not captured for creator
790 {
791 'url': '__2ABJjxzNo',
792 'info_dict': {
793 'id': '__2ABJjxzNo',
794 'ext': 'mp4',
795 'duration': 266,
796 'upload_date': '20100430',
797 'uploader_id': 'deadmau5',
798 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
545cc85d 799 'creator': 'deadmau5',
800 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
8bdd16b4 801 'uploader': 'deadmau5',
802 'title': 'Deadmau5 - Some Chords (HD)',
545cc85d 803 'alt_title': 'Some Chords',
8bdd16b4 804 },
805 'expected_warnings': [
806 'DASH manifest missing',
807 ]
808 },
067aa17e 809 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
e52a40ab
PH
810 {
811 'url': 'lqQg6PlCWgI',
812 'info_dict': {
813 'id': 'lqQg6PlCWgI',
814 'ext': 'mp4',
556dbe7f 815 'duration': 6085,
90227264 816 'upload_date': '20150827',
cbe2bd91 817 'uploader_id': 'olympic',
ec85ded8 818 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
cbe2bd91 819 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
be49068d 820 'uploader': 'Olympic',
cbe2bd91
PH
821 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
822 },
823 'params': {
824 'skip_download': 'requires avconv',
e52a40ab 825 }
cbe2bd91 826 },
6271f1ca
PH
827 # Non-square pixels
828 {
829 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
830 'info_dict': {
831 'id': '_b-2C3KPAM0',
832 'ext': 'mp4',
833 'stretched_ratio': 16 / 9.,
556dbe7f 834 'duration': 85,
6271f1ca
PH
835 'upload_date': '20110310',
836 'uploader_id': 'AllenMeow',
ec85ded8 837 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
6271f1ca 838 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
eb6793ba 839 'uploader': '孫ᄋᄅ',
6271f1ca
PH
840 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
841 },
06b491eb
S
842 },
843 # url_encoded_fmt_stream_map is empty string
844 {
845 'url': 'qEJwOuvDf7I',
846 'info_dict': {
847 'id': 'qEJwOuvDf7I',
f57b7835 848 'ext': 'webm',
06b491eb
S
849 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
850 'description': '',
851 'upload_date': '20150404',
852 'uploader_id': 'spbelect',
853 'uploader': 'Наблюдатели Петербурга',
854 },
855 'params': {
856 'skip_download': 'requires avconv',
e323cf3f
S
857 },
858 'skip': 'This live event has ended.',
06b491eb 859 },
067aa17e 860 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
da77d856
S
861 {
862 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
863 'info_dict': {
864 'id': 'FIl7x6_3R5Y',
eb6793ba 865 'ext': 'webm',
da77d856
S
866 'title': 'md5:7b81415841e02ecd4313668cde88737a',
867 'description': 'md5:116377fd2963b81ec4ce64b542173306',
556dbe7f 868 'duration': 220,
da77d856
S
869 'upload_date': '20150625',
870 'uploader_id': 'dorappi2000',
ec85ded8 871 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
da77d856 872 'uploader': 'dorappi2000',
eb6793ba 873 'formats': 'mincount:31',
da77d856 874 },
eb6793ba 875 'skip': 'not actual anymore',
2ee8f5d8 876 },
8a1a26ce
YCH
877 # DASH manifest with segment_list
878 {
879 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
880 'md5': '8ce563a1d667b599d21064e982ab9e31',
881 'info_dict': {
882 'id': 'CsmdDsKjzN8',
883 'ext': 'mp4',
17ee98e1 884 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
8a1a26ce
YCH
885 'uploader': 'Airtek',
886 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
887 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
888 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
889 },
890 'params': {
891 'youtube_include_dash_manifest': True,
892 'format': '135', # bestvideo
be49068d
S
893 },
894 'skip': 'This live event has ended.',
2ee8f5d8 895 },
cf7e015f
S
896 {
897 # Multifeed videos (multiple cameras), URL is for Main Camera
545cc85d 898 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
cf7e015f 899 'info_dict': {
545cc85d 900 'id': 'jvGDaLqkpTg',
901 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
902 'description': 'md5:e03b909557865076822aa169218d6a5d',
cf7e015f
S
903 },
904 'playlist': [{
905 'info_dict': {
545cc85d 906 'id': 'jvGDaLqkpTg',
cf7e015f 907 'ext': 'mp4',
545cc85d 908 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
909 'description': 'md5:e03b909557865076822aa169218d6a5d',
910 'duration': 10643,
911 'upload_date': '20161111',
912 'uploader': 'Team PGP',
913 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
914 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
915 },
916 }, {
917 'info_dict': {
545cc85d 918 'id': '3AKt1R1aDnw',
cf7e015f 919 'ext': 'mp4',
545cc85d 920 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
921 'description': 'md5:e03b909557865076822aa169218d6a5d',
922 'duration': 10991,
923 'upload_date': '20161111',
924 'uploader': 'Team PGP',
925 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
926 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
927 },
928 }, {
929 'info_dict': {
545cc85d 930 'id': 'RtAMM00gpVc',
cf7e015f 931 'ext': 'mp4',
545cc85d 932 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
933 'description': 'md5:e03b909557865076822aa169218d6a5d',
934 'duration': 10995,
935 'upload_date': '20161111',
936 'uploader': 'Team PGP',
937 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
938 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
939 },
940 }, {
941 'info_dict': {
545cc85d 942 'id': '6N2fdlP3C5U',
cf7e015f 943 'ext': 'mp4',
545cc85d 944 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
945 'description': 'md5:e03b909557865076822aa169218d6a5d',
946 'duration': 10990,
947 'upload_date': '20161111',
948 'uploader': 'Team PGP',
949 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
950 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
cf7e015f
S
951 },
952 }],
953 'params': {
954 'skip_download': True,
955 },
cbaed4bb 956 },
f9f49d87 957 {
067aa17e 958 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
f9f49d87
S
959 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
960 'info_dict': {
961 'id': 'gVfLd0zydlo',
962 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
963 },
964 'playlist_count': 2,
be49068d 965 'skip': 'Not multifeed anymore',
f9f49d87 966 },
cbaed4bb 967 {
2d3d2997 968 'url': 'https://vid.plus/FlRa-iH7PGw',
cbaed4bb 969 'only_matching': True,
0e49d9a6 970 },
6d4fc66b 971 {
2d3d2997 972 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
6d4fc66b
S
973 'only_matching': True,
974 },
0e49d9a6 975 {
067aa17e 976 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
a8776b10 977 # Also tests cut-off URL expansion in video description (see
067aa17e
S
978 # https://github.com/ytdl-org/youtube-dl/issues/1892,
979 # https://github.com/ytdl-org/youtube-dl/issues/8164)
0e49d9a6
LL
980 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
981 'info_dict': {
982 'id': 'lsguqyKfVQg',
983 'ext': 'mp4',
984 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
eb6793ba 985 'alt_title': 'Dark Walk - Position Music',
0e49d9a6 986 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
556dbe7f 987 'duration': 133,
0e49d9a6
LL
988 'upload_date': '20151119',
989 'uploader_id': 'IronSoulElf',
ec85ded8 990 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
0e49d9a6 991 'uploader': 'IronSoulElf',
eb6793ba
S
992 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
993 'track': 'Dark Walk - Position Music',
994 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
92bc97d3 995 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
0e49d9a6
LL
996 },
997 'params': {
998 'skip_download': True,
999 },
1000 },
61f92af1 1001 {
067aa17e 1002 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
61f92af1
S
1003 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1004 'only_matching': True,
1005 },
313dfc45
LL
1006 {
1007 # Video with yt:stretch=17:0
1008 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1009 'info_dict': {
1010 'id': 'Q39EVAstoRM',
1011 'ext': 'mp4',
1012 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1013 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1014 'upload_date': '20151107',
1015 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1016 'uploader': 'CH GAMER DROID',
1017 },
1018 'params': {
1019 'skip_download': True,
1020 },
be49068d 1021 'skip': 'This video does not exist.',
313dfc45 1022 },
201c1459 1023 {
1024 # Video with incomplete 'yt:stretch=16:'
1025 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1026 'only_matching': True,
1027 },
7caf9830
S
1028 {
1029 # Video licensed under Creative Commons
1030 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1031 'info_dict': {
1032 'id': 'M4gD1WSo5mA',
1033 'ext': 'mp4',
1034 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1035 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
556dbe7f 1036 'duration': 721,
7caf9830
S
1037 'upload_date': '20150127',
1038 'uploader_id': 'BerkmanCenter',
ec85ded8 1039 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
556dbe7f 1040 'uploader': 'The Berkman Klein Center for Internet & Society',
7caf9830
S
1041 'license': 'Creative Commons Attribution license (reuse allowed)',
1042 },
1043 'params': {
1044 'skip_download': True,
1045 },
1046 },
fd050249
S
1047 {
1048 # Channel-like uploader_url
1049 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1050 'info_dict': {
1051 'id': 'eQcmzGIKrzg',
1052 'ext': 'mp4',
1053 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
545cc85d 1054 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
556dbe7f 1055 'duration': 4060,
fd050249 1056 'upload_date': '20151119',
eb6793ba 1057 'uploader': 'Bernie Sanders',
fd050249 1058 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
ec85ded8 1059 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
fd050249
S
1060 'license': 'Creative Commons Attribution license (reuse allowed)',
1061 },
1062 'params': {
1063 'skip_download': True,
1064 },
1065 },
040ac686
S
1066 {
1067 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1068 'only_matching': True,
7f29cf54
S
1069 },
1070 {
067aa17e 1071 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
7f29cf54
S
1072 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1073 'only_matching': True,
6496ccb4
S
1074 },
1075 {
1076 # Rental video preview
1077 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1078 'info_dict': {
1079 'id': 'uGpuVWrhIzE',
1080 'ext': 'mp4',
1081 'title': 'Piku - Trailer',
1082 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1083 'upload_date': '20150811',
1084 'uploader': 'FlixMatrix',
1085 'uploader_id': 'FlixMatrixKaravan',
ec85ded8 1086 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
6496ccb4
S
1087 'license': 'Standard YouTube License',
1088 },
1089 'params': {
1090 'skip_download': True,
1091 },
eb6793ba 1092 'skip': 'This video is not available.',
022a5d66 1093 },
12afdc2a
S
1094 {
1095 # YouTube Red video with episode data
1096 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1097 'info_dict': {
1098 'id': 'iqKdEhx-dD4',
1099 'ext': 'mp4',
1100 'title': 'Isolation - Mind Field (Ep 1)',
545cc85d 1101 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
556dbe7f 1102 'duration': 2085,
12afdc2a
S
1103 'upload_date': '20170118',
1104 'uploader': 'Vsauce',
1105 'uploader_id': 'Vsauce',
1106 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
12afdc2a
S
1107 'series': 'Mind Field',
1108 'season_number': 1,
1109 'episode_number': 1,
1110 },
1111 'params': {
1112 'skip_download': True,
1113 },
1114 'expected_warnings': [
1115 'Skipping DASH manifest',
1116 ],
1117 },
c7121fa7
S
1118 {
1119 # The following content has been identified by the YouTube community
1120 # as inappropriate or offensive to some audiences.
1121 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1122 'info_dict': {
1123 'id': '6SJNVb0GnPI',
1124 'ext': 'mp4',
1125 'title': 'Race Differences in Intelligence',
1126 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1127 'duration': 965,
1128 'upload_date': '20140124',
1129 'uploader': 'New Century Foundation',
1130 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1131 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
c7121fa7
S
1132 },
1133 'params': {
1134 'skip_download': True,
1135 },
545cc85d 1136 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
c7121fa7 1137 },
022a5d66
S
1138 {
1139 # itag 212
1140 'url': '1t24XAntNCY',
1141 'only_matching': True,
fd5c4aab
S
1142 },
1143 {
1144 # geo restricted to JP
1145 'url': 'sJL6WA-aGkQ',
1146 'only_matching': True,
1147 },
cd5a74a2
S
1148 {
1149 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1150 'only_matching': True,
1151 },
bc2ca1bb 1152 {
1153 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1154 'only_matching': True,
1155 },
1156 {
1157 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1158 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1159 'only_matching': True,
1160 },
825cd268
RA
1161 {
1162 # DRM protected
1163 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1164 'only_matching': True,
4fe54c12
S
1165 },
1166 {
1167 # Video with unsupported adaptive stream type formats
1168 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1169 'info_dict': {
1170 'id': 'Z4Vy8R84T1U',
1171 'ext': 'mp4',
1172 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1173 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1174 'duration': 433,
1175 'upload_date': '20130923',
1176 'uploader': 'Amelia Putri Harwita',
1177 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1178 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1179 'formats': 'maxcount:10',
1180 },
1181 'params': {
1182 'skip_download': True,
1183 'youtube_include_dash_manifest': False,
1184 },
5429d6a9 1185 'skip': 'not actual anymore',
5caabd3c 1186 },
1187 {
822b9d9c 1188 # Youtube Music Auto-generated description
5caabd3c 1189 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1190 'info_dict': {
1191 'id': 'MgNrAu2pzNs',
1192 'ext': 'mp4',
1193 'title': 'Voyeur Girl',
1194 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1195 'upload_date': '20190312',
5429d6a9
S
1196 'uploader': 'Stephen - Topic',
1197 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
5caabd3c 1198 'artist': 'Stephen',
1199 'track': 'Voyeur Girl',
1200 'album': 'it\'s too much love to know my dear',
1201 'release_date': '20190313',
1202 'release_year': 2019,
1203 },
1204 'params': {
1205 'skip_download': True,
1206 },
1207 },
66b48727
RA
1208 {
1209 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1210 'only_matching': True,
1211 },
011e75e6
S
1212 {
1213 # invalid -> valid video id redirection
1214 'url': 'DJztXj2GPfl',
1215 'info_dict': {
1216 'id': 'DJztXj2GPfk',
1217 'ext': 'mp4',
1218 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1219 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1220 'upload_date': '20090125',
1221 'uploader': 'Prochorowka',
1222 'uploader_id': 'Prochorowka',
1223 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1224 'artist': 'Panjabi MC',
1225 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1226 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1227 },
1228 'params': {
1229 'skip_download': True,
1230 },
545cc85d 1231 'skip': 'Video unavailable',
ea74e00b
DP
1232 },
1233 {
1234 # empty description results in an empty string
1235 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1236 'info_dict': {
1237 'id': 'x41yOUIvK2k',
1238 'ext': 'mp4',
1239 'title': 'IMG 3456',
1240 'description': '',
1241 'upload_date': '20170613',
1242 'uploader_id': 'ElevageOrVert',
1243 'uploader': 'ElevageOrVert',
1244 },
1245 'params': {
1246 'skip_download': True,
1247 },
1248 },
a0566bbf 1249 {
29f7c58a 1250 # with '};' inside yt initial data (see [1])
1251 # see [2] for an example with '};' inside ytInitialPlayerResponse
1252 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1253 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
a0566bbf 1254 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1255 'info_dict': {
1256 'id': 'CHqg6qOn4no',
1257 'ext': 'mp4',
1258 'title': 'Part 77 Sort a list of simple types in c#',
1259 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1260 'upload_date': '20130831',
1261 'uploader_id': 'kudvenkat',
1262 'uploader': 'kudvenkat',
1263 },
1264 'params': {
1265 'skip_download': True,
1266 },
1267 },
29f7c58a 1268 {
1269 # another example of '};' in ytInitialData
1270 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1271 'only_matching': True,
1272 },
1273 {
1274 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1275 'only_matching': True,
1276 },
{
    # https://github.com/ytdl-org/youtube-dl/pull/28094
    'url': 'OtqTfy26tG0',
    'info_dict': {
        'id': 'OtqTfy26tG0',
        'ext': 'mp4',
        'title': 'Burn Out',
        'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
        'upload_date': '20141120',
        'uploader': 'The Cinematic Orchestra - Topic',
        'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
        'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
        'artist': 'The Cinematic Orchestra',
        'track': 'Burn Out',
        'album': 'Every Day',
        # The key must be spelled 'release_date' for the None expectation
        # to be compared against the extracted field of that name.
        'release_date': None,
        'release_year': None,
    },
    'params': {
        'skip_download': True,
    },
},
bc2ca1bb 1299 {
1300 # controversial video, only works with bpctr when authenticated with cookies
1301 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1302 'only_matching': True,
1303 },
f7ad7160 1304 {
1305 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1306 'url': 'cBvYw8_A0vQ',
1307 'info_dict': {
1308 'id': 'cBvYw8_A0vQ',
1309 'ext': 'mp4',
1310 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1311 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1312 'upload_date': '20201120',
1313 'uploader': 'Walk around Japan',
1314 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1315 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1316 },
1317 'params': {
1318 'skip_download': True,
1319 },
0fb983f6 1320 }, {
1321 # Has multiple audio streams
1322 'url': 'WaOKSUlf4TM',
1323 'only_matching': True
9297939e 1324 }, {
1325 # Requires Premium: has format 141 when requested using YTM url
1326 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1327 'only_matching': True
1328 }, {
120916da 1329 # multiple subtitles with same lang_code
1330 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1331 'only_matching': True,
1332 },
2eb88d95
PH
1333 ]
1334
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    A URL whose query string carries a non-empty ``list`` parameter is
    rejected here; every other URL is decided by the parent class.
    """
    # Hack for lazy extractors until more generic solution is implemented
    # (see #28780)
    from .youtube import parse_qs
    list_param = parse_qs(url).get('list', [None])[0]
    return False if list_param else super(YoutubeIE, cls).suitable(url)
1344
e0df6211
PH
def __init__(self, *args, **kwargs):
    """Forward all arguments to the parent class and start with empty caches."""
    super(YoutubeIE, self).__init__(*args, **kwargs)
    # Downloaded player code, keyed by player id
    self._code_cache = dict()
    # Signature functions, keyed by (player_url, signature cache id)
    self._player_cache = dict()
e0df6211 1349
60064c53
PH
1350 def _signature_cache_id(self, example_sig):
1351 """ Return a string representation of a signature """
78caa52a 1352 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
60064c53 1353
e40c758c
S
1354 @classmethod
1355 def _extract_player_info(cls, player_url):
1356 for player_re in cls._PLAYER_INFO_RE:
1357 id_m = re.search(player_re, player_url)
1358 if id_m:
1359 break
1360 else:
c081b35c 1361 raise ExtractorError('Cannot identify player %r' % player_url)
545cc85d 1362 return id_m.group('id')
e40c758c
S
1363
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that maps an encrypted signature string to its decrypted form.

    A character-index list stored in the 'youtube-sigfuncs' cache is used
    when one exists for this player and signature shape.  Otherwise the
    player code is downloaded (once per player id), its signature function
    is parsed, and the index list it produces is stored in that cache.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    sig_shape = self._signature_cache_id(example_sig)
    func_id = 'js_%s_%s' % (player_id, sig_shape)
    assert os.path.basename(func_id) == func_id

    cached_indices = self._downloader.cache.load('youtube-sigfuncs', func_id)
    if cached_indices is not None:
        return lambda s: ''.join(s[i] for i in cached_indices)

    if player_id in self._code_cache:
        code = self._code_cache[player_id]
    else:
        code = self._code_cache[player_id] = self._download_webpage(
            player_url, video_id,
            note='Downloading player ' + player_id,
            errnote='Download of %s failed' % player_url)
    sig_func = self._parse_sig_js(code)

    # Feed the function a string whose characters encode their own
    # positions, so the output reveals which input index lands where.
    probe = ''.join(compat_chr(pos) for pos in range(len(example_sig)))
    index_spec = [ord(ch) for ch in sig_func(probe)]

    self._downloader.cache.store('youtube-sigfuncs', func_id, index_spec)
    return sig_func
1390
60064c53 1391 def _print_sig_code(self, func, example_sig):
edf3e38e
PH
1392 def gen_sig_code(idxs):
1393 def _genslice(start, end, step):
78caa52a 1394 starts = '' if start == 0 else str(start)
8bcc8756 1395 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
69ea8ca4 1396 steps = '' if step == 1 else (':%d' % step)
78caa52a 1397 return 's[%s%s%s]' % (starts, ends, steps)
edf3e38e
PH
1398
1399 step = None
7af808a5
PH
1400 # Quelch pyflakes warnings - start will be set when step is set
1401 start = '(Never used)'
edf3e38e
PH
1402 for i, prev in zip(idxs[1:], idxs[:-1]):
1403 if step is not None:
1404 if i - prev == step:
1405 continue
1406 yield _genslice(start, prev, step)
1407 step = None
1408 continue
1409 if i - prev in [-1, 1]:
1410 step = i - prev
1411 start = prev
1412 continue
1413 else:
78caa52a 1414 yield 's[%d]' % prev
edf3e38e 1415 if step is None:
78caa52a 1416 yield 's[%d]' % i
edf3e38e
PH
1417 else:
1418 yield _genslice(start, i, step)
1419
78caa52a 1420 test_string = ''.join(map(compat_chr, range(len(example_sig))))
c705320f 1421 cache_res = func(test_string)
edf3e38e 1422 cache_spec = [ord(c) for c in cache_res]
78caa52a 1423 expr_code = ' + '.join(gen_sig_code(cache_spec))
60064c53
PH
1424 signature_id_tuple = '(%s)' % (
1425 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
69ea8ca4 1426 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
78caa52a 1427 ' return %s\n') % (signature_id_tuple, expr_code)
69ea8ca4 1428 self.to_screen('Extracted signature function:\n' + code)
edf3e38e 1429
e0df6211
PH
def _parse_sig_js(self, jscode):
    """Build a Python callable from the signature function found in player JS.

    The function's name is located in *jscode* with the patterns below,
    tried in order, taking the ``sig`` group of the first one that matches.
    The function is then extracted with JSInterpreter.

    Returns a callable that takes the signature string and passes it to
    the extracted function as its single argument.
    """
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        # This pattern used to be listed twice in a row; the second copy
        # could never match where the first had not, so it was dropped.
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        sig_name_patterns, jscode,
        'Initial JS player signature function name', group='sig')

    jsi = JSInterpreter(jscode)
    initial_function = jsi.extract_function(funcname)
    # The extracted function takes its arguments as a list
    return lambda s: initial_function([s])
1453
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature

    *player_url* may be protocol-relative or relative to
    https://www.youtube.com; it is made absolute before use.  Signature
    functions are kept in ``self._player_cache`` per (player URL,
    signature shape).  Raises ExtractorError when *player_url* is None,
    or wrapping any exception raised while extracting or applying the
    signature function.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt signature without player_url')

    if player_url.startswith('//'):
        player_url = 'https:' + player_url
    elif not re.match(r'https?://', player_url):
        player_url = compat_urlparse.urljoin(
            'https://www.youtube.com', player_url)

    try:
        cache_key = (player_url, self._signature_cache_id(s))
        if cache_key in self._player_cache:
            sig_func = self._player_cache[cache_key]
        else:
            sig_func = self._extract_signature_function(video_id, player_url, s)
            self._player_cache[cache_key] = sig_func
        if self.get_param('youtube_print_sig_code'):
            self._print_sig_code(sig_func, s)
        return sig_func(s)
    except Exception as e:
        tb = traceback.format_exc()
        raise ExtractorError(
            'Signature extraction failed: ' + tb, cause=e)
e0df6211 1480
def _mark_watched(self, video_id, player_response):
    """Request the playback-tracking URL from *player_response*.

    The URL is read from
    ``playbackTracking.videostatsPlaybackUrl.baseUrl``; nothing happens
    when it is missing or not a valid URL.  ``ver`` and a random ``cpn``
    are set in its query string before it is requested.  The request is
    non-fatal (``fatal=False``).
    """
    playback_url = url_or_none(try_get(
        player_response,
        lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
    if not playback_url:
        return
    parsed_playback_url = compat_urlparse.urlparse(playback_url)
    qs = compat_urlparse.parse_qs(parsed_playback_url.query)

    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # Draw uniformly from the 64 characters.  The previous
    # random.randint(0, 256) & 63 included 256 (randint is inclusive at
    # both ends), which slightly over-weighted index 0.
    cpn = ''.join(random.choice(CPN_ALPHABET) for _ in range(16))

    qs.update({
        'ver': ['2'],
        'cpn': [cpn],
    })
    playback_url = compat_urlparse.urlunparse(
        parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

    self._download_webpage(
        playback_url, video_id, 'Marking watched',
        'Unable to mark watched', fatal=False)
1505
66c9fa36
S
@staticmethod
def _extract_urls(webpage):
    """Return every YouTube embed found in *webpage*, in discovery order.

    Three kinds of embeds are recognised:
      * player URLs in iframe/embed/object/SWFObject style markup,
      * ``lazyYT`` elements (value of ``data-youtube-id``),
      * the Wordpress "YouTube Video Importer" plugin (value of
        ``data-video_id``).

    The first kind yields URLs; the other two yield whatever the
    attribute holds.  An empty list is returned when nothing matches.
    """
    # Embedded YouTube player
    # NOTE: the embedSWF alternative used to read `embedSWF\(?:\s*`, which
    # requires a literal ':' and so never matched `embedSWF("...")`.
    # The ':' is now optional, which keeps everything the old pattern
    # matched while also accepting the regular call syntax.
    entries = [
        unescapeHTML(mobj.group('url'))
        for mobj in re.finditer(r'''(?x)
        (?:
            <iframe[^>]+?src=|
            data-video-url=|
            <embed[^>]+?src=|
            embedSWF\(?:?\s*|
            <object[^>]+data=|
            new\s+SWFObject\(
        )
        (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
        \1''', webpage)]

    # lazyYT YouTube embed
    entries.extend(
        unescapeHTML(youtube_id)
        for youtube_id in re.findall(
            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))

    # Wordpress "YouTube Video Importer" plugin
    matches = re.findall(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    # findall returns one tuple per match; the last group is the video id
    entries.extend(m[-1] for m in matches)

    return entries
1537
@staticmethod
def _extract_url(webpage):
    """Return the first entry found by ``_extract_urls`` or None if there is none."""
    candidates = YoutubeIE._extract_urls(webpage)
    if not candidates:
        return None
    return candidates[0]
1542
97665381
PH
@classmethod
def extract_id(cls, url):
    """Return the video id (second group of ``_VALID_URL``) for *url*.

    Raises ExtractorError when *url* does not match ``_VALID_URL``.
    """
    match = re.match(cls._VALID_URL, url, re.VERBOSE)
    if match is not None:
        return match.group(2)
    raise ExtractorError('Invalid URL: %s' % url)
1550
def _extract_chapters_from_json(self, data, video_id, duration):
    """Build the chapter list from the chaptered player bar in *data*.

    Each chapter ends where the next one starts; the last chapter ends at
    *duration*.  Chapters whose start or end time cannot be determined
    are left out.  Returns None when *data* has no chapter list.
    """
    raw_chapters = try_get(
        data,
        lambda x: x['playerOverlays']
                   ['playerOverlayRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['decoratedPlayerBarRenderer']
                   ['playerBar']
                   ['chapteredPlayerBarRenderer']
                   ['chapters'],
        list)
    if not raw_chapters:
        return

    def start_seconds(item):
        # timeRangeStartMillis is in milliseconds
        millis = try_get(
            item,
            lambda x: x['chapterRenderer']['timeRangeStartMillis'],
            int)
        return float_or_none(millis, scale=1000)

    total = len(raw_chapters)
    result = []
    for index, item in enumerate(raw_chapters):
        begin = start_seconds(item)
        if begin is None:
            continue
        following = index + 1
        if following < total:
            finish = start_seconds(raw_chapters[following])
        else:
            finish = duration
        if finish is None:
            continue
        result.append({
            'start_time': begin,
            'end_time': finish,
            'title': try_get(
                item, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str),
        })
    return result
1590
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
    """Search *webpage* for the JSON matched by *regex* and parse it.

    The pattern followed by ``_YT_INITIAL_BOUNDARY_RE`` is tried first,
    then the bare pattern.  When neither matches, '{}' is parsed instead.
    Parsing is done with ``fatal=False``.
    """
    bounded_regex = r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE)
    raw_json = self._search_regex(
        (bounded_regex, regex), webpage, name, default='{}')
    return self._parse_json(raw_json, video_id, fatal=False)
84213ea8 1595
d92f5d5a 1596 @staticmethod
1597 def parse_time_text(time_text):
1598 """
1599 Parse the comment time text
1600 time_text is in the format 'X units ago (edited)'
1601 """
1602 time_text_split = time_text.split(' ')
1603 if len(time_text_split) >= 3:
1604 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1605
a1c5d2ca
M
@staticmethod
def _join_text_entries(runs):
    """Concatenate the 'text' strings of the dict entries in *runs*.

    Entries that are not dicts, or whose 'text' is missing, empty or not
    a string, are ignored.  Returns None when nothing was collected.
    """
    pieces = []
    for run in runs:
        if isinstance(run, dict):
            piece = try_get(run, lambda x: x['text'], compat_str)
            if piece:
                pieces.append(piece)
    # every collected piece is non-empty, so an empty join means "no text"
    return ''.join(pieces) or None
1619
def _extract_comment(self, comment_renderer, parent=None):
    """Convert a comment renderer dict into a comment info dict.

    Returns None when the renderer has no 'commentId'.  'timestamp' is
    None when the published-time text is missing or cannot be parsed;
    'parent' falls back to 'root' for top-level comments.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return
    comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
    text = self._join_text_entries(comment_text_runs) or ''
    comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
    time_text = self._join_text_entries(comment_time_text)
    # time_text is None when the renderer carries no time runs, and
    # parse_time_text gives None for text it does not understand; neither
    # case may abort the extraction of the whole comment.
    parsed_time = self.parse_time_text(time_text) if time_text else None
    timestamp = (
        calendar.timegm(parsed_time.timetuple())
        if parsed_time is not None else None)
    author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
    author_id = try_get(comment_renderer,
                        lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
    votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
                                                  lambda x: x['likeCount']), compat_str)) or 0
    # the last thumbnail in the list is used
    author_thumbnail = try_get(comment_renderer,
                               lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)

    author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
    is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
    return {
        'id': comment_id,
        'text': text,
        'timestamp': timestamp,
        'time_text': time_text,
        'like_count': votes,
        'is_favorited': is_liked,
        'author': author,
        'author_id': author_id,
        'author_thumbnail': author_thumbnail,
        'author_is_uploader': author_is_uploader,
        'parent': parent or 'root'
    }
1652
def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                     ytcfg, session_token_list, parent=None, comment_counts=None):
    """Generate the comments reachable from *root_continuation_data*.

    Follows the continuation found in *root_continuation_data* page by
    page through ``comment_service_ajax`` and yields one comment dict
    (see ``_extract_comment``) per comment.  Replies are fetched by
    calling this generator recursively with *parent* set to the id of
    the comment being replied to.  For a top-level walk the estimated
    total number of comments is additionally yielded once as an int.

    session_token_list -- one-element list holding the session token;
                          updated in place when a response carries a
                          new 'xsrf_token'
    parent             -- id of the parent comment, None for top level
    comment_counts     -- mutable [comments so far, est. total comments,
                          current comment thread #], shared with the
                          recursive calls
    """

    def extract_thread(parent_renderer):
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        if not parent:
            comment_counts[2] = 0
        for content in contents:
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
            # `dict` is the expected type. It used to be passed inside the
            # getter tuple, which disabled the type check and made
            # `dict(src)` act as a fallback getter.
            comment_renderer = try_get(
                comment_thread_renderer, lambda x: x['comment']['commentRenderer'], dict) or try_get(
                content, lambda x: x['commentRenderer'], dict)

            if not comment_renderer:
                continue
            comment = self._extract_comment(comment_renderer, parent)
            if not comment:
                continue
            comment_counts[0] += 1
            yield comment
            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                comment_counts[2] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, identity_token, account_syncid, ytcfg,
                    parent=comment.get('id'), session_token_list=session_token_list,
                    comment_counts=comment_counts)

                for reply_comment in comment_entries_iter:
                    yield reply_comment

    if not comment_counts:
        # comment so far, est. total comments, current comment thread #
        comment_counts = [0, 0, 0]

    # TODO: Generalize the download code with TabIE
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)
    continuation = YoutubeTabIE._extract_continuation(root_continuation_data)  # TODO
    # Only the top-level walk has a header page with count and sort menu
    first_continuation = parent is None

    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        retries = self.get_param('extractor_retries', 3)
        count = -1
        last_error = None

        # Every path through this loop ends in break, return or raise,
        # so `response` is always bound once the loop is left.
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                query = {
                    'ctoken': continuation['ctoken'],
                    'pbj': 1,
                    'type': 'next',
                }
                if parent:
                    query['action_get_comment_replies'] = 1
                else:
                    query['action_get_comments'] = 1

                comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
                if page_num == 0:
                    if first_continuation:
                        note_prefix = 'Downloading initial comment continuation page'
                    else:
                        note_prefix = ' Downloading comment reply thread %d %s' % (comment_counts[2], comment_prog_str)
                else:
                    note_prefix = '%sDownloading comment%s page %d %s' % (
                        ' ' if parent else '',
                        ' replies' if parent else '',
                        page_num,
                        comment_prog_str)

                browse = self._download_json(
                    'https://www.youtube.com/comment_service_ajax', None,
                    '%s %s' % (note_prefix, '(retry #%d)' % count if count else ''),
                    headers=headers, query=query,
                    data=urlencode_postdata({
                        'session_token': session_token_list[0]
                    }))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404, 413):
                    if e.cause.code == 413:
                        self.report_warning('Assumed end of comments (received HTTP Error 413)')
                        return
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if e.cause.code == 404:
                        last_error = last_error + ' (this API is probably deprecated)'
                    if count < retries:
                        continue
                raise
            else:
                session_token = try_get(browse, lambda x: x['xsrf_token'], compat_str)
                if session_token:
                    session_token_list[0] = session_token

                response = try_get(browse,
                                   (lambda x: x['response'],
                                    lambda x: x[1]['response'])) or {}

                if response.get('continuationContents'):
                    break

                # YouTube sometimes gives reload: now json if something went wrong (e.g. bad auth)
                if browse.get('reload'):
                    raise ExtractorError('Invalid or missing params in continuation request', expected=False)

                # TODO: not tested, merged from old extractor
                err_msg = browse.get('externalErrorMessage')
                if err_msg:
                    raise ExtractorError('YouTube said: %s' % err_msg, expected=False)

                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    raise ExtractorError(last_error)

        if not response:
            break
        visitor_data = try_get(
            response,
            lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
            compat_str) or visitor_data

        known_continuation_renderers = {
            'itemSectionContinuation': extract_thread,
            'commentRepliesContinuation': extract_thread
        }

        # extract next root continuation from the results
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}

        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value

            if first_continuation:
                first_continuation = False
                expected_comment_count = try_get(
                    continuation_renderer,
                    (lambda x: x['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'],
                     lambda x: x['header']['commentsHeaderRenderer']['commentsCount']['runs'][0]['text']),
                    compat_str)

                if expected_comment_count:
                    estimated_total = str_to_int(expected_comment_count)
                    # NOTE(review): str_to_int presumably gives None for text
                    # it cannot parse; '%d' would then raise, so such a count
                    # is ignored - confirm against str_to_int.
                    if estimated_total is not None:
                        comment_counts[1] = estimated_total
                        self.to_screen('Downloading ~%d comments' % estimated_total)
                        yield estimated_total

                # TODO: cli arg.
                # 1/True for newest, 0/False for popular (default)
                comment_sort_index = int(True)
                sort_continuation_renderer = try_get(
                    continuation_renderer,
                    lambda x: x['header']['commentsHeaderRenderer']['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems']
                    [comment_sort_index]['continuation']['reloadContinuationData'], dict)
                # If this fails, the initial continuation page
                # starts off with popular anyways.
                if sort_continuation_renderer:
                    continuation = YoutubeTabIE._build_continuation_query(
                        continuation=sort_continuation_renderer.get('continuation'),
                        ctp=sort_continuation_renderer.get('clickTrackingParams'))
                    self.to_screen('Sorting comments by %s' % ('popular' if comment_sort_index == 0 else 'newest'))
                    # Restart from the sorted continuation without
                    # consuming the comments of this (unsorted) page
                    break

            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry

            continuation = YoutubeTabIE._extract_continuation(continuation_renderer)  # TODO
            break
        else:
            # None of the renderers is known, so the continuation was not
            # advanced; stop instead of requesting the same page forever.
            break
1837
def _extract_comments(self, ytcfg, video_id, contents, webpage, xsrf_token):
    """Entry for comment extraction

    Collects the comments of the first known comment renderer of every
    entry in *contents* and returns them together with their number.
    """
    supported_renderers = (
        'itemSectionRenderer',
    )
    collected = []
    total_estimate = 0
    for section in contents:
        for renderer_name, renderer in section.items():
            if renderer_name in supported_renderers:
                entries = self._comment_entries(
                    renderer,
                    identity_token=self._extract_identity_token(webpage, item_id=video_id),
                    account_syncid=self._extract_account_syncid(ytcfg),
                    ytcfg=ytcfg,
                    session_token_list=[xsrf_token])

                for item in entries:
                    # an int is the estimated total, not a comment
                    if isinstance(item, int):
                        total_estimate = item
                    else:
                        collected.append(item)
                # only the first known renderer of a section is used
                break
    self.to_screen('Downloaded %d/%d comments' % (len(collected), total_estimate))
    return {
        'comments': collected,
        'comment_count': len(collected),
    }
1868
c5e8d7af 1869 def _real_extract(self, url):
cf7e015f 1870 url, smuggled_data = unsmuggle_url(url, {})
545cc85d 1871 video_id = self._match_id(url)
9297939e 1872
1873 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
1874
545cc85d 1875 base_url = self.http_scheme() + '//www.youtube.com/'
b3d12425 1876 webpage_url = base_url + 'watch?v=' + video_id
1877 webpage = self._download_webpage(
cce889b9 1878 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
545cc85d 1879
9297939e 1880 def get_text(x):
1881 if not x:
1882 return
1883 text = x.get('simpleText')
1884 if text and isinstance(text, compat_str):
1885 return text
1886 runs = x.get('runs')
1887 if not isinstance(runs, list):
1888 return
1889 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
1890
1891 ytm_streaming_data = {}
1892 if is_music_url:
1893 # we are forcing to use parse_json because 141 only appeared in get_video_info.
1894 # el, c, cver, cplayer field required for 141(aac 256kbps) codec
1895 # maybe paramter of youtube music player?
1896 ytm_player_response = self._parse_json(try_get(compat_parse_qs(
1897 self._download_webpage(
1898 base_url + 'get_video_info', video_id,
fe03a6cd 1899 'Fetching youtube music info webpage',
1900 'unable to download youtube music info webpage', query={
9297939e 1901 'video_id': video_id,
1902 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
1903 'el': 'detailpage',
1904 'c': 'WEB_REMIX',
1905 'cver': '0.1',
00ae2769 1906 'cplayer': 'UNIPLAYER',
1907 'html5': '1',
9297939e 1908 }, fatal=False)),
1909 lambda x: x['player_response'][0],
1910 compat_str) or '{}', video_id)
1911 ytm_streaming_data = ytm_player_response.get('streamingData') or {}
1912
545cc85d 1913 player_response = None
1914 if webpage:
1915 player_response = self._extract_yt_initial_variable(
1916 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
1917 video_id, 'initial player response')
f4f751af 1918
1919 ytcfg = self._extract_ytcfg(video_id, webpage)
545cc85d 1920 if not player_response:
1921 player_response = self._call_api(
f4f751af 1922 'player', {'videoId': video_id}, video_id, api_key=self._extract_api_key(ytcfg))
545cc85d 1923
1924 playability_status = player_response.get('playabilityStatus') or {}
1925 if playability_status.get('reason') == 'Sign in to confirm your age':
1926 pr = self._parse_json(try_get(compat_parse_qs(
1927 self._download_webpage(
1928 base_url + 'get_video_info', video_id,
1929 'Refetching age-gated info webpage',
1930 'unable to download video info webpage', query={
1931 'video_id': video_id,
7c60c33e 1932 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
00ae2769 1933 'html5': '1',
545cc85d 1934 }, fatal=False)),
1935 lambda x: x['player_response'][0],
1936 compat_str) or '{}', video_id)
1937 if pr:
1938 player_response = pr
1939
1940 trailer_video_id = try_get(
1941 playability_status,
1942 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
1943 compat_str)
1944 if trailer_video_id:
1945 return self.url_result(
1946 trailer_video_id, self.ie_key(), trailer_video_id)
cf7e015f 1947
545cc85d 1948 search_meta = (
1949 lambda x: self._html_search_meta(x, webpage, default=None)) \
1950 if webpage else lambda x: None
dbdaaa23 1951
545cc85d 1952 video_details = player_response.get('videoDetails') or {}
37357d21 1953 microformat = try_get(
545cc85d 1954 player_response,
1955 lambda x: x['microformat']['playerMicroformatRenderer'],
1956 dict) or {}
1957 video_title = video_details.get('title') \
1958 or get_text(microformat.get('title')) \
1959 or search_meta(['og:title', 'twitter:title', 'title'])
1960 video_description = video_details.get('shortDescription')
cf7e015f 1961
8fe10494 1962 if not smuggled_data.get('force_singlefeed', False):
a06916d9 1963 if not self.get_param('noplaylist'):
8fe10494
S
1964 multifeed_metadata_list = try_get(
1965 player_response,
1966 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
545cc85d 1967 compat_str)
8fe10494
S
1968 if multifeed_metadata_list:
1969 entries = []
1970 feed_ids = []
1971 for feed in multifeed_metadata_list.split(','):
1972 # Unquote should take place before split on comma (,) since textual
1973 # fields may contain comma as well (see
067aa17e 1974 # https://github.com/ytdl-org/youtube-dl/issues/8536)
545cc85d 1975 feed_data = compat_parse_qs(
1976 compat_urllib_parse_unquote_plus(feed))
6b09401b
S
1977
1978 def feed_entry(name):
545cc85d 1979 return try_get(
1980 feed_data, lambda x: x[name][0], compat_str)
6b09401b
S
1981
1982 feed_id = feed_entry('id')
1983 if not feed_id:
1984 continue
1985 feed_title = feed_entry('title')
1986 title = video_title
1987 if feed_title:
1988 title += ' (%s)' % feed_title
8fe10494
S
1989 entries.append({
1990 '_type': 'url_transparent',
1991 'ie_key': 'Youtube',
1992 'url': smuggle_url(
545cc85d 1993 base_url + 'watch?v=' + feed_data['id'][0],
8fe10494 1994 {'force_singlefeed': True}),
6b09401b 1995 'title': title,
8fe10494 1996 })
6b09401b 1997 feed_ids.append(feed_id)
8fe10494
S
1998 self.to_screen(
1999 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2000 % (', '.join(feed_ids), video_id))
545cc85d 2001 return self.playlist_result(
2002 entries, video_id, video_title, video_description)
8fe10494
S
2003 else:
2004 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
cf7e015f 2005
9297939e 2006 formats, itags, stream_ids = [], [], []
cc2db878 2007 itag_qualities = {}
545cc85d 2008 player_url = None
d3fc8074 2009 q = qualities([
2010 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2011 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2012 ])
9297939e 2013
545cc85d 2014 streaming_data = player_response.get('streamingData') or {}
2015 streaming_formats = streaming_data.get('formats') or []
2016 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
9297939e 2017 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2018 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2019
545cc85d 2020 for fmt in streaming_formats:
2021 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2022 continue
321bf820 2023
cc2db878 2024 itag = str_or_none(fmt.get('itag'))
9297939e 2025 audio_track = fmt.get('audioTrack') or {}
2026 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2027 if stream_id in stream_ids:
2028 continue
2029
cc2db878 2030 quality = fmt.get('quality')
d3fc8074 2031 if quality == 'tiny' or not quality:
2032 quality = fmt.get('audioQuality', '').lower() or quality
cc2db878 2033 if itag and quality:
2034 itag_qualities[itag] = quality
2035 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2036 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2037 # number of fragment that would subsequently requested with (`&sq=N`)
2038 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2039 continue
2040
545cc85d 2041 fmt_url = fmt.get('url')
2042 if not fmt_url:
2043 sc = compat_parse_qs(fmt.get('signatureCipher'))
2044 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2045 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2046 if not (sc and fmt_url and encrypted_sig):
2047 continue
2048 if not player_url:
2049 if not webpage:
2050 continue
2051 player_url = self._search_regex(
2052 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
2053 webpage, 'player URL', fatal=False)
2054 if not player_url:
201e9eaa 2055 continue
545cc85d 2056 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2057 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2058 fmt_url += '&' + sp + '=' + signature
2059
545cc85d 2060 if itag:
2061 itags.append(itag)
9297939e 2062 stream_ids.append(stream_id)
2063
cc2db878 2064 tbr = float_or_none(
2065 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
545cc85d 2066 dct = {
2067 'asr': int_or_none(fmt.get('audioSampleRate')),
2068 'filesize': int_or_none(fmt.get('contentLength')),
2069 'format_id': itag,
0fb983f6 2070 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
545cc85d 2071 'fps': int_or_none(fmt.get('fps')),
2072 'height': int_or_none(fmt.get('height')),
dca3ff4a 2073 'quality': q(quality),
cc2db878 2074 'tbr': tbr,
545cc85d 2075 'url': fmt_url,
2076 'width': fmt.get('width'),
0fb983f6 2077 'language': audio_track.get('id', '').split('.')[0],
545cc85d 2078 }
2079 mimetype = fmt.get('mimeType')
2080 if mimetype:
2081 mobj = re.match(
2082 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', mimetype)
2083 if mobj:
2084 dct['ext'] = mimetype2ext(mobj.group(1))
2085 dct.update(parse_codecs(mobj.group(2)))
cc2db878 2086 no_audio = dct.get('acodec') == 'none'
2087 no_video = dct.get('vcodec') == 'none'
2088 if no_audio:
2089 dct['vbr'] = tbr
2090 if no_video:
2091 dct['abr'] = tbr
2092 if no_audio or no_video:
545cc85d 2093 dct['downloader_options'] = {
2094 # Youtube throttles chunks >~10M
2095 'http_chunk_size': 10485760,
bf1317d2 2096 }
7c60c33e 2097 if dct.get('ext'):
2098 dct['container'] = dct['ext'] + '_dash'
545cc85d 2099 formats.append(dct)
2100
9297939e 2101 for sd in (streaming_data, ytm_streaming_data):
2102 hls_manifest_url = sd.get('hlsManifestUrl')
2103 if hls_manifest_url:
2104 for f in self._extract_m3u8_formats(
2105 hls_manifest_url, video_id, 'mp4', fatal=False):
2106 itag = self._search_regex(
2107 r'/itag/(\d+)', f['url'], 'itag', default=None)
2108 if itag:
2109 f['format_id'] = itag
8d68ab98 2110 formats.append(f)
545cc85d 2111
a06916d9 2112 if self.get_param('youtube_include_dash_manifest', True):
9297939e 2113 for sd in (streaming_data, ytm_streaming_data):
2114 dash_manifest_url = sd.get('dashManifestUrl')
2115 if dash_manifest_url:
2116 for f in self._extract_mpd_formats(
2117 dash_manifest_url, video_id, fatal=False):
2118 itag = f['format_id']
2119 if itag in itags:
2120 continue
2121 if itag in itag_qualities:
9297939e 2122 f['quality'] = q(itag_qualities[itag])
2123 filesize = int_or_none(self._search_regex(
2124 r'/clen/(\d+)', f.get('fragment_base_url')
2125 or f['url'], 'file size', default=None))
2126 if filesize:
2127 f['filesize'] = filesize
2128 formats.append(f)
bf1317d2 2129
545cc85d 2130 if not formats:
a06916d9 2131 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
b7da73eb 2132 self.raise_no_formats(
545cc85d 2133 'This video is DRM protected.', expected=True)
2134 pemr = try_get(
2135 playability_status,
2136 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2137 dict) or {}
2138 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2139 subreason = pemr.get('subreason')
2140 if subreason:
2141 subreason = clean_html(get_text(subreason))
2142 if subreason == 'The uploader has not made this video available in your country.':
2143 countries = microformat.get('availableCountries')
2144 if not countries:
2145 regions_allowed = search_meta('regionsAllowed')
2146 countries = regions_allowed.split(',') if regions_allowed else None
b7da73eb 2147 self.raise_geo_restricted(subreason, countries, metadata_available=True)
545cc85d 2148 reason += '\n' + subreason
2149 if reason:
b7da73eb 2150 self.raise_no_formats(reason, expected=True)
bf1317d2 2151
545cc85d 2152 self._sort_formats(formats)
bf1317d2 2153
545cc85d 2154 keywords = video_details.get('keywords') or []
2155 if not keywords and webpage:
2156 keywords = [
2157 unescapeHTML(m.group('content'))
2158 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2159 for keyword in keywords:
2160 if keyword.startswith('yt:stretch='):
201c1459 2161 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2162 if mobj:
2163 # NB: float is intentional for forcing float division
2164 w, h = (float(v) for v in mobj.groups())
2165 if w > 0 and h > 0:
2166 ratio = w / h
2167 for f in formats:
2168 if f.get('vcodec') != 'none':
2169 f['stretched_ratio'] = ratio
2170 break
6449cd80 2171
545cc85d 2172 thumbnails = []
2173 for container in (video_details, microformat):
2174 for thumbnail in (try_get(
2175 container,
2176 lambda x: x['thumbnail']['thumbnails'], list) or []):
2177 thumbnail_url = thumbnail.get('url')
2178 if not thumbnail_url:
bf1317d2 2179 continue
1988fab7 2180 # Sometimes youtube gives a wrong thumbnail URL. See:
2181 # https://github.com/yt-dlp/yt-dlp/issues/233
2182 # https://github.com/ytdl-org/youtube-dl/issues/28023
2183 if 'maxresdefault' in thumbnail_url:
2184 thumbnail_url = thumbnail_url.split('?')[0]
545cc85d 2185 thumbnails.append({
545cc85d 2186 'url': thumbnail_url,
ff2751ac 2187 'height': int_or_none(thumbnail.get('height')),
545cc85d 2188 'width': int_or_none(thumbnail.get('width')),
ff2751ac 2189 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
545cc85d 2190 })
ff2751ac 2191 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2192 if thumbnail_url:
2193 thumbnails.append({
2194 'url': thumbnail_url,
2195 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2196 })
2197 # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
2198 # See: https://github.com/ytdl-org/youtube-dl/issues/29049
2199 thumbnails.append({
2200 'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
2201 'preference': 1,
2202 })
2203 self._remove_duplicate_formats(thumbnails)
545cc85d 2204
2205 category = microformat.get('category') or search_meta('genre')
2206 channel_id = video_details.get('channelId') \
2207 or microformat.get('externalChannelId') \
2208 or search_meta('channelId')
2209 duration = int_or_none(
2210 video_details.get('lengthSeconds')
2211 or microformat.get('lengthSeconds')) \
2212 or parse_duration(search_meta('duration'))
2213 is_live = video_details.get('isLive')
2214 owner_profile_url = microformat.get('ownerProfileUrl')
2215
2216 info = {
2217 'id': video_id,
2218 'title': self._live_title(video_title) if is_live else video_title,
2219 'formats': formats,
2220 'thumbnails': thumbnails,
2221 'description': video_description,
2222 'upload_date': unified_strdate(
2223 microformat.get('uploadDate')
2224 or search_meta('uploadDate')),
2225 'uploader': video_details['author'],
2226 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2227 'uploader_url': owner_profile_url,
2228 'channel_id': channel_id,
2229 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2230 'duration': duration,
2231 'view_count': int_or_none(
2232 video_details.get('viewCount')
2233 or microformat.get('viewCount')
2234 or search_meta('interactionCount')),
2235 'average_rating': float_or_none(video_details.get('averageRating')),
2236 'age_limit': 18 if (
2237 microformat.get('isFamilySafe') is False
2238 or search_meta('isFamilyFriendly') == 'false'
2239 or search_meta('og:restrictions:age') == '18+') else 0,
2240 'webpage_url': webpage_url,
2241 'categories': [category] if category else None,
2242 'tags': keywords,
2243 'is_live': is_live,
2244 'playable_in_embed': playability_status.get('playableInEmbed'),
c224251a 2245 'was_live': video_details.get('isLiveContent'),
545cc85d 2246 }
b477fc13 2247
545cc85d 2248 pctr = try_get(
2249 player_response,
2250 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2251 subtitles = {}
2252 if pctr:
774d79cc 2253 def process_language(container, base_url, lang_code, sub_name, query):
120916da 2254 lang_subs = container.setdefault(lang_code, [])
545cc85d 2255 for fmt in self._SUBTITLE_FORMATS:
2256 query.update({
2257 'fmt': fmt,
2258 })
2259 lang_subs.append({
2260 'ext': fmt,
2261 'url': update_url_query(base_url, query),
774d79cc 2262 'name': sub_name,
545cc85d 2263 })
7e72694b 2264
545cc85d 2265 for caption_track in (pctr.get('captionTracks') or []):
2266 base_url = caption_track.get('baseUrl')
2267 if not base_url:
2268 continue
2269 if caption_track.get('kind') != 'asr':
120916da 2270 lang_code = (
2271 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2272 or caption_track.get('languageCode'))
545cc85d 2273 if not lang_code:
2274 continue
2275 process_language(
774d79cc 2276 subtitles, base_url, lang_code,
2277 try_get(caption_track, lambda x: x.get('name').get('simpleText')),
2278 {})
545cc85d 2279 continue
2280 automatic_captions = {}
2281 for translation_language in (pctr.get('translationLanguages') or []):
2282 translation_language_code = translation_language.get('languageCode')
2283 if not translation_language_code:
2284 continue
2285 process_language(
2286 automatic_captions, base_url, translation_language_code,
774d79cc 2287 try_get(translation_language, lambda x: x['languageName']['simpleText']),
545cc85d 2288 {'tlang': translation_language_code})
2289 info['automatic_captions'] = automatic_captions
2290 info['subtitles'] = subtitles
7e72694b 2291
545cc85d 2292 parsed_url = compat_urllib_parse_urlparse(url)
2293 for component in [parsed_url.fragment, parsed_url.query]:
2294 query = compat_parse_qs(component)
2295 for k, v in query.items():
2296 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2297 d_k += '_time'
2298 if d_k not in info and k in s_ks:
2299 info[d_k] = parse_duration(query[k][0])
822b9d9c
RA
2300
2301 # Youtube Music Auto-generated description
822b9d9c 2302 if video_description:
38d70284 2303 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
822b9d9c 2304 if mobj:
822b9d9c
RA
2305 release_year = mobj.group('release_year')
2306 release_date = mobj.group('release_date')
2307 if release_date:
2308 release_date = release_date.replace('-', '')
2309 if not release_year:
545cc85d 2310 release_year = release_date[:4]
2311 info.update({
2312 'album': mobj.group('album'.strip()),
2313 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2314 'track': mobj.group('track').strip(),
2315 'release_date': release_date,
cc2db878 2316 'release_year': int_or_none(release_year),
545cc85d 2317 })
7e72694b 2318
545cc85d 2319 initial_data = None
2320 if webpage:
2321 initial_data = self._extract_yt_initial_variable(
2322 webpage, self._YT_INITIAL_DATA_RE, video_id,
2323 'yt initial data')
2324 if not initial_data:
2325 initial_data = self._call_api(
f4f751af 2326 'next', {'videoId': video_id}, video_id, fatal=False, api_key=self._extract_api_key(ytcfg))
545cc85d 2327
2328 if not is_live:
2329 try:
2330 # This will error if there is no livechat
2331 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2332 info['subtitles']['live_chat'] = [{
394dcd44 2333 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
545cc85d 2334 'video_id': video_id,
2335 'ext': 'json',
2336 'protocol': 'youtube_live_chat_replay',
2337 }]
2338 except (KeyError, IndexError, TypeError):
2339 pass
2340
2341 if initial_data:
2342 chapters = self._extract_chapters_from_json(
2343 initial_data, video_id, duration)
2344 if not chapters:
2345 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2346 contents = try_get(
2347 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2348 list)
2349 if not contents:
2350 continue
2351
2352 def chapter_time(mmlir):
2353 return parse_duration(
2354 get_text(mmlir.get('timeDescription')))
2355
2356 chapters = []
2357 for next_num, content in enumerate(contents, start=1):
2358 mmlir = content.get('macroMarkersListItemRenderer') or {}
2359 start_time = chapter_time(mmlir)
2360 end_time = chapter_time(try_get(
2361 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2362 if next_num < len(contents) else duration
2363 if start_time is None or end_time is None:
2364 continue
2365 chapters.append({
2366 'start_time': start_time,
2367 'end_time': end_time,
2368 'title': get_text(mmlir.get('title')),
2369 })
2370 if chapters:
2371 break
2372 if chapters:
2373 info['chapters'] = chapters
2374
2375 contents = try_get(
2376 initial_data,
2377 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2378 list) or []
2379 for content in contents:
2380 vpir = content.get('videoPrimaryInfoRenderer')
2381 if vpir:
2382 stl = vpir.get('superTitleLink')
2383 if stl:
2384 stl = get_text(stl)
2385 if try_get(
2386 vpir,
2387 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2388 info['location'] = stl
2389 else:
2390 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2391 if mobj:
2392 info.update({
2393 'series': mobj.group(1),
2394 'season_number': int(mobj.group(2)),
2395 'episode_number': int(mobj.group(3)),
2396 })
2397 for tlb in (try_get(
2398 vpir,
2399 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2400 list) or []):
2401 tbr = tlb.get('toggleButtonRenderer') or {}
2402 for getter, regex in [(
2403 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2404 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2405 lambda x: x['accessibility'],
2406 lambda x: x['accessibilityData']['accessibilityData'],
2407 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2408 label = (try_get(tbr, getter, dict) or {}).get('label')
2409 if label:
2410 mobj = re.match(regex, label)
2411 if mobj:
2412 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2413 break
2414 sbr_tooltip = try_get(
2415 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2416 if sbr_tooltip:
2417 like_count, dislike_count = sbr_tooltip.split(' / ')
2418 info.update({
2419 'like_count': str_to_int(like_count),
2420 'dislike_count': str_to_int(dislike_count),
2421 })
2422 vsir = content.get('videoSecondaryInfoRenderer')
2423 if vsir:
2424 info['channel'] = get_text(try_get(
2425 vsir,
2426 lambda x: x['owner']['videoOwnerRenderer']['title'],
cce889b9 2427 dict))
545cc85d 2428 rows = try_get(
2429 vsir,
2430 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2431 list) or []
2432 multiple_songs = False
2433 for row in rows:
2434 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2435 multiple_songs = True
2436 break
2437 for row in rows:
2438 mrr = row.get('metadataRowRenderer') or {}
2439 mrr_title = mrr.get('title')
2440 if not mrr_title:
2441 continue
2442 mrr_title = get_text(mrr['title'])
2443 mrr_contents_text = get_text(mrr['contents'][0])
2444 if mrr_title == 'License':
2445 info['license'] = mrr_contents_text
2446 elif not multiple_songs:
2447 if mrr_title == 'Album':
2448 info['album'] = mrr_contents_text
2449 elif mrr_title == 'Artist':
2450 info['artist'] = mrr_contents_text
2451 elif mrr_title == 'Song':
2452 info['track'] = mrr_contents_text
2453
2454 fallbacks = {
2455 'channel': 'uploader',
2456 'channel_id': 'uploader_id',
2457 'channel_url': 'uploader_url',
2458 }
2459 for to, frm in fallbacks.items():
2460 if not info.get(to):
2461 info[to] = info.get(frm)
2462
2463 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2464 v = info.get(s_k)
2465 if v:
2466 info[d_k] = v
b84071c0 2467
c224251a
M
2468 is_private = bool_or_none(video_details.get('isPrivate'))
2469 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2470 is_membersonly = None
b28f8d24 2471 is_premium = None
c224251a
M
2472 if initial_data and is_private is not None:
2473 is_membersonly = False
b28f8d24 2474 is_premium = False
c224251a
M
2475 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2476 for content in contents or []:
2477 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2478 for badge in badges or []:
2479 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2480 if label.lower() == 'members only':
2481 is_membersonly = True
2482 break
b28f8d24
M
2483 elif label.lower() == 'premium':
2484 is_premium = True
2485 break
2486 if is_membersonly or is_premium:
c224251a
M
2487 break
2488
2489 # TODO: Add this for playlists
2490 info['availability'] = self._availability(
2491 is_private=is_private,
b28f8d24 2492 needs_premium=is_premium,
c224251a
M
2493 needs_subscription=is_membersonly,
2494 needs_auth=info['age_limit'] >= 18,
2495 is_unlisted=None if is_private is None else is_unlisted)
2496
06167fbb 2497 # get xsrf for annotations or comments
a06916d9 2498 get_annotations = self.get_param('writeannotations', False)
2499 get_comments = self.get_param('getcomments', False)
06167fbb 2500 if get_annotations or get_comments:
29f7c58a 2501 xsrf_token = None
545cc85d 2502 ytcfg = self._extract_ytcfg(video_id, webpage)
29f7c58a 2503 if ytcfg:
2504 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2505 if not xsrf_token:
2506 xsrf_token = self._search_regex(
2507 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
8a784c74 2508 webpage, 'xsrf token', group='xsrf_token', fatal=False)
06167fbb 2509
2510 # annotations
06167fbb 2511 if get_annotations:
64b6a4e9
RA
2512 invideo_url = try_get(
2513 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2514 if xsrf_token and invideo_url:
29f7c58a 2515 xsrf_field_name = None
2516 if ytcfg:
2517 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2518 if not xsrf_field_name:
2519 xsrf_field_name = self._search_regex(
2520 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
8a784c74 2521 webpage, 'xsrf field name',
29f7c58a 2522 group='xsrf_field_name', default='session_token')
8a784c74 2523 info['annotations'] = self._download_webpage(
64b6a4e9
RA
2524 self._proto_relative_url(invideo_url),
2525 video_id, note='Downloading annotations',
2526 errnote='Unable to download video annotations', fatal=False,
2527 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
7e72694b 2528
277d6ff5 2529 if get_comments:
a1c5d2ca 2530 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage, xsrf_token)
4ea3be0a 2531
545cc85d 2532 self.mark_watched(video_id, player_response)
d77ab8e2 2533
545cc85d 2534 return info
c5e8d7af 2535
5f6a1245 2536
8bdd16b4 2537class YoutubeTabIE(YoutubeBaseInfoExtractor):
2538 IE_DESC = 'YouTube.com tab'
70d5c17b 2539 _VALID_URL = r'''(?x)
2540 https?://
2541 (?:\w+\.)?
2542 (?:
2543 youtube(?:kids)?\.com|
2544 invidio\.us
2545 )/
2546 (?:
fe03a6cd 2547 (?P<channel_type>channel|c|user|browse)/|
70d5c17b 2548 (?P<not_channel>
9ba5705a 2549 feed/|hashtag/|
70d5c17b 2550 (?:playlist|watch)\?.*?\blist=
2551 )|
29f7c58a 2552 (?!(?:%s)\b) # Direct URLs
70d5c17b 2553 )
2554 (?P<id>[^/?\#&]+)
2555 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
8bdd16b4 2556 IE_NAME = 'youtube:tab'
2557
81127aa5 2558 _TESTS = [{
da692b79 2559 'note': 'playlists, multipage',
8bdd16b4 2560 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
2561 'playlist_mincount': 94,
2562 'info_dict': {
2563 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2564 'title': 'Игорь Клейнер - Playlists',
2565 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2566 'uploader': 'Игорь Клейнер',
2567 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
8bdd16b4 2568 },
2569 }, {
da692b79 2570 'note': 'playlists, multipage, different order',
8bdd16b4 2571 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
2572 'playlist_mincount': 94,
2573 'info_dict': {
2574 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
2575 'title': 'Игорь Клейнер - Playlists',
2576 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
deaec5af 2577 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
2578 'uploader': 'Игорь Клейнер',
8bdd16b4 2579 },
201c1459 2580 }, {
da692b79 2581 'note': 'playlists, series',
201c1459 2582 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
2583 'playlist_mincount': 5,
2584 'info_dict': {
2585 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2586 'title': '3Blue1Brown - Playlists',
2587 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
da692b79 2588 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
2589 'uploader': '3Blue1Brown',
201c1459 2590 },
8bdd16b4 2591 }, {
da692b79 2592 'note': 'playlists, singlepage',
8bdd16b4 2593 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
2594 'playlist_mincount': 4,
2595 'info_dict': {
2596 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
2597 'title': 'ThirstForScience - Playlists',
2598 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
deaec5af 2599 'uploader': 'ThirstForScience',
2600 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
8bdd16b4 2601 }
2602 }, {
2603 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
2604 'only_matching': True,
2605 }, {
da692b79 2606 'note': 'basic, single video playlist',
0e30a7b9 2607 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
81127aa5 2608 'info_dict': {
0e30a7b9 2609 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2610 'uploader': 'Sergey M.',
2611 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3867038a 2612 'title': 'youtube-dl public playlist',
81127aa5 2613 },
0e30a7b9 2614 'playlist_count': 1,
9291475f 2615 }, {
da692b79 2616 'note': 'empty playlist',
0e30a7b9 2617 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
9291475f 2618 'info_dict': {
0e30a7b9 2619 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
2620 'uploader': 'Sergey M.',
2621 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3867038a 2622 'title': 'youtube-dl empty playlist',
9291475f
PH
2623 },
2624 'playlist_count': 0,
2625 }, {
da692b79 2626 'note': 'Home tab',
8bdd16b4 2627 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
9291475f 2628 'info_dict': {
8bdd16b4 2629 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2630 'title': 'lex will - Home',
2631 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2632 'uploader': 'lex will',
2633 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2634 },
8bdd16b4 2635 'playlist_mincount': 2,
9291475f 2636 }, {
da692b79 2637 'note': 'Videos tab',
8bdd16b4 2638 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
9291475f 2639 'info_dict': {
8bdd16b4 2640 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2641 'title': 'lex will - Videos',
2642 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2643 'uploader': 'lex will',
2644 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2645 },
8bdd16b4 2646 'playlist_mincount': 975,
9291475f 2647 }, {
da692b79 2648 'note': 'Videos tab, sorted by popular',
8bdd16b4 2649 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
9291475f 2650 'info_dict': {
8bdd16b4 2651 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2652 'title': 'lex will - Videos',
2653 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2654 'uploader': 'lex will',
2655 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2656 },
8bdd16b4 2657 'playlist_mincount': 199,
9291475f 2658 }, {
da692b79 2659 'note': 'Playlists tab',
8bdd16b4 2660 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
9291475f 2661 'info_dict': {
8bdd16b4 2662 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2663 'title': 'lex will - Playlists',
2664 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2665 'uploader': 'lex will',
2666 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
9291475f 2667 },
8bdd16b4 2668 'playlist_mincount': 17,
ac7553d0 2669 }, {
da692b79 2670 'note': 'Community tab',
8bdd16b4 2671 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
ac7553d0 2672 'info_dict': {
8bdd16b4 2673 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2674 'title': 'lex will - Community',
2675 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2676 'uploader': 'lex will',
2677 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2678 },
2679 'playlist_mincount': 18,
87dadd45 2680 }, {
da692b79 2681 'note': 'Channels tab',
8bdd16b4 2682 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
87dadd45 2683 'info_dict': {
8bdd16b4 2684 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
2685 'title': 'lex will - Channels',
2686 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
deaec5af 2687 'uploader': 'lex will',
2688 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
8bdd16b4 2689 },
deaec5af 2690 'playlist_mincount': 12,
cd684175 2691 }, {
2692 'note': 'Search tab',
2693 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
2694 'playlist_mincount': 40,
2695 'info_dict': {
2696 'id': 'UCYO_jab_esuFRV4b17AJtAw',
2697 'title': '3Blue1Brown - Search - linear algebra',
2698 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
2699 'uploader': '3Blue1Brown',
2700 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
2701 },
6b08cdf6 2702 }, {
a0566bbf 2703 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2704 'only_matching': True,
2705 }, {
a0566bbf 2706 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2707 'only_matching': True,
2708 }, {
a0566bbf 2709 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
8bdd16b4 2710 'only_matching': True,
2711 }, {
2712 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
2713 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2714 'info_dict': {
2715 'title': '29C3: Not my department',
2716 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
2717 'uploader': 'Christiaan008',
2718 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
deaec5af 2719 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
8bdd16b4 2720 },
2721 'playlist_count': 96,
2722 }, {
2723 'note': 'Large playlist',
2724 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6b08cdf6 2725 'info_dict': {
8bdd16b4 2726 'title': 'Uploads from Cauchemar',
2727 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
2728 'uploader': 'Cauchemar',
2729 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
13a75688 2730 },
8bdd16b4 2731 'playlist_mincount': 1123,
2732 }, {
da692b79 2733 'note': 'even larger playlist, 8832 videos',
8bdd16b4 2734 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
2735 'only_matching': True,
4b7df0d3
JMF
2736 }, {
2737 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
2738 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
2739 'info_dict': {
acf757f4
PH
2740 'title': 'Uploads from Interstellar Movie',
2741 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
13a75688 2742 'uploader': 'Interstellar Movie',
8bdd16b4 2743 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
4b7df0d3 2744 },
481cc733 2745 'playlist_mincount': 21,
358de58c 2746 }, {
2747 'note': 'Playlist with "show unavailable videos" button',
2748 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
2749 'info_dict': {
2750 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
2751 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
2752 'uploader': 'Phim Siêu Nhân Nhật Bản',
2753 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
2754 },
da692b79 2755 'playlist_mincount': 200,
5d342002 2756 }, {
da692b79 2757 'note': 'Playlist with unavailable videos in page 7',
5d342002 2758 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
2759 'info_dict': {
2760 'title': 'Uploads from BlankTV',
2761 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
2762 'uploader': 'BlankTV',
2763 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
2764 },
da692b79 2765 'playlist_mincount': 1000,
8bdd16b4 2766 }, {
da692b79 2767 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
8bdd16b4 2768 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2769 'info_dict': {
2770 'title': 'Data Analysis with Dr Mike Pound',
2771 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
2772 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
2773 'uploader': 'Computerphile',
deaec5af 2774 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
8bdd16b4 2775 },
2776 'playlist_mincount': 11,
2777 }, {
a0566bbf 2778 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
8bdd16b4 2779 'only_matching': True,
dacb3a86 2780 }, {
da692b79 2781 'note': 'Playlist URL that does not actually serve a playlist',
dacb3a86
S
2782 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
2783 'info_dict': {
2784 'id': 'FqZTN594JQw',
2785 'ext': 'webm',
2786 'title': "Smiley's People 01 detective, Adventure Series, Action",
2787 'uploader': 'STREEM',
2788 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
ec85ded8 2789 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
dacb3a86
S
2790 'upload_date': '20150526',
2791 'license': 'Standard YouTube License',
2792 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
2793 'categories': ['People & Blogs'],
2794 'tags': list,
dbdaaa23 2795 'view_count': int,
dacb3a86
S
2796 'like_count': int,
2797 'dislike_count': int,
2798 },
2799 'params': {
2800 'skip_download': True,
2801 },
13a75688 2802 'skip': 'This video is not available.',
dacb3a86 2803 'add_ie': [YoutubeIE.ie_key()],
481cc733 2804 }, {
8bdd16b4 2805 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
c0345b82 2806 'only_matching': True,
66b48727 2807 }, {
8bdd16b4 2808 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
66b48727 2809 'only_matching': True,
a0566bbf 2810 }, {
2811 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
2812 'info_dict': {
da692b79 2813 'id': 'X1whbWASnNQ', # This will keep changing
a0566bbf 2814 'ext': 'mp4',
deaec5af 2815 'title': compat_str,
a0566bbf 2816 'uploader': 'Sky News',
2817 'uploader_id': 'skynews',
2818 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
da692b79 2819 'upload_date': r're:\d{8}',
2820 'description': compat_str,
a0566bbf 2821 'categories': ['News & Politics'],
2822 'tags': list,
2823 'like_count': int,
2824 'dislike_count': int,
2825 },
2826 'params': {
2827 'skip_download': True,
2828 },
da692b79 2829 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
a0566bbf 2830 }, {
2831 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
2832 'info_dict': {
2833 'id': 'a48o2S1cPoo',
2834 'ext': 'mp4',
2835 'title': 'The Young Turks - Live Main Show',
2836 'uploader': 'The Young Turks',
2837 'uploader_id': 'TheYoungTurks',
2838 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
2839 'upload_date': '20150715',
2840 'license': 'Standard YouTube License',
2841 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
2842 'categories': ['News & Politics'],
2843 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
2844 'like_count': int,
2845 'dislike_count': int,
2846 },
2847 'params': {
2848 'skip_download': True,
2849 },
2850 'only_matching': True,
2851 }, {
2852 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
2853 'only_matching': True,
2854 }, {
2855 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
2856 'only_matching': True,
09f1580e 2857 }, {
2858 'note': 'A channel that is not live. Should raise error',
2859 'url': 'https://www.youtube.com/user/numberphile/live',
2860 'only_matching': True,
3d3dddc9 2861 }, {
2862 'url': 'https://www.youtube.com/feed/trending',
2863 'only_matching': True,
2864 }, {
3d3dddc9 2865 'url': 'https://www.youtube.com/feed/library',
2866 'only_matching': True,
2867 }, {
3d3dddc9 2868 'url': 'https://www.youtube.com/feed/history',
2869 'only_matching': True,
2870 }, {
3d3dddc9 2871 'url': 'https://www.youtube.com/feed/subscriptions',
2872 'only_matching': True,
2873 }, {
3d3dddc9 2874 'url': 'https://www.youtube.com/feed/watch_later',
2875 'only_matching': True,
2876 }, {
da692b79 2877 'note': 'Recommended - redirects to home page',
3d3dddc9 2878 'url': 'https://www.youtube.com/feed/recommended',
2879 'only_matching': True,
29f7c58a 2880 }, {
da692b79 2881 'note': 'inline playlist with not always working continuations',
29f7c58a 2882 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
2883 'only_matching': True,
2884 }, {
2885 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
2886 'only_matching': True,
2887 }, {
2888 'url': 'https://www.youtube.com/course',
2889 'only_matching': True,
2890 }, {
2891 'url': 'https://www.youtube.com/zsecurity',
2892 'only_matching': True,
2893 }, {
2894 'url': 'http://www.youtube.com/NASAgovVideo/videos',
2895 'only_matching': True,
2896 }, {
2897 'url': 'https://www.youtube.com/TheYoungTurks/live',
2898 'only_matching': True,
39ed931e 2899 }, {
2900 'url': 'https://www.youtube.com/hashtag/cctv9',
2901 'info_dict': {
2902 'id': 'cctv9',
2903 'title': '#cctv9',
2904 },
2905 'playlist_mincount': 350,
201c1459 2906 }, {
2907 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
2908 'only_matching': True,
9297939e 2909 }, {
da692b79 2910 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
9297939e 2911 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2912 'only_matching': True
fe03a6cd 2913 }, {
2914 'note': '/browse/ should redirect to /channel/',
2915 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
2916 'only_matching': True
2917 }, {
2918 'note': 'VLPL, should redirect to playlist?list=PL...',
2919 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2920 'info_dict': {
2921 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
2922 'uploader': 'NoCopyrightSounds',
2923 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
2924 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
2925 'title': 'NCS Releases',
2926 },
2927 'playlist_mincount': 166,
18db7548 2928 }, {
2929 'note': 'Topic, should redirect to playlist?list=UU...',
2930 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
2931 'info_dict': {
2932 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
2933 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
2934 'title': 'Uploads from Royalty Free Music - Topic',
2935 'uploader': 'Royalty Free Music - Topic',
2936 },
2937 'expected_warnings': [
2938 'A channel/user page was given',
2939 'The URL does not have a videos tab',
2940 ],
2941 'playlist_mincount': 101,
2942 }, {
2943 'note': 'Topic without a UU playlist',
2944 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
2945 'info_dict': {
2946 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
2947 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
2948 },
2949 'expected_warnings': [
2950 'A channel/user page was given',
2951 'The URL does not have a videos tab',
2952 'Falling back to channel URL',
2953 ],
2954 'playlist_mincount': 9,
abcdd12b 2955 }, {
2956 'note': 'Youtube music Album',
2957 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
2958 'info_dict': {
2959 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
2960 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
2961 },
2962 'playlist_count': 50,
29f7c58a 2963 }]
2964
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Any URL accepted by YoutubeIE is rejected here; every other URL is
    decided by the inherited ``suitable`` implementation.
    """
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
8bdd16b4 2969
2970 def _extract_channel_id(self, webpage):
2971 channel_id = self._html_search_meta(
2972 'channelId', webpage, 'channel id', default=None)
2973 if channel_id:
2974 return channel_id
2975 channel_url = self._html_search_meta(
2976 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
2977 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
2978 'twitter:app:url:googleplay'), webpage, 'channel url')
2979 return self._search_regex(
2980 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
2981 channel_url, 'channel id')
15f6397c 2982
8bdd16b4 2983 @staticmethod
cd7c66cf 2984 def _extract_basic_item_renderer(item):
2985 # Modified from _extract_grid_item_renderer
201c1459 2986 known_basic_renderers = (
2987 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
cd7c66cf 2988 )
2989 for key, renderer in item.items():
201c1459 2990 if not isinstance(renderer, dict):
cd7c66cf 2991 continue
201c1459 2992 elif key in known_basic_renderers:
2993 return renderer
2994 elif key.startswith('grid') and key.endswith('Renderer'):
2995 return renderer
8bdd16b4 2996
8bdd16b4 2997 def _grid_entries(self, grid_renderer):
2998 for item in grid_renderer['items']:
2999 if not isinstance(item, dict):
39b62db1 3000 continue
cd7c66cf 3001 renderer = self._extract_basic_item_renderer(item)
8bdd16b4 3002 if not isinstance(renderer, dict):
3003 continue
3004 title = try_get(
201c1459 3005 renderer, (lambda x: x['title']['runs'][0]['text'],
3006 lambda x: x['title']['simpleText']), compat_str)
8bdd16b4 3007 # playlist
3008 playlist_id = renderer.get('playlistId')
3009 if playlist_id:
3010 yield self.url_result(
3011 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3012 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3013 video_title=title)
201c1459 3014 continue
8bdd16b4 3015 # video
3016 video_id = renderer.get('videoId')
3017 if video_id:
3018 yield self._extract_video(renderer)
201c1459 3019 continue
8bdd16b4 3020 # channel
3021 channel_id = renderer.get('channelId')
3022 if channel_id:
3023 title = try_get(
3024 renderer, lambda x: x['title']['simpleText'], compat_str)
3025 yield self.url_result(
3026 'https://www.youtube.com/channel/%s' % channel_id,
3027 ie=YoutubeTabIE.ie_key(), video_title=title)
201c1459 3028 continue
3029 # generic endpoint URL support
3030 ep_url = urljoin('https://www.youtube.com/', try_get(
3031 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3032 compat_str))
3033 if ep_url:
3034 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3035 if ie.suitable(ep_url):
3036 yield self.url_result(
3037 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3038 break
8bdd16b4 3039
3d3dddc9 3040 def _shelf_entries_from_content(self, shelf_renderer):
3041 content = shelf_renderer.get('content')
3042 if not isinstance(content, dict):
8bdd16b4 3043 return
cd7c66cf 3044 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3d3dddc9 3045 if renderer:
3046 # TODO: add support for nested playlists so each shelf is processed
3047 # as separate playlist
3048 # TODO: this includes only first N items
3049 for entry in self._grid_entries(renderer):
3050 yield entry
3051 renderer = content.get('horizontalListRenderer')
3052 if renderer:
3053 # TODO
3054 pass
8bdd16b4 3055
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for one shelf.

    When the shelf endpoint resolves to a URL, a URL result titled with
    the shelf's first title run is yielded first. The entries embedded in
    the shelf content are yielded afterwards in either case. With
    *skip_channels* set, a shelf whose URL contains ``/channels?`` yields
    nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer,
        lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    target_url = urljoin('https://www.youtube.com', endpoint_path)
    if target_url:
        # Links to other channels are skipped by URL substring; checking
        # endpoint.commandMetadata.webCommandMetadata.webPageType ==
        # WEB_PAGE_TYPE_CHANNEL will not work.
        if skip_channels and '/channels?' in target_url:
            return
        shelf_title = try_get(
            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
        yield self.url_result(target_url, video_title=shelf_title)
    # The shelf may carry no URL, so its content is extracted as well.
    for content_entry in self._shelf_entries_from_content(shelf_renderer):
        yield content_entry
c5e8d7af 3073
8bdd16b4 3074 def _playlist_entries(self, video_list_renderer):
3075 for content in video_list_renderer['contents']:
3076 if not isinstance(content, dict):
3077 continue
3078 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3079 if not isinstance(renderer, dict):
3080 continue
3081 video_id = renderer.get('videoId')
3082 if not video_id:
3083 continue
3084 yield self._extract_video(renderer)
07aeced6 3085
def _rich_entries(self, rich_grid_renderer):
    """Yield the video entry held in ``content.videoRenderer``, if any.

    Nothing is yielded when that renderer is absent, is not a dict, or has
    no ``videoId``.
    """
    video_renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
    if video_renderer and video_renderer.get('videoId'):
        yield self._extract_video(video_renderer)
3093
8bdd16b4 3094 def _video_entry(self, video_renderer):
3095 video_id = video_renderer.get('videoId')
3096 if video_id:
3097 return self._extract_video(video_renderer)
dacb3a86 3098
def _post_thread_entries(self, post_thread_renderer):
    """Yield the entries referenced by one community post.

    In order: the attached video (if any), the attached playlist (if any),
    then every link in the post text that YoutubeIE accepts, except a link
    pointing at the attached video itself. Nothing is yielded when the
    thread has no ``backstagePostRenderer``.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    attached_video_id = attached_video.get('videoId')
    if attached_video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if attached_playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % attached_playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # inline video links
    for text_run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(text_run, dict):
            continue
        link = try_get(
            text_run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_id = YoutubeIE._match_id(link)
        # Skip a link to the attached video, which is handled above.
        if linked_id != attached_video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_id)
dacb3a86 3134
8bdd16b4 3135 def _post_thread_continuation_entries(self, post_thread_continuation):
3136 contents = post_thread_continuation.get('contents')
3137 if not isinstance(contents, list):
3138 return
3139 for content in contents:
3140 renderer = content.get('backstagePostThreadRenderer')
3141 if not isinstance(renderer, dict):
3142 continue
3143 for entry in self._post_thread_entries(renderer):
3144 yield entry
07aeced6 3145
39ed931e 3146 r''' # unused
3147 def _rich_grid_entries(self, contents):
3148 for content in contents:
3149 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3150 if video_renderer:
3151 entry = self._video_entry(video_renderer)
3152 if entry:
3153 yield entry
3154 '''
3155
29f7c58a 3156 @staticmethod
3157 def _build_continuation_query(continuation, ctp=None):
3158 query = {
3159 'ctoken': continuation,
3160 'continuation': continuation,
3161 }
3162 if ctp:
3163 query['itct'] = ctp
3164 return query
3165
@staticmethod
def _extract_next_continuation_data(renderer):
    """Return the continuation query found in ``continuations[0].nextContinuationData``.

    Returns None when that structure is absent or holds no continuation token.
    """
    next_data = try_get(
        renderer, lambda x: x['continuations'][0]['nextContinuationData'], dict) or {}
    token = next_data.get('continuation')
    if not token:
        return None
    return YoutubeTabIE._build_continuation_query(
        token, next_data.get('clickTrackingParams'))
c5e8d7af 3177
@classmethod
def _extract_continuation(cls, renderer):
    """Return the continuation query for *renderer*, or None if it has none.

    ``nextContinuationData`` is tried first; failing that, the renderer's
    ``contents`` and ``items`` lists are scanned for the first
    ``continuationItemRenderer`` whose endpoint carries a command token.
    """
    query = cls._extract_next_continuation_data(renderer)
    if query:
        return query

    candidates = (
        (try_get(renderer, lambda x: x['contents'], list) or [])
        + (try_get(renderer, lambda x: x['items'], list) or []))
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue
        endpoint = try_get(
            candidate, lambda x: x['continuationItemRenderer']['continuationEndpoint'],
            dict)
        if not endpoint:
            continue
        token = try_get(
            endpoint, lambda x: x['continuationCommand']['token'], compat_str)
        if token:
            return YoutubeTabIE._build_continuation_query(
                token, endpoint.get('clickTrackingParams'))
448830ce 3200
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield all entries of *tab*, following continuations page by page.

    The entries embedded in the tab's ``sectionListRenderer`` or
    ``richGridRenderer`` are yielded first. Then, as long as a continuation
    query is known, the next page is requested through ``_extract_response``
    and its entries are yielded. Iteration stops when no continuation is
    left, when a request returns nothing, or when a page contains no
    renderer this method knows how to handle.

    *identity_token*, *account_syncid* and *ytcfg* are only passed on to the
    header/context/response helpers.
    """

    def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
        # Renderers that may appear inside an itemSectionRenderer, mapped to
        # the callable producing their entries
        section_renderers = {
            'playlistVideoListRenderer': self._playlist_entries,
            'gridRenderer': self._grid_entries,
            'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
            'backstagePostThreadRenderer': self._post_thread_entries,
            'videoRenderer': lambda x: [self._video_entry(x)],
        }
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                # Only the first known renderer of each item is processed
                for key, renderer in isr_content.items():
                    if key not in section_renderers:
                        continue
                    for entry in section_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # One-element list used as a mutable cell that extract_entries writes the
    # continuation into
    continuation_list = [None]  # Python 2 does not support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    context = self._extract_context(ytcfg)
    visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

    # Handlers for the keys of response['continuationContents']
    known_continuation_renderers = {
        'playlistVideoListContinuation': self._playlist_entries,
        'gridContinuation': self._grid_entries,
        'itemSectionContinuation': self._post_thread_continuation_entries,
        'sectionListContinuation': extract_entries,  # for feeds
    }
    # Handlers for the first appended continuation item: (callable, key under
    # which the whole item list is handed to the callable)
    known_renderers = {
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
    }

    for page_num in itertools.count(1):
        if not continuation:
            break
        query = {'continuation': continuation['continuation']}
        # 'itct' is only present when the continuation carried click-tracking
        # params, so it must not be indexed unconditionally
        click_tracking_params = continuation.get('itct')
        if click_tracking_params:
            query['clickTracking'] = {'clickTrackingParams': click_tracking_params}
        headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=query, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            # Fresh cell so a stale continuation from the previous page is not reused
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            handler, container_key = known_renderers[key]
            video_items_renderer = {container_key: continuation_items}
            continuation_list = [None]
            for entry in handler(video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        break
9558dcec 3320
@staticmethod
def _extract_selected_tab(tabs):
    """Return the first tab renderer whose ``selected`` flag is exactly True.

    Each tab is looked up under ``tabRenderer`` / ``expandableTabRenderer``.
    Raises ExtractorError when no tab is selected.
    """
    renderers = (
        dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        for tab in tabs)
    selected = next((r for r in renderers if r.get('selected') is True), None)
    if selected is None:
        raise ExtractorError('Unable to find selected tab')
    return selected
b82f815f 3329
@staticmethod
def _extract_uploader(data):
    """Return uploader metadata read from the playlist sidebar of *data*.

    The result may contain ``uploader``, ``uploader_id`` and
    ``uploader_url``; keys whose value would be None are left out. An empty
    dict is returned when the sidebar names no video owner.
    """
    uploader = {}
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        secondary_info = sidebar_item.get('playlistSidebarSecondaryInfoRenderer')
        if not isinstance(secondary_info, dict):
            continue
        owner = try_get(
            secondary_info, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
        if not owner:
            continue
        uploader['uploader'] = owner.get('text')
        uploader['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        uploader['uploader_url'] = urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
    return dict((key, value) for key, value in uploader.items() if value is not None)
8bdd16b4 3352
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a tabbed (channel or playlist) page.

    Metadata comes from ``channelMetadataRenderer`` when present, otherwise
    from ``playlistMetadataRenderer``. The playlist id is the channel's
    external id when one was found, else *item_id*. The title of the selected
    tab (and its ``expandedText``) is appended to the playlist title. The
    entries are produced lazily by ``_entries`` for the selected tab.
    """
    selected_tab = self._extract_selected_tab(tabs)

    title = description = channel_name = channel_url = channel_id = None
    tags, raw_thumbnails = [], []

    channel_renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if channel_renderer:
        channel_name = channel_renderer.get('title')
        channel_url = channel_renderer.get('channelUrl')
        channel_id = channel_renderer.get('externalId')
        meta_renderer = channel_renderer
    else:
        meta_renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if meta_renderer:
        title = meta_renderer.get('title')
        description = meta_renderer.get('description', '')
        tags = meta_renderer.get('keywords', '').split()
        raw_thumbnails = (
            try_get(meta_renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                data,
                lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for thumb in raw_thumbnails:
        thumb_url = url_or_none(thumb.get('url')) if isinstance(thumb, dict) else None
        if thumb_url:
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

    # Only channel pages supply an id of their own
    playlist_id = item_id if channel_id is None else channel_id
    if title is None:
        title = (
            try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
            or playlist_id)
    title = (
        title
        + format_field(selected_tab, 'title', ' - %s')
        + format_field(selected_tab, 'expandedText', ' - %s'))

    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    if not channel_id:
        # No channel metadata: fall back to the owner named in the sidebar
        metadata.update(self._extract_uploader(data))
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})

    entries = self._entries(
        selected_tab, playlist_id,
        self._extract_identity_token(webpage, item_id),
        self._extract_account_syncid(data),
        self._extract_ytcfg(item_id, webpage))
    return self.playlist_result(entries, **metadata)
73c4ac2c 3425
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield the videos of a mix playlist, requesting further pages on demand.

    Each page is turned into entries by ``_playlist_entries``. Entries up to
    and including the last video of the previous page are skipped. Iteration
    ends when a page has no (new) videos, when the very first video shows up
    again, or when the ``next`` endpoint returns no playlist.
    """
    first_id = last_id = None
    ytcfg = self._extract_ytcfg(playlist_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
        visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video already yielded (0 if not found)
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last panel item may lack a watch endpoint; fall back to the
        # defaults below instead of failing on None
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query,
            ep='next',
            headers=headers,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist is None:
            # Response carried no playlist; stop rather than hand None to
            # _playlist_entries on the next iteration
            return
cd7c66cf 3464
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Return the result for a playlist embedded in a watch page.

    When the playlist advertises a web URL different from *url*, extraction
    is delegated to that URL through ``YoutubeTabIE``. Otherwise the playlist
    is treated as a mix and extracted by ``_extract_mix_playlist``.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    web_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, web_path)

    if not playlist_url or playlist_url == url:
        # Nothing to delegate to: this is a mix playlist
        return self.playlist_result(
            self._extract_mix_playlist(playlist, playlist_id, data, webpage),
            playlist_id=playlist_id, playlist_title=title)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
c5e8d7af 3482
@staticmethod
def _extract_alerts(data):
    """Yield ``(alert_type, message)`` for every alert in ``data['alerts']``.

    The message is the alert's ``text.simpleText`` followed by the text of
    all ``text.runs``. Each alert is yielded at most once; alerts without a
    type or without any message text are skipped, as are malformed
    (non-dict) alerts.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = try_get(alert, lambda x: x['text']['simpleText'], compat_str) or ''
            for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
                # A run without string text contributes nothing instead of
                # raising on ``str + None``
                message += try_get(run, lambda x: x['text'], compat_str) or ''
            if message:
                yield alert_type, message
3499
3500 def _report_alerts(self, alerts, expected=True):
3ffc7c89 3501 errors = []
3502 warnings = []
95c01b6c 3503 for alert_type, alert_message in alerts:
f3eaa8dd 3504 if alert_type.lower() == 'error':
3ffc7c89 3505 errors.append([alert_type, alert_message])
f3eaa8dd 3506 else:
3ffc7c89 3507 warnings.append([alert_type, alert_message])
f3eaa8dd 3508
3ffc7c89 3509 for alert_type, alert_message in (warnings + errors[:-1]):
6a39ee13 3510 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
3ffc7c89 3511 if errors:
3512 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
02ced43c 3513
95c01b6c 3514 def _extract_and_report_alerts(self, data, *args, **kwargs):
3515 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
3516
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns None when *data* has no playlist sidebar; otherwise returns what
    the non-fatal ``_extract_response`` call returns. When the button is not
    found, default ``params`` / ``browseId`` values are used for the request.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    if not sidebar_items:
        return

    browse_id = params = None
    for sidebar_item in sidebar_items:
        if not isinstance(sidebar_item, dict):
            continue
        menu_items = try_get(
            sidebar_item.get('playlistSidebarPrimaryInfoRenderer'),
            lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_items:
            if not isinstance(menu_item, dict):
                continue
            nav_item = menu_item.get('menuNavigationItemRenderer')
            label = try_get(nav_item, lambda x: x['text']['simpleText'], compat_str)
            if not label or label.lower() != 'show unavailable videos':
                continue
            endpoint = try_get(
                nav_item, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id, params = endpoint.get('browseId'), endpoint.get('params')
            break

    ytcfg = self._extract_ytcfg(item_id, webpage)
    headers = self._generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=try_get(
            self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False,
        note='Downloading API JSON with unavailable videos')
358de58c 3560
79360d99 3561 def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
358de58c 3562 ytcfg=None, check_get_keys=None, ep='browse', fatal=True):
79360d99 3563 response = None
3564 last_error = None
3565 count = -1
a06916d9 3566 retries = self.get_param('extractor_retries', 3)
79360d99 3567 if check_get_keys is None:
3568 check_get_keys = []
3569 while count < retries:
3570 count += 1
3571 if last_error:
3572 self.report_warning('%s. Retrying ...' % last_error)
3573 try:
3574 response = self._call_api(
3575 ep=ep, fatal=True, headers=headers,
358de58c 3576 video_id=item_id, query=query,
79360d99 3577 context=self._extract_context(ytcfg),
3578 api_key=self._extract_api_key(ytcfg),
3579 note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
3580 except ExtractorError as e:
3581 if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
3582 # Downloading page may result in intermittent 5xx HTTP error
3583 # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
3584 last_error = 'HTTP Error %s' % e.cause.code
3585 if count < retries:
3586 continue
358de58c 3587 if fatal:
3588 raise
3589 else:
3590 self.report_warning(error_to_compat_str(e))
3591 return
3592
79360d99 3593 else:
3594 # Youtube may send alerts if there was an issue with the continuation page
95c01b6c 3595 self._extract_and_report_alerts(response, expected=False)
79360d99 3596 if not check_get_keys or dict_get(response, check_get_keys):
3597 break
3598 # Youtube sometimes sends incomplete data
3599 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
3600 last_error = 'Incomplete data received'
3601 if count >= retries:
358de58c 3602 if fatal:
3603 raise ExtractorError(last_error)
3604 else:
3605 self.report_warning(last_error)
3606 return
79360d99 3607 return response
3608
cd7c66cf 3609 def _extract_webpage(self, url, item_id):
a06916d9 3610 retries = self.get_param('extractor_retries', 3)
62bff2c1 3611 count = -1
c705177d 3612 last_error = 'Incomplete yt initial data recieved'
14fdfea9 3613 while count < retries:
62bff2c1 3614 count += 1
14fdfea9 3615 # Sometimes youtube returns a webpage with incomplete ytInitialData
62bff2c1 3616 # See: https://github.com/yt-dlp/yt-dlp/issues/116
3617 if count:
c705177d 3618 self.report_warning('%s. Retrying ...' % last_error)
5ef7d9bd 3619 webpage = self._download_webpage(
3620 url, item_id,
cd7c66cf 3621 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
14fdfea9 3622 data = self._extract_yt_initial_data(item_id, webpage)
14fdfea9 3623 if data.get('contents') or data.get('currentVideoEndpoint'):
3624 break
95c01b6c 3625 # Extract alerts here only when there is error
3626 self._extract_and_report_alerts(data)
c705177d 3627 if count >= retries:
6a39ee13 3628 raise ExtractorError(last_error)
cd7c66cf 3629 return webpage, data
3630
9297939e 3631 @staticmethod
3632 def _smuggle_data(entries, data):
3633 for entry in entries:
3634 if data:
3635 entry['url'] = smuggle_url(entry['url'], data)
3636 yield entry
3637
def _real_extract(self, url):
    """Extract *url* and propagate its smuggled data to the resulting entries.

    Data smuggled into *url* (plus an ``is_music_url`` flag when
    ``is_music_url`` says so) is handed to ``__real_extract`` and, if the
    result has entries, smuggled into the URL of every entry.
    """
    url, smuggled = unsmuggle_url(url, {})
    if self.is_music_url(url):
        smuggled['is_music_url'] = True
    result = self.__real_extract(url, smuggled)
    entries = result.get('entries')
    if entries:
        result['entries'] = self._smuggle_data(entries, smuggled)
    return result
3646
# Splits a URL into the part matched by _VALID_URL ('pre'), an optional
# '/tab' component that is only looked for when the 'channel_type' group of
# _VALID_URL took part in the match ('tab'), and everything after ('post')
_url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)

def __real_extract(self, url, smuggled_data):
    """Extract a channel tab, playlist or video page.

    The URL is first normalized: the host is forced to www.youtube.com, the
    tab name is lower-cased, YouTube Music channel ids are rewritten to their
    playlist/channel equivalents and bare channel URLs are redirected to
    ``/videos`` (unless the ``no-youtube-channel-redirect`` compat option is
    set). The downloaded page is then dispatched to ``_extract_from_tabs``,
    ``_extract_from_playlist`` or, as a last resort, to a single-video URL
    result.

    Raises ExtractorError when the page cannot be recognized or, for
    ``/live`` URLs that still show tabs, when the channel is not live.
    """
    item_id = self._match_id(url)
    url = compat_urlparse.urlunparse(
        compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    def get_mobj(url):
        # Match groups of the URL, with unmatched groups as '' instead of None
        mobj = self._url_re.match(url).groupdict()
        mobj.update((k, '') for k, v in mobj.items() if v is None)
        return mobj

    mobj = get_mobj(url)
    # Youtube returns incomplete data if tabname is not lower case
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

    if is_channel:
        if smuggled_data.get('is_music_url'):
            if item_id[:2] == 'VL':
                # Youtube music VL channels have an equivalent playlist
                item_id = item_id[2:]
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif item_id[:2] == 'MP':
                # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                item_id = self._search_regex(
                    r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                    self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                    'playlist id')
                pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
            elif mobj['channel_type'] == 'browse':
                # Youtube music /browse/ should be changed to /channel/
                pre = 'https://www.youtube.com/channel/%s' % item_id
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # Home URLs should redirect to /videos/
        self.report_warning(
            'A channel/user page was given. All the channel\'s videos will be downloaded. '
            'To download only the videos in the home page, add a "/featured" to the URL')
        tab = '/videos'

    url = ''.join((pre, tab, post))
    mobj = get_mobj(url)

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id = qs.get('v', [None])[0]
    playlist_id = qs.get('list', [None])[0]

    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('Unable to recognize tab page')
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
        url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        mobj = get_mobj(url)

    if video_id and playlist_id:
        if self.get_param('noplaylist'):
            self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
        self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

    webpage, data = self._extract_webpage(url, item_id)

    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        selected_tab = self._extract_selected_tab(tabs)
        tab_name = selected_tab.get('title', '')
        if 'no-youtube-channel-redirect' not in compat_opts:
            if mobj['tab'] == '/live':
                # Live tab should have redirected to the video
                raise ExtractorError('The channel is not currently live', expected=True)
            if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                if not mobj['not_channel'] and item_id[:2] == 'UC':
                    # Topic channels don't have /videos. Use the equivalent playlist instead
                    self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                    pl_id = 'UU%s' % item_id[2:]
                    pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                    try:
                        pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                        for alert_type, alert_message in self._extract_alerts(pl_data):
                            # Compare case-insensitively, like _report_alerts does
                            if alert_type.lower() == 'error':
                                raise ExtractorError('Youtube said: %s' % alert_message)
                        item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                    except ExtractorError:
                        self.report_warning('The playlist gave error. Falling back to channel URL')
                else:
                    self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

    self.write_debug('Final URL: %s' % url)

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
    self._extract_and_report_alerts(data)

    # data may have been replaced above, so look the tabs up again
    tabs = try_get(
        data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
    if tabs:
        return self._extract_from_tabs(item_id, webpage, data, tabs)

    playlist = try_get(
        data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, webpage)

    video_id = try_get(
        data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
        compat_str) or video_id
    if video_id:
        if mobj['tab'] != '/live':  # live tab is expected to redirect to video
            self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
        return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

    raise ExtractorError('Unable to recognize tab page')
c5e8d7af 3764
c5e8d7af 3765
class YoutubePlaylistIE(InfoExtractor):
    """Match bare playlist ids and playlist URLs and hand them to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept *url* unless YoutubeTabIE handles it or it names a video (``v=``)."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        names_video = bool(parse_qs(url).get('v', [None])[0])
        return not names_video and super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Redirect to the canonical playlist URL, keeping the original query if any."""
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # A bare playlist id has no query string, so build one from the id
        query = parse_qs(url) or {'list': playlist_id}
        target = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            target = smuggle_url(target, {'is_music_url': True})
        return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
29f7c58a 3848
3849
class YoutubeYtBeIE(InfoExtractor):
    """Rewrite youtu.be links that carry a ``list=`` parameter into watch URLs."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a YoutubeTabIE URL result for the equivalent ``watch?v=...&list=...`` URL."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
8bdd16b4 3888
3889
class YoutubeYtUserIE(InfoExtractor):
    """Expand the ``ytuser:<name>`` shorthand into the matching /user/ URL."""

    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a YoutubeTabIE URL result for the user's page."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
9558dcec 3903
b05654f0 3904
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the ``:ytfav`` family of shortcuts onto the ``LL`` playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    # Accepts :ytfav, :ytfavs, :ytfavorite(s) and :ytfavourite(s)
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the fixed ``list=LL`` playlist URL to YoutubeTabIE."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
3922
3923
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor for the "ytsearch" keyword, backed by the ``search`` endpoint."""
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    _SEARCH_KEY = 'ytsearch'
    # Sent as the request's 'params' field when truthy
    _SEARCH_PARAMS = None
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _TESTS = []

    def _entries(self, query, n):
        """Yield video results for ``query`` page by page, stopping after ``n`` of them.

        Iteration also ends when a response is empty, contains no result
        sections, or provides no continuation token for the next page.
        """
        def sections_from_contents(response):
            return response['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents']

        def sections_from_continuation(response):
            return response['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']

        def token_of(section):
            return section['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token']

        payload = {'query': query}
        if self._SEARCH_PARAMS:
            payload['params'] = self._SEARCH_PARAMS

        yielded = 0
        for page_num in itertools.count(1):
            response = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=payload,
                check_get_keys=('contents', 'onResponseReceivedCommands'))
            if not response:
                return

            sections = try_get(
                response, (sections_from_contents, sections_from_continuation), list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation_token = None
            for section in sections:
                if continuation_token is None:
                    continuation_token = try_get(section, token_of, compat_str)

                items = try_get(
                    section, lambda x: x['itemSectionRenderer']['contents'], list)
                for item in items or ():
                    renderer = item.get('videoRenderer') if isinstance(item, dict) else None
                    # Only entries that are video renderers with a video ID count as results
                    if not isinstance(renderer, dict) or not renderer.get('videoId'):
                        continue

                    yield self._extract_video(renderer)
                    yielded += 1
                    if yielded == n:
                        return

            if not continuation_token:
                return
            # The same payload is reused; only the continuation token changes between pages
            payload['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        results = self._entries(query, n)
        return self.playlist_result(results, query)
75dff0ee 3993
c9ae7b95 3994
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Variant of YoutubeSearchIE for the "ytsearchdate" keyword."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Search 'params' value used for this variant
    _SEARCH_PARAMS = 'CAI%3D'
75dff0ee 4000
c9ae7b95 4001
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Handle youtube.com/results?... URLs by running the search they describe."""
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own _VALID_URL pattern unchanged."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the query and optional ``sp`` value from the URL and run the search."""
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        # 'search_query' takes precedence over 'q' when both are present
        query_values = params.get('search_query') or params.get('q')
        query = query_values[0]
        # Stored on the instance, where _entries() reads it; empty when 'sp' is absent
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
3462ffa8 4027
4028
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed extractors.
    Each subclass has to provide _FEED_NAME; it is used both in IE_NAME and
    in the youtube.com/feed/ URL that extraction is delegated to.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        """Extractor name derived from the subclass's _FEED_NAME."""
        feed = self._FEED_NAME
        return 'youtube:%s' % feed

    def _real_extract(self, url):
        """Delegate the feed page for _FEED_NAME to YoutubeTabIE."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
25f14e9f
S
4045
4046
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ``:ytwatchlater`` shortcut onto the ``WL`` playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate the fixed ``list=WL`` playlist URL to YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
3462ffa8 4059
4060
25f14e9f
S
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'recommended' feed."""
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Matches the bare youtube.com front page as well as the :ytrec / :ytrecommended shortcuts
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
1ed5b5c9 4075
1ed5b5c9 4076
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'subscriptions' feed."""
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    # Accepts :ytsub, :ytsubs, :ytsubscription and :ytsubscriptions
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
1ed5b5c9 4088
1ed5b5c9 4089
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the 'history' feed."""
    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    # Accepts both :ythis and :ythistory
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
1ed5b5c9
JMF
4098
4099
15870e90
PH
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs that lost their ``v=`` parameter and explain why.

    The pattern only matches URLs that end right after ``watch?`` (optionally
    with one of a few known non-video parameters) or after an
    ``attribution_link?a=...`` parameter. Extraction always fails with a hint
    about quoting the URL in the shell.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError telling the user to quote the URL."""
        # The example commands name this program (yt-dlp), so the hint can be
        # copied as-is. The space before the final period is deliberate: it
        # keeps the period from looking like part of the video ID.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            'or simply yt-dlp BaW_jenozKc .',
            expected=True)
772fd5cc
PH
4147
4148
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v=`` value is shorter than a full 11-character ID."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    # Only IDs of 1 to 10 characters at the very end of the URL match
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID and the URL."""
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)